# Remove leap years from COSMO-REA6 forcing data

COMSO reanalysis data uses a gregorian calendar with leap years. Running long simulations (necessary for spin-up) with COMSO reanalysis data proved impossible, because the leap years don't align and the model terminates prematurely after ~100 years. To be able to run much longer, I want to remove all occurrences of February 29th from the data and try running the model with calendar set to NO_LEAP.

This notebook will...
1. read in the cosmo reanalysis data
2. remove leap year Feb 29
3. save the modified data

In [6]:
# import libraries
import os
import netCDF4 as nc
import xarray as xr  # NetCDF data handling
import zipfile # for unzipping
import shutil # easiest whole-directory zipping
import glob # for wildcard * searching in file names
from pathlib import Path  # For easy path handling

Download COSMOREA data from evalieungh/FATES_INCLINE repo if necessary:

In [3]:
%%bash
pwd
cd ../data
wget https://raw.githubusercontent.com/evalieungh/FATES_INCLINE/main/data/ALP4_cosmorea.zip

/mnt/c/Users/evaler/OneDrive - Universitetet i Oslo/Eva/PHD/FATES_INCLINE/src


In [5]:
# set path to data, where we have the original (gregorian) data and will save the modified no-leap version
cosmo_path = str(Path(f"C:/Users/evaler/OneDrive - Universitetet i Oslo/Eva/PHD/FATES_INCLINE/data"))



In [7]:
print("extracting ", cosmo_path + "/ALP4_cosmorea.zip")
with zipfile.ZipFile(cosmo_path + "/ALP4_cosmorea.zip", 'r') as zip_ref:
    zip_ref.extractall(cosmo_path + "/ALP4_cosmorea")

extracting  C:\Users\evaler\OneDrive - Universitetet i Oslo\Eva\PHD\FATES_INCLINE\data/ALP4_cosmorea.zip


In [20]:
# Input and output file paths
input_dir = str(Path(cosmo_path + f"/ALP4_cosmorea/datmdata/"))
output_dir = str(Path(cosmo_path + f"/ALP4_cosmorea_noleap/datmdata/"))
print(input_dir)
print(output_dir)

C:\Users\evaler\OneDrive - Universitetet i Oslo\Eva\PHD\FATES_INCLINE\data\ALP4_cosmorea\datmdata
C:\Users\evaler\OneDrive - Universitetet i Oslo\Eva\PHD\FATES_INCLINE\data\ALP4_cosmorea_noleap\datmdata


In [22]:
# Get a list of all NetCDF files in the input directory
files = [f for f in os.listdir(input_dir) if f.endswith('.nc')]
print(files)

['clm1pt_ALP4_1995-01.nc', 'clm1pt_ALP4_1995-02.nc', 'clm1pt_ALP4_1995-03.nc', 'clm1pt_ALP4_1995-04.nc', 'clm1pt_ALP4_1995-05.nc', 'clm1pt_ALP4_1995-06.nc', 'clm1pt_ALP4_1995-07.nc', 'clm1pt_ALP4_1995-08.nc', 'clm1pt_ALP4_1995-09.nc', 'clm1pt_ALP4_1995-10.nc', 'clm1pt_ALP4_1995-11.nc', 'clm1pt_ALP4_1995-12.nc', 'clm1pt_ALP4_1996-01.nc', 'clm1pt_ALP4_1996-02.nc', 'clm1pt_ALP4_1996-03.nc', 'clm1pt_ALP4_1996-04.nc', 'clm1pt_ALP4_1996-05.nc', 'clm1pt_ALP4_1996-06.nc', 'clm1pt_ALP4_1996-07.nc', 'clm1pt_ALP4_1996-08.nc', 'clm1pt_ALP4_1996-09.nc', 'clm1pt_ALP4_1996-10.nc', 'clm1pt_ALP4_1996-11.nc', 'clm1pt_ALP4_1996-12.nc', 'clm1pt_ALP4_1997-01.nc', 'clm1pt_ALP4_1997-02.nc', 'clm1pt_ALP4_1997-03.nc', 'clm1pt_ALP4_1997-04.nc', 'clm1pt_ALP4_1997-05.nc', 'clm1pt_ALP4_1997-06.nc', 'clm1pt_ALP4_1997-07.nc', 'clm1pt_ALP4_1997-08.nc', 'clm1pt_ALP4_1997-09.nc', 'clm1pt_ALP4_1997-10.nc', 'clm1pt_ALP4_1997-11.nc', 'clm1pt_ALP4_1997-12.nc', 'clm1pt_ALP4_1998-01.nc', 'clm1pt_ALP4_1998-02.nc', 'clm1pt_ALP

In [24]:
# print variables in the first nc file
example_file = str(Path(input_dir + "/" + f"clm1pt_ALP4_1995-01.nc"))
with nc.Dataset(example_file, 'r') as ds:
    # List all variables in the file
    print("Variables in the file:")
    print(ds.variables.keys())

Variables in the file:
dict_keys(['EDGEW', 'EDGEE', 'EDGES', 'EDGEN', 'LONGXY', 'LATIXY', 'SWDIFDS_RAD', 'SWDIRS_RAD', 'RAIN_CON', 'RAIN_GSP', 'SNOW_GSP', 'SNOW_CON', 'PRECTmms', 'TBOT', 'WIND', 'PSRF', 'SHUM', 'FLDS', 'time'])


In [30]:
# get more info on time variable
with nc.Dataset(example_file, 'r') as ds:
    # Access the "time" variable
    time_var = ds.variables['time']

    # Print the variable dimensions
    print("Variable dimensions:", time_var.dimensions)

    # Print the variable shape
    print("Variable shape:", time_var.shape)

    # Print the variable attributes
    print("Variable attributes:", time_var.ncattrs())

    # Print a specific attribute
    print("Variable units:", time_var.units)

    # Print the variable values
    print("Variable values:", time_var[:])

Variable name: time
Variable dimensions: ('time',)
Variable shape: (248,)
Variable attributes: ['standard_name', 'units', 'calendar', 'axis']
Variable units: hours since 1995-1-1 01:00:00
Variable values: [  0.   3.   6.   9.  12.  15.  18.  21.  24.  27.  30.  33.  36.  39.
  42.  45.  48.  51.  54.  57.  60.  63.  66.  69.  72.  75.  78.  81.
  84.  87.  90.  93.  96.  99. 102. 105. 108. 111. 114. 117. 120. 123.
 126. 129. 132. 135. 138. 141. 144. 147. 150. 153. 156. 159. 162. 165.
 168. 171. 174. 177. 180. 183. 186. 189. 192. 195. 198. 201. 204. 207.
 210. 213. 216. 219. 222. 225. 228. 231. 234. 237. 240. 243. 246. 249.
 252. 255. 258. 261. 264. 267. 270. 273. 276. 279. 282. 285. 288. 291.
 294. 297. 300. 303. 306. 309. 312. 315. 318. 321. 324. 327. 330. 333.
 336. 339. 342. 345. 348. 351. 354. 357. 360. 363. 366. 369. 372. 375.
 378. 381. 384. 387. 390. 393. 396. 399. 402. 405. 408. 411. 414. 417.
 420. 423. 426. 429. 432. 435. 438. 441. 444. 447. 450. 453. 456. 459.
 462. 465. 468

***the next part does not work, because it assumes the time variable has month and day that can be used for subsetting***

In [25]:
# Loop through each file in the input directory
for file in files:
    input_file = os.path.join(input_dir, file)
    output_file = os.path.join(output_dir, file)

    # Open the input NetCDF file in read mode
    with nc.Dataset(input_file, mode='r') as ds_in:
        # Create a new NetCDF file for writing
        with nc.Dataset(output_file, mode='w') as ds_out:
            # Copy global attributes from input to output file
            for attr_name in ds_in.ncattrs():
                setattr(ds_out, attr_name, getattr(ds_in, attr_name))

            # Copy dimensions from input to output file
            for dim_name, dim in ds_in.dimensions.items():
                ds_out.createDimension(dim_name, len(dim) if not dim.isunlimited() else None)

            # Copy variables from input to output file
            for var_name, var in ds_in.variables.items():
                # Create the variable in the output file
                var_out = ds_out.createVariable(var_name, var.dtype, var.dimensions)

                # Copy variable attributes from input to output file
                for attr_name in var.ncattrs():
                    setattr(var_out, attr_name, getattr(var, attr_name))

                # Copy variable data from input to output file, excluding February 29
                for i, time_val in enumerate(var[:]):
                 # Check if the date is February 29
                    if nc.num2date(time_val).month == 2 and \
                            nc.num2date(time_val).day == 29:
                        continue

                    # Copy variable data to output file
                    var_out[i] = var[i]
            

    print(f'Saved the modified NetCDF file as: {output_file}')


TypeError: num2date() takes at least 2 positional arguments (1 given)