# This notebook saves CAFE v1 atmospheric daily climatologies in a single dataset

#### IMPORTANT: If adapting this code to save other climatologies, be careful to only include full years, as pyLatte will compute monthly climatologies from the saved daily climatologies

In [1]:
import pandas as pd
import xarray as xr
import numpy as np
from pylatte import utils
from ipywidgets import FloatProgress

#### Initialise

In [2]:
# Root folder of the CAFE v1 forecast output and the glob matching its daily atmosphere files -----
fcst_folder = '/OSM/CBR/OA_DCFP/data/model_output/CAFE/forecasts/v1/'
fcst_filename = 'atmos_daily*'

# Complete variable table: each row pairs a CAFE variable name with the name it is saved under -----
fields = pd.DataFrame(
    [('hght',  'gh'),
     ('sphum', 'sphum'),
     ('temp',  'temp'),
     ('ucomp', 'u'),
     ('vcomp', 'v')],
    columns=['name_CAFE', 'name_std'])
name_dict = dict(zip(fields['name_CAFE'], fields['name_std']))

# Override: the complete table is replaced straight away, so only 'temp' is processed below -----
fields = pd.DataFrame([('temp', 'temp')], columns=['name_CAFE', 'name_std'])
name_dict = dict(zip(fields['name_CAFE'], fields['name_std']))

fields


Unnamed: 0,name_CAFE,name_std
0,temp,temp


In [3]:
# Forecast initialisation dates to include, one per month start over the span
# below (takes approximately 1 min 30 sec per date) -----
init_dates = pd.date_range(start='2002-2', end='2016-5', freq='1MS')

# Ensemble member numbers to include (1 to 11 inclusive) -----
ensembles = range(1, 12)

#### Load one 366-day-long year to provide the time array

In [4]:
# Open the daily files of the yr2016 / mn1 / OUTPUT.1 forecast and keep the
# first 366 entries of its time coordinate as a reference time axis -----
path = '{}/yr2016/mn1/OUTPUT.1/{}.nc'.format(fcst_folder, fcst_filename)
dataset = xr.open_mfdataset(path, autoclose=True)
time_use = dataset['time'][:366]

#### Save each init month and variable separately due to memory considerations

In [31]:
# For every variable in `fields` and every (year, month) forecast start:
#   1. open each ensemble member's daily files and stack the members found,
#   2. relabel time as 'MM-DD' strings and group on that label,
#   3. store the per-group sum over time (averaged over ensemble members) as
#      'sum' and the per-group number of time steps as 'count',
#   4. write the result to its own netCDF file.
# Start dates with no files on disk are skipped silently.

years = range(2002, 2017)
months = range(1, 13)
ensembles = range(1, 12)

# Folder that receives one file per (variable, year, month) -----
save_folder = '/OSM/CBR/OA_DCFP/data/intermediate_products/pylatte_climatologies/tmp/'


def _count_along(da, dim):
    """Return `da.count(dim=dim)`; applied per 'MM-DD' group to get its size."""
    return da.count(dim=dim)


for variable, std_name in zip(fields['name_CAFE'], fields['name_std']):
    print(variable)
    print('----------')

    for year in years:
        print(year)

        for month in months:
            print(month)

            ens_list = []  # one DataArray per ensemble member found on disk
            ens = []       # member numbers matching ens_list, in the same order
            for ensemble in ensembles:

                path = fcst_folder + '/yr' + str(year) + '/mn' + str(month) + \
                       '/OUTPUT.' + str(ensemble) + '/' + fcst_filename + '.nc'

                # Only the open itself is expected to raise OSError -----
                try:
                    dataset = xr.open_mfdataset(path, autoclose=True)[variable]
                except OSError:
                    # File does not exist -----
                    continue

                if 'latb' in dataset.dims:
                    dataset = dataset.rename({'latb': 'lat_2', 'lonb': 'lon_2'})
                ens_list.append(dataset.rename(std_name))
                # Label with the real member number, not its position in
                # `ensembles`, so the coordinate stays right for any selection
                ens.append(ensemble)

            if not ens_list:
                # Nothing on disk for this start date -----
                continue

            # Concatenate ensembles -----
            ds = xr.concat(ens_list, dim='ensemble')
            ds['ensemble'] = ens

            # Make month_day array of zero-padded 'MM-DD' labels -----
            md = np.array([str(m).zfill(2) + '-' + str(d).zfill(2)
                           for m, d in zip(ds.time.dt.month.values,
                                           ds.time.dt.day.values)])

            # Replace time array with month_day array and groupby -----
            ds['time'] = md
            ds_clim = ds.groupby('time').sum(dim='time', keep_attrs=True) \
                        .to_dataset(name='sum').mean(dim='ensemble')
            ds_clim['count'] = ds['time'].groupby('time').apply(_count_along, dim='time')

            # NOTE: the saved time coordinate holds the 'MM-DD' group labels.
            # Swapping it for the presaved datetime axis (`time_use`, with
            # JULIAN calendar attributes and a matching netCDF encoding) was
            # disabled in the original cell and is still not done here.

            savename = 'cafe.fcst.v1.atmos.' + std_name + '.' + str(year) + '.' + str(month) + '.clim.nc'

            ds_clim.to_netcdf(path=save_folder + savename, mode='w')

            # Release the large arrays before the next start date is loaded -----
            del ds, ds_clim

temp
----------
2002
1
2
3
4
5
6
7
8
9
10
11
12
2003
1
2
3
4
5
6
7
8
9
10
11
12
2004
1
2
3
4
5
6
7
8
9
10
11
12
2005
1
2
3
4
5
6
7
8
9
10
11
12
2006
1
2
3
4
5
6
7
8
9
10
11
12
2007
1
2
3
4
5
6
7
8
9
10
11
12
2008
1
2
3
4
5
6
7
8
9
10
11
12
2009
1
2
3
4
5
6
7
8
9
10
11
12
2010
1
2
3
4
5
6
7
8
9
10
11
12
2011
1
2
3
4
5
6
7
8
9
10
11
12
2012
1
2
3
4
5
6
7
8
9
10
11
12
2013
1
2
3
4
5
6
7
8
9
10
11
12
2014
1
2
3
4
5
6
7
8
9
10
11
12
2015
1
2
3
4
5
6
7
8
9
10
11
12
2016
1
2
3
4
5
6
7
8
9
10
11
12
