# This notebook processes CAFE f1 daily ocean data to build climatologies. Only the first 2 years of each forecast initialised in the period 2003-2015 are used.
Temporary files are written to `tmp_fldr`.

In [1]:
# Import packages -----
import pandas as pd
import xarray as xr
import numpy as np
from pylatte import utils
from ipywidgets import FloatProgress

#### Initialise

In [2]:
# Where the forecasts live and which files to pick up -----
fcst_folder = '/OSM/CBR/OA_DCFP/data/model_output/CAFE/forecasts/v1/'
fcst_filename = 'ocean_daily*'

# One row per field: (name in the CAFE output, standardised short name) -----
fields = pd.DataFrame(
    [('sst',    'sst'),
     ('patm_t', 'patm'),
     ('eta_t',  'eta'),
     ('sss',    'sss'),
     ('u_surf', 'u_s'),
     ('v_surf', 'v_s')],
    columns=['name_CAFE', 'name_std'])

# Lookup from CAFE name to standardised name -----
name_dict = dict(zip(fields['name_CAFE'], fields['name_std']))

fields

Unnamed: 0,name_CAFE,name_std
0,sst,sst
1,patm_t,patm
2,eta_t,eta
3,sss,sss
4,u_surf,u_s
5,v_surf,v_s


#### Save each init month and variable separately due to memory considerations (this is only necessary for multi-level variables, but all are dealt with in the same way for simplicity)

In [3]:
# Folder that receives the per-start temporary files -----
tmp_fldr = ('/OSM/CBR/OA_DCFP/data/intermediate_products/'
            'pylatte_climatologies/tmp/')

In [4]:
# Loop over forecasts, saving numerator and denominator of mean -----
# For each variable and each forecast start (year, month) one temporary file
# is written to `tmp_fldr`. Per 'MM-DD' label it holds:
#   'sum'   : the field summed over the time steps carrying that label, then
#             averaged over the ensemble members that could be opened
#   'count' : the number of time steps carrying that label
# Starts whose temporary file already opens cleanly are skipped, so the cell
# can be re-run after an interruption without redoing finished work.
years = range(2003,2016)
months = range(1,13)
ensembles = range(1,12)


def _tmp_file_is_readable(filename):
    """Return True if `filename` can be opened as a dataset, else False."""
    try:
        xr.open_dataset(filename, autoclose=True).close()
    except Exception:
        # Missing or unreadable (e.g. left half-written by an interrupted
        # run): report it so the caller rebuilds the file. `Exception` rather
        # than a bare except so Ctrl-C still stops the loop.
        return False
    return True


def _count_along(obj, dim):
    """Count the entries of `obj` along `dim`."""
    return obj.count(dim=dim)


for idx, variable in enumerate(fields['name_CAFE']):
    name_std = fields['name_std'][idx]
    print(variable)
    print('----------')

    for year in years:
        print(year)

        for month in months:
            print(month)

            # First see if file already exists -----
            savename = 'cafe.fcst.v1.ocean.' + name_std + '.' + str(year) + '.' + str(month) + '.clim.nc'
            if _tmp_file_is_readable(tmp_fldr + savename):
                continue

            # Try to stack ensembles into a list -----
            ens_list = []
            ens = []
            for ensemble in ensembles:

                path = fcst_folder + '/yr' + str(year) + '/mn' + str(month) + \
                       '/OUTPUT.' + str(ensemble) + '/' + fcst_filename + '.nc'

                try:
                    dataset = xr.open_mfdataset(path, autoclose=True)[variable]
                except OSError:
                    # File does not exist -----
                    continue

                # Truncate to 2 year forecasts (at most 731 time steps) -----
                n_trunc = min(731, len(dataset.time))
                dataset = dataset.isel(time=slice(n_trunc))

                # Use common horizontal dimension names on the u- and t-grids
                if 'xu_ocean' in dataset.dims:
                    dataset = dataset.rename({'xu_ocean':'lon_u','yu_ocean':'lat_u'})
                if 'xt_ocean' in dataset.dims:
                    dataset = dataset.rename({'xt_ocean':'lon_t','yt_ocean':'lat_t'})

                ens_list.append(dataset.rename(name_std))
                ens.append(ensemble)

            # Nothing could be opened for this start: no file is written -----
            if not ens_list:
                continue

            # Concatenate ensembles -----
            ds = xr.concat(ens_list, dim='ensemble')
            ds['ensemble'] = ens

            # Make month_day array of month-day ('MM-DD') -----
            md = np.array(['{:02d}-{:02d}'.format(mm, dd)
                           for mm, dd in zip(ds.time.dt.month.values,
                                             ds.time.dt.day.values)])

            # Replace time array with month_day array and groupby -----
            ds['time'] = md
            ds_clim = ds.groupby('time').sum(dim='time', keep_attrs=True) \
                        .to_dataset(name='sum') \
                        .mean(dim='ensemble')
            ds_clim['count'] = ds['time'].groupby('time').apply(_count_along, dim='time')

            ds_clim.to_netcdf(path = tmp_fldr + savename, mode = 'w')

            # Drop the references before the next start is processed
            # (see the memory note above this cell) -----
            del ds, ds_clim

sst
----------
2003
1
2
3
4
5
6
7
8
9
10
11
12
2004
1
2
3
4
5
6
7
8
9
10
11
12
2005
1
2
3
4
5
6
7
8
9
10
11
12
2006
1
2
3
4
5
6
7
8
9
10
11
12
2007
1
2
3
4
5
6
7
8
9
10
11
12
2008
1
2
3
4
5
6
7
8
9
10
11
12
2009
1
2
3
4
5
6
7
8
9
10
11
12
2010
1
2
3
4
5
6
7
8
9
10
11
12
2011
1
2
3
4
5
6
7
8
9
10
11
12
2012
1
2
3
4
5
6
7
8
9
10
11
12
2013
1
2
3
4
5
6
7
8
9
10
11
12
2014
1
2
3
4
5
6
7
8
9
10
11
12
2015
1
2
3
4
5
6
7
8
9
10
11
12
patm_t
----------
2003
1
2
3
4
5
6
7
8
9
10
11
12
2004
1
2
3
4
5
6
7
8
9
10
11
12
2005
1
2
3
4
5
6
7
8
9
10
11
12
2006
1
2
3
4
5
6
7
8
9
10
11
12
2007
1
2
3
4
5
6
7
8
9
10
11
12
2008
1
2
3
4
5
6
7
8
9
10
11
12
2009
1
2
3
4
5
6
7
8
9
10
11
12
2010
1
2
3
4
5
6
7
8
9
10
11
12
2011
1
2
3
4
5
6
7
8
9
10
11
12
2012
1
2
3
4
5
6
7
8
9
10
11
12
2013
1
2
3
4
5
6
7
8
9
10
11
12
2014
1
2
3
4
5
6
7
8
9
10
11
12
2015
1
2
3
4
5
6
7
8
9
10
11
12
eta_t
----------
2003
1
2
3
4
5
6
7
8
9
10
11
12
2004
1
2
3
4
5
6
7
8
9
10
11
12
2005
1
2
3
4
5
6
7
8
9
10
11
12
2006
1
2
3
4
5
6
7
8
9

#### Combine and write into single climatology file

In [5]:
# Borrow the time axis of the 2016 / month 1 forecast (OUTPUT.1): its first
# 366 time steps become the time coordinate of the climatology -----
path = '{}/yr2016/mn1/OUTPUT.1/{}.nc'.format(fcst_folder, fcst_filename)
dataset = xr.open_mfdataset(path, autoclose=True)
time_use = dataset['time'].isel(time=slice(0, 366))

In [6]:
# Loop over all variables -----
# For each field, open every temporary file written for it (stacked along a
# new 'run' dimension) and form the climatology as the pooled 'sum' divided by
# the pooled 'count'.
clim = xr.Dataset()
for variable in fields['name_std']:
    print(variable)
    print('----------')

    # The '.' after the field name stops the glob from also matching files of
    # a field whose name merely starts with this one -----
    name = 'cafe.fcst.v1.ocean.' + variable + '.*' + '.clim.nc'
    ds = xr.open_mfdataset(tmp_fldr + name, autoclose=True, concat_dim='run')
    numer = ds['sum'].sum(dim='run')
    denom = ds['count'].sum(dim='run')
    clim[variable] = (numer / denom).rename(variable)

# Swap the 'MM-DD' labels for the time axis taken from the 2016 forecast -----
clim['time'] = time_use

sst
----------
patm
----------
eta
----------
sss
----------
u_s
----------
v_s
----------


In [7]:
# Write the combined climatology to disk, with an explicit on-disk encoding
# for the time coordinate -----
save_fldr = '/OSM/CBR/OA_DCFP/data/intermediate_products/pylatte_climatologies/'
clim.to_netcdf(
    path = save_fldr + 'cafe.f1.ocean.2003010112_2017123112.clim.nc',
    mode = 'w',
    encoding = {'time': dict(dtype='float',
                             calendar='JULIAN',
                             units='days since 0001-01-01 00:00:00')})