# This notebook saves CAFE v1 atmospheric daily climatologies in a single dataset

#### IMPORTANT: If adapting this code to save other climatologies, take care to include only complete years, because pyLatte computes monthly climatologies from the saved daily climatologies.

In [137]:
import pandas as pd
import xarray as xr
import numpy as np
from pylatte import utils
from ipywidgets import FloatProgress

#### Initialise

In [44]:
# Root directory and filename pattern of the forecast output -----
fcst_folder = '/OSM/CBR/OA_DCFP/data/model_output/CAFE/forecasts/v1/'
fcst_filename = 'atmos_daily*'

# Variable names as they appear in the CAFE output, each paired with the
# standardised name it is renamed to when the data are loaded -----
name_pairs = [
    ('lwflx', 'lwf'),
    ('shflx', 'shf'),
    ('tau_x', 'tau_x'),
    ('tau_y', 'tau_y'),
    ('t_ref', 't_ref'),
    ('q_ref', 'q_ref'),
    ('u_ref', 'u_ref'),
    ('v_ref', 'v_ref'),
    ('t_ref_min', 't_ref_min'),
    ('t_ref_max', 't_ref_max'),
    ('t_surf', 't_s'),
    ('ps', 'p_s'),
    ('slp', 'slp'),
    ('h500', 'h500'),
    ('hght', 'gh'),
    ('sphum', 'sphum'),
    ('temp', 'temp'),
    ('ucomp', 'u'),
    ('vcomp', 'v'),
    ('precip', 'precip'),
    ('lwdn_sfc', 'lwf_dn_s'),
    ('lwup_sfc', 'lwf_up_s'),
    ('olr', 'olwr'),
    ('swdn_sfc', 'swf_dn_s'),
    ('swup_sfc', 'swf_up_s'),
    ('swup_toa', 'swf_up_toa'),
    ('high_cld_amt', 'high_cld_amt'),
    ('low_cld_amt', 'low_cld_amt'),
    ('mid_cld_amt', 'mid_cld_amt'),
    ('tot_cld_amt', 'tot_cld_amt'),
    ('latb', 'lat_2'),
    ('lonb', 'lon_2'),
]

# Tabular view of the mapping (displayed as the cell output) -----
fields = pd.DataFrame(name_pairs, columns=['name_CAFE', 'name_std'])

# CAFE name -> standard name lookup, in the form expected by Dataset.rename -----
name_dict = dict(zip(fields['name_CAFE'], fields['name_std']))

fields

Unnamed: 0,name_CAFE,name_std
0,lwflx,lwf
1,shflx,shf
2,tau_x,tau_x
3,tau_y,tau_y
4,t_ref,t_ref
5,q_ref,q_ref
6,u_ref,u_ref
7,v_ref,v_ref
8,t_ref_min,t_ref_min
9,t_ref_max,t_ref_max


In [86]:
# Forecast initial dates to include: the first day of every month between the
# two bounds below (takes approximately 1 min 30 sec per date) -----
first_init, last_init = '2002-2', '2016-5'
init_dates = pd.date_range(start=first_init, end=last_init, freq='1MS')

# Ensemble member numbers to include (1 through 11) -----
ensembles = range(1, 12)

#### Stack data into a single dataset

In [106]:
# Variables present in the raw files that are discarded before stacking -----
unwanted_vars = ['average_T1', 'average_T2', 'average_DT', 'time_bounds']

# Progress bar with one tick per (initial date, ensemble member) pair -----
n_steps = len(init_dates) * len(ensembles)
f = FloatProgress(min=0, max=n_steps, description='Loading...')
display(f)

# Build one ensemble-stacked object per initial date -----
fcst_list = []
for init_date in init_dates:
    year, month = init_date.year, init_date.month
    date_dir = fcst_folder + '/yr' + str(year) + '/mn' + str(month)

    # Open every requested ensemble member for this initial date -----
    members = []
    for ensemble in ensembles:
        # Advance the progress bar before the (slow) open -----
        f.value += 1

        path = date_dir + '/OUTPUT.' + str(ensemble) + '/' + fcst_filename + '.nc'
        member = xr.open_mfdataset(path, autoclose=True)
        member = member.drop(unwanted_vars)
        members.append(member.rename(name_dict))

    # Join the members along a new 'ensemble' dimension and label it with the
    # member numbers -----
    ens_object = xr.concat(members, dim='ensemble')
    ens_object['ensemble'] = ensembles

    fcst_list.append(ens_object)

# Join all initial dates end-to-end along the existing 'time' dimension -----
ds = xr.concat(fcst_list, dim='time')

# Rechunk so that each chunk spans the whole 'ensemble' and 'time' dimensions,
# drop any length-1 dimensions, then hand the result to pylatte's prune helper
# (NOTE(review): what prune removes is not visible from this notebook) -----
full_extent = {'ensemble': len(ds.ensemble), 'time': len(ds.time)}
ds = utils.prune(ds.chunk(chunks=full_extent).squeeze())

In [162]:
# Take a time axis from the forecast initialised in January 2016 (ensemble 1):
# its first 366 steps are used later as the time coordinate of the daily
# climatology -----
n_days = 366
path = fcst_folder + '/yr2016/mn1/OUTPUT.1/' + fcst_filename + '.nc'
dataset = xr.open_mfdataset(path, autoclose=True)
time_use = dataset.time.isel(time=slice(None, n_days))

In [163]:
# Relabel every time step with a zero-padded "MM-DD" string so that all samples
# falling on the same calendar day share one label and can be grouped -----
#
# The zero padding is essential. groupby returns its groups sorted by label, and
# these labels are text: unpadded labels sort as '1-1', '1-10', '1-11', ...,
# '10-1', ..., '2-1', which is not calendar order. Because the grouped result is
# later given a chronological time axis by position, unpadded labels would attach
# climatology values to the wrong dates. Padded labels ('01-01' ... '12-31') sort
# chronologically.
ds_md = ds.copy(deep=True)
time_index = pd.DatetimeIndex(ds.time.values)
month_day = np.array(['{:02d}-{:02d}'.format(int(m), int(d))
                      for m, d in zip(time_index.month, time_index.day)])

ds_md['time'] = month_day

In [None]:
with utils.timer():
    # Average over ensemble members first, then over every sample that shares
    # the same month-day label, giving one value per calendar day -----
    ens_mean = ds_md.mean(dim='ensemble')
    ds_clim = ens_mean.groupby('time').mean(dim='time')

    # Swap the month-day labels for real datetimes. The assignment is positional,
    # so it relies on the grouped output having one entry per element of time_use
    # and on the groups coming back in Jan 1 -> Dec 31 order
    # (NOTE(review): confirm the label ordering produced by groupby) -----
    ds_clim['time'] = time_use

In [167]:
with utils.timer():
    # Write the climatology to a single NETCDF4 file, overwriting any existing
    # file of the same name -----
    savename = 'cafe.fcst.v1.atmos.2002020112_2018043012.clim.nc'
    save_dir = '/OSM/CBR/OA_DCFP/data/intermediate_products/pylatte_climatologies/'
    ds_clim.to_netcdf(path=save_dir + savename, mode='w', format='NETCDF4')

KeyboardInterrupt: 