# This notebook builds a daily climatology from CAFE c2 control-run atmospheric daily output. Only the last 100 years (model years 400-499) are used.

In [3]:
# Import packages -----
import pandas as pd
import xarray as xr
import numpy as np
from ipywidgets import FloatProgress
from dateutil.relativedelta import relativedelta

#### Initialise

In [4]:
# Standard naming -----
# Each pair is (name used in the CAFE output, standard name used from here on).
cafe_to_std = [
    ('ucomp',    'u'),
    ('vcomp',    'v'),
    ('temp',     'temp'),
    ('sphum',    'sphum'),
    ('hght',     'gh'),
    ('lwflx',    'lwf'),
    ('shflx',    'shf'),
    ('tau_x',    'tau_x'),
    ('tau_y',    'tau_y'),
    ('t_ref',    't_ref'),
    ('q_ref',    'q_ref'),
    ('u_ref',    'u_ref'),
    ('v_ref',    'v_ref'),
    ('t_surf',   't_s'),
    ('h500',     'h500'),
    ('precip',   'precip'),
    ('lwdn_sfc', 'lwf_dn_s'),
    ('olr',      'olr'),
    ('swdn_sfc', 'swf_dn_s'),
    ('swup_toa', 'swf_up_toa'),
]

# Tabular view of the mapping (displayed below) and the dict passed to Dataset.rename
fields = pd.DataFrame(cafe_to_std, columns=['name_CAFE', 'name_std'])
name_dict = fields.set_index('name_CAFE')['name_std'].to_dict()

fields

Unnamed: 0,name_CAFE,name_std
0,ucomp,u
1,vcomp,v
2,temp,temp
3,sphum,sphum
4,hght,gh
5,lwflx,lwf
6,shflx,shf
7,tau_x,tau_x
8,tau_y,tau_y
9,t_ref,t_ref


#### Only use last 100 years

In [5]:
# Build one file path per model year and open them as a single dataset -----
base = '/OSM/CBR/OA_DCFP/data2/model_output/CAFE/controls/c2/OUTPUT/'
years = range(400,500)

# Zero-pad the year to four digits instead of hard-coding a single leading '0':
# the result is identical for years 400-499 and stays correct if `years` is
# ever changed to include years below 100 or above 999.
paths = [base + 'atmos_daily_' + str(year).zfill(4) + '_01_01.nc' for year in years]

# Drop the time-averaging bookkeeping variables, then switch every field to its
# standard name (name_dict is built in the "Standard naming" cell).
ds = xr.open_mfdataset(paths, autoclose=True) \
       .drop(['average_T1','average_T2','average_DT','time_bounds']) \
       .rename(name_dict)

# Rename the 'latb'/'lonb' dimensions when present.
# NOTE(review): only 'latb' is checked; this assumes 'lonb' always accompanies it.
if 'latb' in ds.dims:
    ds = ds.rename({'latb':'lat_2','lonb':'lon_2'})

  enable_cftimeindex)
  return self.func(self.array[key])


In [6]:
# Borrow the time axis of the 2016 forecast to label the climatology -----
path = '/OSM/CBR/OA_DCFP/data/model_output/CAFE/forecasts/v1/yr2016/mn1/OUTPUT.1/atmos_daily*.nc'
dataset = xr.open_mfdataset(path, autoclose=True)
forecast_time = dataset.time

# Step 59 is set aside as the leap-day time stamp (presumably 29 Feb 2016,
# if the forecast starts on 1 Jan with one step per day -- TODO confirm).
time_ly = forecast_time[59]

# The remaining steps 0-58 and 60-365 form the time axis without that day.
before_leap_day = forecast_time[:59]
after_leap_day = forecast_time[60:366]
time_use = xr.concat([before_leap_day, after_leap_day], dim='time')

In [7]:
# Make month_day array of 'MM-DD' labels, one per time step -----
# Read the time values once (the original re-evaluated ds.time.values for every
# element, twice) and build the label directly, which also avoids going through
# the private np.core.defchararray namespace.
times = ds.time.values
md = np.array([str(t.timetuple()[1]).zfill(2) + '-' + str(t.timetuple()[2]).zfill(2)
               for t in times])

# Group by month-day on a relabelled copy -----
# assign_coords returns a new dataset, so ds keeps its original time axis and
# this cell can be re-run without failing on already-converted string labels.
ds_md = ds.assign_coords(time=md)
clim = ds_md.groupby('time').mean(dim='time',keep_attrs=True)

# Swap the 'MM-DD' group labels for the time stamps in time_use.
# NOTE(review): assumes the data contain no 29 Feb, so that the number of
# groups matches len(time_use) -- TODO confirm.
clim['time'] = time_use

In [8]:
# Replicate Feb 28th as Feb 29th to deal with leap years -----
# Select the Feb 28th climatology and relabel it with the leap-day time stamp.
clim_ly = clim.sel(time='2016-02-28')
clim_ly['time'] = np.array([time_ly.values])

# Append the leap day and restore chronological order. xr.concat with an explicit
# dimension replaces xr.auto_combine, which was deprecated and later removed
# from xarray.
# NOTE: clim is rebuilt from itself here, so running this cell twice would add
# a second copy of the leap day.
clim = xr.concat([clim, clim_ly], dim='time').sortby('time')

In [9]:
# Write the climatology to disk -----
save_fldr = '/OSM/CBR/OA_DCFP/data/intermediate_products/pylatte_climatologies/'
clim_file = save_fldr + 'cafe.c2.atmos.400_499.clim.nc'

# On-disk representation of the time axis: floating-point days since
# 0001-01-01 on a Julian calendar.
time_encoding = {'dtype': 'float',
                 'calendar': 'JULIAN',
                 'units': 'days since 0001-01-01 00:00:00'}

# mode='w' overwrites any existing file of the same name.
clim.to_netcdf(clim_file, mode='w', encoding={'time': time_encoding})