# This notebook processes CAFE c2_restart atmospheric daily data for building climatologies. Only the last 50 years are used.

In [1]:
# Import packages -----
import pandas as pd
import xarray as xr
import numpy as np
from ipywidgets import FloatProgress
from dateutil.relativedelta import relativedelta

#### Initialise

In [2]:
# Standard naming -----
# Each row pairs a CAFE variable name with the standard name used downstream.
name_pairs = [
    ('ucomp',     'u'),
    ('vcomp',     'v'),
    ('temp',      'temp'),
    ('sphum',     'sphum'),
    ('hght',      'gh'),
    ('lwflx',     'lwf'),
    ('shflx',     'shf'),
    ('tau_x',     'tau_x'),
    ('tau_y',     'tau_y'),
    ('t_ref',     't_ref'),
    ('q_ref',     'q_ref'),
    ('u_ref',     'u_ref'),
    ('v_ref',     'v_ref'),
    ('t_surf',    't_s'),
    ('t_ref_min', 't_ref_min'),
    ('t_ref_max', 't_ref_max'),
    ('ps',        'ps'),
    ('h500',      'h500'),
    ('precip',    'precip'),
    ('lwdn_sfc',  'lwf_dn_s'),
    ('olr',       'olr'),
    ('swdn_sfc',  'swf_dn_s'),
    ('swup_toa',  'swf_up_toa'),
]
fields = pd.DataFrame(name_pairs, columns=['name_CAFE', 'name_std'])

# Lookup used to rename dataset variables: CAFE name -> standard name
name_dict = dict(zip(fields['name_CAFE'], fields['name_std']))

fields

Unnamed: 0,name_CAFE,name_std
0,ucomp,u
1,vcomp,v
2,temp,temp
3,sphum,sphum
4,hght,gh
5,lwflx,lwf
6,shflx,shf
7,tau_x,tau_x
8,tau_y,tau_y
9,t_ref,t_ref


#### Only use last 50 years

In [3]:
# Build one path per model year and open them all as a single dataset -----
base = '/OSM/CBR/OA_DCFP/data2/model_output/CAFE/controls/c2_restart/OUTPUT/'
years = range(500,550)

# The literal '0' in the file name pads the three-digit years used here to four digits.
paths = [base + 'atmos_daily_0' + str(year) + '_01_01.plevel.nc' for year in years]

# Drop the averaging/bounds bookkeeping variables, then switch to the standard names.
ds = (
    xr.open_mfdataset(paths, autoclose=True)
      .drop(['average_T1','average_T2','average_DT','time_bounds'])
      .rename(name_dict)
)

  enable_cftimeindex)
  return self.func(self.array[key])


In [4]:
# Use year 2016 as time -----
# The 2016 forecast output supplies the time axis that the climatology is labelled with.
path = '/OSM/CBR/OA_DCFP/data/model_output/CAFE/forecasts/v1/yr2016/mn1/OUTPUT.1/atmos_daily*.nc'
dataset = xr.open_mfdataset(path, autoclose=True)

# Entry 59 is set aside as the leap day (Feb 29 if the series starts on 1 Jan 2016
# with daily steps - TODO confirm); the remaining 365 entries form the time axis.
time_ly = dataset.time[59]
before_leap_day = dataset.time[:59]
after_leap_day = dataset.time[60:366]
time_use = xr.concat([before_leap_day, after_leap_day], dim='time')

In [5]:
# Make month_day array of month-day -----
# Fetch the time values once instead of re-evaluating ds.time.values for every element.
times = ds.time.values
# timetuple()[1] is the month and timetuple()[2] the day; both are zero-padded to two digits.
m = [str(t.timetuple()[1]).zfill(2) + '-' for t in times]
d = [str(t.timetuple()[2]).zfill(2) for t in times]
# np.char.add is the public entry point for element-wise string concatenation
# (np.core.defchararray lives in a private namespace that is deprecated in NumPy 2).
md = np.char.add(m, d)

# Replace time array with month_day array and groupby -----
# NOTE: this overwrites the time coordinate of ds in place, so the cell cannot simply be
# re-run: the 'MM-DD' strings have no timetuple(). Re-run the cell that loads ds first.
ds['time'] = md
clim = ds.groupby('time').mean(dim='time',keep_attrs=True)
# Relabel the groups with the 2016 dates; this relies on there being one group per entry
# of time_use (365, no Feb 29) and on the groups coming back in calendar order - TODO confirm.
clim['time'] = time_use

In [6]:
# Replicate Feb 28th as Feb 29th to deal with leap years -----
# Time stamp reserved for the leap day, wrapped in a length-1 array.
leap_day_stamp = np.array([time_ly.values])

# Take the Feb 28th climatology and relabel it with the leap-day time stamp ...
clim_ly = clim.copy().sel(time='2016-02-28')
clim_ly['time'] = leap_day_stamp

# ... then append it and restore chronological order.
# NOTE: clim is rebound here, so re-running this cell alone would add the leap day again.
combined = xr.auto_combine([clim,clim_ly])
clim = combined.sortby('time')

In [7]:
%%time
# Save the climatology -----
save_fldr = '/OSM/CBR/OA_DCFP/data/intermediate_products/doppyo/mean_climatologies/'
clim.to_netcdf(save_fldr + 'cafe.c2.atmos.500_549.clim.new.nc', mode = 'w',
               encoding = {'time':{'dtype':'float','calendar':'JULIAN',
                           'units':'days since 0001-01-01 00:00:00'}})

CPU times: user 44min 28s, sys: 12min 55s, total: 57min 23s
Wall time: 40min 13s


In [8]:
# Display the climatology dataset as a final check -----
clim

<xarray.Dataset>
Dimensions:     (lat: 90, latb: 91, level: 37, lon: 144, lonb: 145, nv: 2, time: 366)
Coordinates:
  * lon         (lon) float64 1.25 3.75 6.25 8.75 11.25 13.75 16.25 18.75 ...
  * lonb        (lonb) float64 0.0 2.5 5.0 7.5 10.0 12.5 15.0 17.5 20.0 22.5 ...
  * lat         (lat) float64 -89.49 -87.98 -85.96 -83.93 -81.91 -79.89 ...
  * latb        (latb) float64 -90.0 -88.99 -86.97 -84.94 -82.92 -80.9 ...
  * level       (level) float32 1.0 2.0 3.0 5.0 7.0 10.0 20.0 30.0 50.0 70.0 ...
  * nv          (nv) float64 1.0 2.0
  * time        (time) datetime64[ns] 2016-01-01T12:00:00 ...
Data variables:
    gh          (time, level, lat, lon) float32 dask.array<shape=(366, 37, 90, 144), chunksize=(366, 37, 90, 144)>
    lwf         (time, lat, lon) float32 dask.array<shape=(366, 90, 144), chunksize=(366, 90, 144)>
    shf         (time, lat, lon) float32 dask.array<shape=(366, 90, 144), chunksize=(366, 90, 144)>
    tau_x       (time, lat, lon) float32 dask.array<shape=(366,