# This notebook saves jra55 daily climatologies in a single dataset

#### IMPORTANT: If adapting this code to save other climatologies, be careful to include only complete years, because pyLatte computes monthly climatologies from the saved daily climatologies.

In [12]:
import pandas as pd
import xarray as xr
from pylatte import utils

#### jra55 details

In [16]:
# Root folder of the JRA-55 isobaric data; each field is read from its own sub-folder -----
jra_folder = '/OSM/CBR/OA_DCFP/data/observations/jra55/isobaric/'

# Column names of the field table, in the order used by each record below:
#   fldname  : sub-folder of jra_folder that holds the field
#   flename  : prefix of the field's file names
#   name_jra : name of the variable inside the JRA-55 files
#   lat_jra  : name of the latitude dimension inside the files (renamed to 'lat' when stacking)
#   lon_jra  : name of the longitude dimension inside the files (renamed to 'lon' when stacking)
#   fcn_p    : True when the 'lv_ISBL1' level dimension is to be renamed to 'pfull'
#   fcst     : True when the field is averaged over 'forecast_time1' after loading
#   name_std : name given to the variable in the stacked dataset
field_columns = ['fldname', 'flename', 'name_jra', 'lat_jra', 'lon_jra', 'fcn_p', 'fcst', 'name_std']

# One record per field -----
field_records = [
    ('001_pres',  'jra.55.pres.',  'PRES_GDS0_SFC',        'g0_lat_1', 'g0_lon_2', False, False, 'ps'),
    ('002_prmsl', 'jra.55.prmsl.', 'PRMSL_GDS0_MSL',       'g0_lat_1', 'g0_lon_2', False, False, 'slp'),
    ('007_hgt',   'jra.55.hgt.',   'HGT_GDS0_ISBL',        'g0_lat_2', 'g0_lon_3', True,  False, 'gh'),
    ('011_tmp',   'jra.55.tmp.',   'TMP_GDS0_ISBL',        'g0_lat_2', 'g0_lon_3', True,  False, 'temp'),
    ('033_ugrd',  'jra.55.ugrd.',  'UGRD_GDS0_ISBL',       'g0_lat_2', 'g0_lon_3', True,  False, 'u'),
    ('034_vgrd',  'jra.55.vgrd.',  'VGRD_GDS0_ISBL',       'g0_lat_2', 'g0_lon_3', True,  False, 'v'),
    ('061_tprat', 'jra.55.tprat.', 'TPRAT_GDS0_SFC_ave3h', 'g0_lat_2', 'g0_lon_3', False, True,  'precip'),
]

# Transpose the records into one list per column and build the table from that mapping -----
fields = pd.DataFrame({column: list(values)
                       for column, values in zip(field_columns, zip(*field_records))})
fields

Unnamed: 0,fcn_p,fcst,fldname,flename,lat_jra,lon_jra,name_jra,name_std
0,False,False,001_pres,jra.55.pres.,g0_lat_1,g0_lon_2,PRES_GDS0_SFC,ps
1,False,False,002_prmsl,jra.55.prmsl.,g0_lat_1,g0_lon_2,PRMSL_GDS0_MSL,slp
2,True,False,007_hgt,jra.55.hgt.,g0_lat_2,g0_lon_3,HGT_GDS0_ISBL,gh
3,True,False,011_tmp,jra.55.tmp.,g0_lat_2,g0_lon_3,TMP_GDS0_ISBL,temp
4,True,False,033_ugrd,jra.55.ugrd.,g0_lat_2,g0_lon_3,UGRD_GDS0_ISBL,u
5,True,False,034_vgrd,jra.55.vgrd.,g0_lat_2,g0_lon_3,VGRD_GDS0_ISBL,v
6,False,True,061_tprat,jra.55.tprat.,g0_lat_2,g0_lon_3,TPRAT_GDS0_SFC_ave3h,precip


#### Stack climatologies in a single dataset

In [26]:
# Load every climatology listed in `fields`, rename its variable and dimensions to
# the standard names, and collect the results in the single dataset `ds` -----
ds = None  # reset here so a re-run never writes into a stale dataset from an earlier run
for _idx, row in fields.iterrows():
    # Map the JRA-55 names of this field onto the standard names -----
    name_dict = {row['name_jra'] : row['name_std'],
                 row['lat_jra'] : 'lat',
                 row['lon_jra'] : 'lon',
                 'initial_time0_hours' : 'time'}
    if row['fcn_p']:
        name_dict['lv_ISBL1'] = 'pfull'

    # Load data -----
    # Every file matching the pattern is opened and the files are concatenated along 'lv_ISBL1'
    clim_files = jra_folder + row['fldname'] + '/cat/' + row['flename'] + '*' + '.clim.nc'
    da = xr.open_mfdataset(clim_files,
                           concat_dim='lv_ISBL1',
                           autoclose=True).rename(name_dict)[row['name_std']]

    # Deal with forecast variables -----
    # Forecast fields are averaged over their 'forecast_time1' dimension
    if row['fcst']:
        da = da.mean(dim='forecast_time1', keep_attrs=True)

    # Stack in DataSet -----
    # The first field loaded creates the dataset, whatever its index label in `fields`
    if ds is None:
        ds = da.to_dataset(name=row['name_std'])
    else:
        ds[row['name_std']] = da

if ds is None:
    raise ValueError('`fields` is empty: no climatologies were loaded')

# Sort pressure levels -----
# Skipped when no loaded field carries a 'pfull' coordinate
if 'pfull' in ds.coords:
    ds = ds.sortby(ds.pfull)


In [27]:
# Display the stacked dataset to check its dimensions, coordinates and variables -----
ds

<xarray.Dataset>
Dimensions:  (lat: 145, lon: 288, pfull: 37, time: 366)
Coordinates:
  * lon      (lon) float32 0.0 1.25 2.5 3.75 5.0 6.25 7.5 8.75 10.0 11.25 ...
  * lat      (lat) float32 90.0 88.75 87.5 86.25 85.0 83.75 82.5 81.25 80.0 ...
  * time     (time) datetime64[ns] 2016-01-01T09:00:00 2016-01-02T09:00:00 ...
  * pfull    (pfull) float64 1.0 2.0 3.0 5.0 7.0 10.0 20.0 30.0 50.0 70.0 ...
Data variables:
    ps       (time, lat, lon) float64 dask.array<shape=(366, 145, 288), chunksize=(366, 145, 288)>
    slp      (time, lat, lon) float64 dask.array<shape=(366, 145, 288), chunksize=(366, 145, 288)>
    gh       (time, pfull, lat, lon) float64 dask.array<shape=(366, 37, 145, 288), chunksize=(366, 37, 145, 288)>
    temp     (time, pfull, lat, lon) float64 dask.array<shape=(366, 37, 145, 288), chunksize=(366, 37, 145, 288)>
    u        (time, pfull, lat, lon) float64 dask.array<shape=(366, 37, 145, 288), chunksize=(366, 37, 145, 288)>
    v        (time, pfull, lat, lon) float6

In [28]:
# Write the stacked climatologies to a single netCDF4 file, overwriting any existing file -----
# NOTE(review): the dates in the file name presumably give the period the climatology covers - confirm
savename = 'jra.isobaric.1958010100_2016123118.clim.nc'
save_folder = '/OSM/CBR/OA_DCFP/data/intermediate_products/pylatte_climatologies/'
ds.to_netcdf(path=save_folder + savename, mode='w', format='NETCDF4')