# This notebook processes CAFE c2_restart ocean daily data for building climatologies. Only the last 50 years (model years 500-549) are used.

In [1]:
# Import packages -----
import pandas as pd
import xarray as xr
import numpy as np
from ipywidgets import FloatProgress
from dateutil.relativedelta import relativedelta

#### Initialise

In [2]:
# Standard naming -----
# Each row pairs a CAFE variable name with its standardised name.
fields = pd.DataFrame(
    [('sst',    'sst'),
     ('patm_t', 'patm'),
     ('eta_t',  'eta'),
     ('sss',    'sss'),
     ('u_surf', 'u_s'),
     ('v_surf', 'v_s'),
     ('mld',    'mld')],
    columns=['name_CAFE', 'name_std'],
)

# Lookup table: CAFE name -> standard name.
name_dict = dict(zip(fields['name_CAFE'], fields['name_std']))

fields

Unnamed: 0,name_CAFE,name_std
0,sst,sst
1,patm_t,patm
2,eta_t,eta
3,sss,sss
4,u_surf,u_s
5,v_surf,v_s
6,mld,mld


#### Only use last 50 years

In [3]:
# Build the list of yearly files and open them as one dataset -----
base = '/OSM/CBR/OA_DCFP/data2/model_output/CAFE/controls/c2_restart/'
years = range(500,550)

# One file per model year. The year is zero-padded to four digits by the
# format spec rather than by a hard-coded '0' prefix, so the pattern stays
# correct if the year range is ever changed to 2- or 4-digit years.
paths = ['{}ocean_daily_{:04d}_01_01.nc'.format(base, year) for year in years]

# Open every year as a single dataset, drop the listed variables and rename
# the remaining variables using name_dict.
# NOTE(review): `autoclose` and `Dataset.drop` are deprecated in newer xarray
# releases -- confirm the target xarray version before upgrading.
ds = (
    xr.open_mfdataset(paths, autoclose=True)
      .drop(['average_T1','average_T2','average_DT','time_bounds',
             'area_t','area_u','geolat_c','geolat_t','ht','mld_sq'])
      .rename(name_dict)
)

# Rename the grid dimensions. Only dimensions actually present are renamed,
# so a dataset holding a single member of a lon/lat pair does not raise.
dim_names = {'xu_ocean': 'lon_2', 'yu_ocean': 'lat_2',
             'xt_ocean': 'lon',   'yt_ocean': 'lat'}
ds = ds.rename({old: new for old, new in dim_names.items() if old in ds.dims})

  enable_cftimeindex)
  return self.func(self.array[key])


In [4]:
# Display the combined dataset's summary (dimensions, coordinates, data variables) -----
ds

<xarray.Dataset>
Dimensions:  (lat: 300, lat_2: 300, lon: 360, lon_2: 360, nv: 2, time: 18250)
Coordinates:
  * lon      (lon) float64 -279.5 -278.5 -277.5 -276.5 -275.5 -274.5 -273.5 ...
  * lat      (lat) float64 -77.88 -77.63 -77.38 -77.13 -76.88 -76.63 -76.37 ...
  * nv       (nv) float64 1.0 2.0
  * lon_2    (lon_2) float64 -279.0 -278.0 -277.0 -276.0 -275.0 -274.0 ...
  * lat_2    (lat_2) float64 -77.75 -77.51 -77.26 -77.01 -76.75 -76.5 -76.24 ...
  * time     (time) object  500-01-01 12:00:00  500-01-02 12:00:00 ...
Data variables:
    patm     (time, lat, lon) float32 dask.array<shape=(18250, 300, 360), chunksize=(365, 300, 360)>
    eta      (time, lat, lon) float32 dask.array<shape=(18250, 300, 360), chunksize=(365, 300, 360)>
    sss      (time, lat, lon) float32 dask.array<shape=(18250, 300, 360), chunksize=(365, 300, 360)>
    sst      (time, lat, lon) float32 dask.array<shape=(18250, 300, 360), chunksize=(365, 300, 360)>
    u_s      (time, lat_2, lon_2) float32 dask.arra

In [5]:
# Use year 2016 as time -----
# The climatology borrows its time axis from the 2016 forecast output.
path = '/OSM/CBR/OA_DCFP/data/model_output/CAFE/forecasts/v1/yr2016/mn1/OUTPUT.1/ocean_daily*.nc'
dataset = xr.open_mfdataset(path, autoclose=True)

# Zero-based position of the leap day on the daily axis.
# NOTE(review): assumes the forecast's daily axis starts on 1 Jan 2016, so
# index 59 is 29 Feb -- confirm against the data.
feb29_idx = 59

# The leap day is kept separately ...
time_ly = dataset.time[feb29_idx]
# ... and the remaining 365 entries form the climatology time axis.
time_use = xr.concat(
    [dataset.time[:feb29_idx], dataset.time[feb29_idx + 1:366]],
    dim='time',
)

In [6]:
# Make month_day array of month-day -----
# Pull the time values out once instead of re-evaluating ds.time.values for
# every timestep, then label each timestep 'MM-DD' (zero-padded so that the
# labels sort chronologically).
time_values = ds.time.values
md = np.array(['{:02d}-{:02d}'.format(*t.timetuple()[1:3]) for t in time_values])

# Replace time array with month_day array and groupby -----
# assign_coords returns a new dataset, leaving `ds` untouched so this cell
# can be re-run without failing on an already-relabelled time axis.
ds_md = ds.assign_coords(time=md)
clim = ds_md.groupby('time').mean(dim='time',keep_attrs=True)

# The grouped result must have one entry per entry of the reference time
# axis before the 'MM-DD' labels are swapped for real dates.
assert len(clim['time']) == len(time_use), \
    'climatology has {} days but reference time axis has {}'.format(
        len(clim['time']), len(time_use))
clim['time'] = time_use

In [7]:
# Replicate Feb 28th as Feb 29th to deal with leap years -----
# Select the Feb 28th climatology (sel returns a new object, so no explicit
# copy is needed) and relabel it with the leap-day timestamp.
clim_ly = clim.sel(time='2016-02-28')
clim_ly['time'] = np.array([time_ly.values])

# Append the leap day along time and restore chronological order.
# xr.concat is used instead of xr.auto_combine, which is deprecated and
# removed in later xarray releases.
clim = xr.concat([clim, clim_ly], dim='time').sortby('time')

In [9]:
# Save the climatology -----
save_fldr = '/OSM/CBR/OA_DCFP/data/intermediate_products/doppyo/mean_climatologies/'

# The year span in the file name is taken from `years` so it cannot drift
# from the range of input files that were actually loaded.
save_name = 'cafe.c2.ocean.{}_{}.clim.nc'.format(years[0], years[-1])

# Time is written as floating-point days since 0001-01-01 on a Julian calendar.
time_encoding = {'dtype':'float','calendar':'JULIAN',
                 'units':'days since 0001-01-01 00:00:00'}
clim.to_netcdf(save_fldr + save_name, mode = 'w',
               encoding = {'time': time_encoding})

  x = np.divide(x1, x2, out)
