In [1]:
import numpy as np
import xarray as xr
import subprocess
import os

from ipywidgets import FloatProgress

##### Define CDO parameters

In [2]:
# Directory that holds the cdo executable -----
cdo_path = '/apps/cdo/1.6.3/bin/'

# Scratch folder, and the scratch input/output files inside it, used when
# handing data to cdo/nco -----
tmp_fldr = '/OSM/CBR/OA_DCFP/data/intermediate_products/squ027/tmp1/'
tmp_in, tmp_out = (tmp_fldr + file_name for file_name in ('tmp_in.nc', 'tmp_out.nc'))

# Get the model grid
##### First rename the variables to be cdo-compliant (doing this with nco was also tried, but it was slower)

In [3]:
# Take the 'sst' field from one model output file, rename its yt_ocean/xt_ocean
# dimensions to lat/lon, and write the result to the temporary cdo input file -----
in_file = '/OSM/CBR/OA_DCFP/data/model_output/CAFE/forecasts/v1/yr2002/mn2/OUTPUT.1/ocean_daily_2002_02_01.nc'
out_file = tmp_in

# Times are left undecoded (decode_times=False)
model_sst = xr.open_dataset(in_file, autoclose=True, decode_times = False)['sst']
model_sst = model_sst.rename('sst')
model_sst = model_sst.rename({'yt_ocean':'lat','xt_ocean':'lon'})
model_sst.to_netcdf(out_file)

##### Now generate cdo-compliant 'grid' file

In [4]:
# Ask cdo for the grid description of the renamed model file and store it in
# 'cafe_v1_ocean_grid_def' inside the temporary folder -----
in_file = tmp_in
out_file = tmp_fldr + 'cafe_v1_ocean_grid_def'
command = [cdo_path + 'cdo', 'griddes', in_file]

try:
    # cdo prints the grid description on stdout: write it straight to the grid
    # file (no shell redirect needed) and raise CalledProcessError if cdo exits
    # with a non-zero status, rather than silently leaving an empty grid file
    with open(out_file, 'w') as grid_def:
        subprocess.check_call(command, stdout=grid_def)
finally:
    # Delete temporary file, whether or not cdo succeeded ----
    os.remove(tmp_in)

# Interpolate observation file

In [None]:
# Location of observation data -----
obsv_folder = '/OSM/CBR/OA_DCFP/data/observations/sst/remss/v05.0/'
years = range(2002,2004)
obsv_filename = '*-REMSS-*.nc'
resample_freq = 'MS'

# Calendar used whenever the observation times are decoded or encoded -----
calendar = 'standard'

# cdo grid description that the observations are remapped onto -----
grid_def_file = tmp_fldr + 'cafe_v1_ocean_grid_def'

# CDO seems to round the lat/lon values so that they are the same to within 1*10^-8,
# but not identical - use the actual interpolation grid. The grid does not depend
# on the year, so it is loaded once here instead of once per loop iteration -----
in_file = '/OSM/CBR/OA_DCFP/data/model_output/CAFE/forecasts/v1/yr2002/mn2/OUTPUT.1/ocean_daily_2002_02_01.nc'
for_grid = xr.open_dataset(in_file, autoclose=True, decode_times = False)['sst'] \
                .rename({'yt_ocean':'lat','xt_ocean':'lon'})

# Instantiate progress bar -----
f = FloatProgress(min=0, max=len(years), description='Loading...')
display(f)

file_list = []
for year in years:
    path = obsv_folder + str(year) + '/' + obsv_filename

    # Save file temporarily in cdo-compliant format -----
    obsv_sst = xr.open_mfdataset(path, autoclose=True, decode_times = False)['analysed_sst'].rename('sst')
    # Must keep track of the time units, as cdo will convert to its preferred units
    time_units = obsv_sst.time.attrs['units']
    obsv_sst['time'] = xr.coding.times.decode_cf_datetime(obsv_sst.time,units=time_units,calendar=calendar)
    obsv_sst.to_netcdf(tmp_in, encoding = {'time':{'dtype':'float','calendar':calendar,
                                                   'units':time_units}})

    # Perform interpolation using cdo: one output file per year, named
    # '<tmp_out stem>_<year>.nc' -----
    out_file = tmp_out[:-3] + '_' + str(year) + tmp_out[-3:]
    command = [cdo_path + 'cdo', 'remapbil,' + grid_def_file, tmp_in, out_file]
    try:
        # Raise CalledProcessError on a non-zero cdo exit status instead of going
        # on to open a missing or incomplete output file
        subprocess.check_call(command)
    finally:
        # Delete temporary files -----
        os.remove(tmp_in)

    # Load cdo-interpolated file and feed into list -----
    data = xr.open_dataset(out_file, autoclose=True, decode_times=False)
    data['time'] = xr.coding.times.decode_cf_datetime(data.time,units=time_units,calendar=calendar)

    # Replace cdo's lat/lon values with those of the model grid
    data['lat'] = for_grid['lat']
    data['lon'] = for_grid['lon']

    file_list.append(data)

    f.value += 1

# Join the per-year datasets along time -----
data = xr.concat(file_list,'time')

In [None]:
# Compute a month-day climatology of `data`: every time step gets an 'MM-DD'
# label and all time steps sharing a label are averaged together.

# Make month_day array of month-day -----
months = data.time.dt.month.values
days = data.time.dt.day.values
md = np.array(['{:02d}-{:02d}'.format(month, day) for month, day in zip(months, days)])

# Replace time array with month_day array and groupby. The labels go onto a copy
# so that `data` keeps its datetime axis and this cell can be re-run -----
data_md = data.copy()
data_md['time'] = md
ds_clim = data_md.groupby('time').mean(dim='time',keep_attrs=True)

# Pick one real timestamp per month-day label: the first time step in `data`
# that carries the label, ordered like the grouped 'time' axis.
# NOTE(review): the original line here was the incomplete `data.time[]`, so the
# intended selection is unknown. With this choice the time axis follows
# month-day order and is not necessarily chronological - confirm this is what
# downstream code expects.
labels, first_idx = np.unique(md, return_index=True)
first_time = dict(zip(labels, data.time.values[first_idx]))
time_use = np.array([first_time[label] for label in ds_clim.time.values])

# Fill time with presaved time -----
ds_clim['time'] = time_use
ds_clim.time.attrs['long_name'] = 'time'
ds_clim.time.attrs['cartesian_axis'] = 'T'
ds_clim.time.attrs['calendar_type'] = 'JULIAN'
ds_clim.time.attrs['bounds'] = 'time_bounds'

# Save the bundled/interpolated array

In [6]:
# Write `data` to netCDF, storing time as floats in the units and calendar
# recorded while loading the observations -----
save_fldr = '/OSM/CBR/OA_DCFP/data/intermediate_products/pylatte_climatologies/'
save_name = 'REMSS.2002060112_2017123112.clim.nc'  # was defined as `savename` but used as `save_name`

# NOTE(review): the file name and folder suggest a climatology, but the object
# written here is `data` - confirm which array is meant to be saved.
time_encoding = {'dtype':'float','calendar':calendar,'units':time_units}
data.to_netcdf(save_fldr + save_name, encoding = {'time':time_encoding},
               format='NETCDF4')