In [3]:
# Packages and modules -----
import numpy as np
import xarray as xr
import subprocess
import os

from ipywidgets import FloatProgress

In [2]:
# Location of the CDO executables -----
cdo_path = '/apps/cdo/1.6.3/bin/'

# Scratch folder plus the input/output file names used when calling cdo/nco -----
tmp_fldr = '/OSM/CBR/OA_DCFP/data/intermediate_products/squ027/tmp/'
tmp_in, tmp_out = (tmp_fldr + nc_name for nc_name in ('tmp_in.nc', 'tmp_out.nc'))

# Get the model grid

In [3]:
# Write a copy of the model 'sst' field whose horizontal dimensions are called
# 'lat' and 'lon', so that cdo can read the grid from it -----
in_file = '/OSM/CBR/OA_DCFP/data/model_output/CAFE/forecasts/v1/yr2002/mn2/OUTPUT.1/ocean_daily_2002_02_01.nc'
out_file = tmp_in

model_ds = xr.open_dataset(in_file, autoclose=True, decode_times = False)
sst_cdo = model_ds['sst'].rename('sst')
sst_cdo = sst_cdo.rename({'yt_ocean':'lat','xt_ocean':'lon'})
sst_cdo.to_netcdf(out_file)

In [4]:
# Now generate cdo-compliant 'grid' file -----
# `cdo griddes` prints the grid description of `in_file` to stdout; that output
# is written to `out_file`, which the interpolation step later passes to
# `cdo remapbil`.
in_file = tmp_in
out_file = tmp_fldr + 'cafe_v1_ocean_grid_def'

# Run cdo directly (no shell) with an argument list. check_call raises
# subprocess.CalledProcessError on a non-zero exit status, so a failed cdo run
# is reported here instead of silently leaving an empty grid file behind.
command = [cdo_path + 'cdo', 'griddes', in_file]
try:
    with open(out_file, 'w') as grid_def_file:
        subprocess.check_call(command, stdout=grid_def_file)
finally:
    # Delete temporary file (whether or not cdo succeeded) ----
    os.remove(tmp_in)

# Interpolate observation files onto the model grid

In [None]:
# Interpolate all REMSS data to cafe grid (~ 2.5 hours) -----
# For every year folder under `obsv_folder`, each '.nc' file is bilinearly
# remapped with `cdo remapbil` onto the grid described by
# 'cafe_v1_ocean_grid_def', and the result is written to `tmp_fldr` as
# '<original name>.cafe_grid.nc'.
obsv_folder = '/OSM/CBR/OA_DCFP/data/observations/sst/remss/v05.0/'
years = range(2002,2019)
grid_def = tmp_fldr + 'cafe_v1_ocean_grid_def'

# Instantiate progress bar -----
f = FloatProgress(min=0, max=len(years)*365, description='Loading...')
display(f)

# Input files for which cdo returned a non-zero exit status -----
failed_files = []

for year in years:
    path = obsv_folder + str(year) + '/'
    # List the observation folder for this year (not the temporary output
    # folder): these are the files that in_file is built from below.
    for file in sorted(os.listdir(path)):
        if file.endswith('.nc'):
            in_file = path + file  # path already ends with '/'
            out_file = tmp_fldr + file[:-3] + '.cafe_grid' + file[-3:]
            # Argument list, no shell: file names are passed to cdo verbatim
            command = [cdo_path + 'cdo', 'remapbil,' + grid_def, in_file, out_file]
            if subprocess.call(command) != 0:
                # Keep going (the full loop takes hours) but remember the failure
                failed_files.append(in_file)

            f.value += 1

if failed_files:
    print('cdo remapbil failed for ' + str(len(failed_files)) + ' file(s):')
    for failed in failed_files:
        print('  ' + failed)

# Load all interpolated files

In [34]:
# Open every '.nc' file in the temporary folder (in sorted name order) and
# join them along the 'time' dimension -----
nc_names = [name for name in sorted(os.listdir(tmp_fldr)) if name.endswith('.nc')]

data_list = []
for nc_name in nc_names:
    with xr.open_dataset(tmp_fldr + nc_name, decode_times = False) as data:
        data_list.append(data)

data = xr.concat(data_list, dim='time')

In [11]:
# Decode time by hand -----
# The time units are read from one of the original REMSS files and applied,
# together with a 'standard' calendar, to the concatenated time axis.
expl_file = '20020601120000-REMSS-L4_GHRSST-SSTfnd-MW_IR_OI-GLOB-v02.0-fv05.0.nc'
expl_path = obsv_folder + '2002/' + expl_file
for_time = xr.open_dataset(expl_path, autoclose=True, decode_times = False)

calendar = 'standard'
time_units = for_time.time.attrs['units']

decoded_time = xr.coding.times.decode_cf_datetime(data.time,
                                                  units=time_units,
                                                  calendar=calendar)
data['time'] = decoded_time

NameError: name 'data' is not defined

In [37]:
# Overwrite the lat/lon coordinates of the interpolated data with the ones
# read from the model 'sst' field -----
in_file = '/OSM/CBR/OA_DCFP/data/model_output/CAFE/forecasts/v1/yr2002/mn2/OUTPUT.1/ocean_daily_2002_02_01.nc'
axis_names = {'yt_ocean':'lat','xt_ocean':'lon'}
for_grid = xr.open_dataset(in_file, autoclose=True, decode_times = False)['sst'].rename(axis_names)

for coord in ('lat', 'lon'):
    data[coord] = for_grid[coord]

# Save the interpolated dataset

In [41]:
# Write the dataset to disk, passing the time units and calendar explicitly
# as the encoding of the 'time' variable -----
save_fldr = '/OSM/CBR/OA_DCFP/data/intermediate_products/squ027/'
save_name = 'REMSS.sst.daily.cafe_grid.20020601_20180418.nc'

time_encoding = {'dtype':'float','calendar':calendar,'units':time_units}
data.to_netcdf(save_fldr + save_name,
               encoding = {'time': time_encoding},
               format='NETCDF4')

# Compute the daily climatology

In [54]:
# Group by month-day -----
# NOTE: this cell overwrites data['time'] with 'MM-DD' strings, so it cannot be
# re-run on its own; re-run the cells that build `data` first.

# Index of the first time step that falls in 2016. The 366 steps from there
# are saved so they can later be used as the time axis of the climatology
# (2016 is a leap year, so '02-29' is included).
# assumes data.time holds decoded datetimes at daily resolution - TODO confirm
steps_2016 = np.flatnonzero(data.time.dt.year.values == 2016)
if steps_2016.size == 0:
    raise ValueError('No time steps from 2016 found in data.time')
ts = int(steps_2016[0])
time_use = data.time[ts:ts+366] # Use 2016

# Make month_day array of month-day -----
# Zero-padded 'MM-' and 'DD' pieces, joined element-wise into 'MM-DD' labels
m = np.array([str(i).zfill(2) + '-' for i in data.time.dt.month.values])
d = np.array([str(i).zfill(2)  for i in data.time.dt.day.values])
md = np.char.add(m, d)  # public alias of np.core.defchararray.add

# Replace time array with month_day array and groupby -----
data['time'] = md

[('01-01', <xarray.Dataset>
Dimensions:           (lat: 300, lon: 360, time: 16)
Coordinates:
  * lon               (lon) float64 -279.5 -278.5 -277.5 -276.5 -275.5 ...
  * lat               (lat) float64 -77.88 -77.63 -77.38 -77.13 -76.88 ...
  * time              (time) <U5 '01-01' '01-01' '01-01' '01-01' '01-01' ...
Data variables:
    analysed_sst      (time, lat, lon) float32 nan nan nan nan nan nan nan ...
    analysis_error    (time, lat, lon) float32 nan nan nan nan nan nan nan ...
    sea_ice_fraction  (time, lat, lon) float32 nan nan nan nan nan nan nan ...
    mask              (time, lat, lon) float32 2.0 2.0 2.0 2.0 2.0 2.0 2.0 ...
Attributes:
    CDI:                        Climate Data Interface version 1.6.3 (http://...
    history:                    Tue May 01 08:25:27 2018: cdo remapbil,/OSM/C...
    source:                     OBPG-L3C-MODIS_Terra,REMSS-L3C-AMSRE,REMSS-L3...
    institution:                REMSS
    Conventions:                CF-1.6,ACDD-1.3
    ti

In [57]:
# Average all time steps that share the same 'time' label -----
ds_clim = data.groupby('time').mean(dim=['time'],keep_attrs=True)

# Put the previously saved time values back on the time axis and attach the
# time attributes -----
ds_clim['time'] = time_use
time_attrs = (('long_name', 'time'),
              ('cartesian_axis', 'T'),
              ('calendar_type', calendar),
              ('bounds', 'time_bounds'))
for attr_name, attr_value in time_attrs:
    ds_clim.time.attrs[attr_name] = attr_value

In [6]:
# Remove 'analysis_error' and 'mask', then rename 'analysed_sst' to 'sst' ------
without_error = ds_clim.drop('analysis_error')
without_mask = without_error.drop('mask')
ds_clim = without_mask.rename({'analysed_sst':'sst'})

In [13]:
# Write the climatology to disk with an explicit time encoding -----
savename = 'REMSS.2002060112_2018041812.clim.nc'
clim_path = '/OSM/CBR/OA_DCFP/data/intermediate_products/pylatte_climatologies/' + savename
clim_time_encoding = {'dtype':'float','calendar':calendar,'units':time_units}

ds_clim.to_netcdf(path=clim_path, mode = 'w', encoding = {'time': clim_time_encoding})