In [None]:
# calculate daily climatology (for use in anomaly calculation)

In [None]:
import os
from tqdm import tqdm
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from netCDF4 import Dataset, MFDataset
import numba as nb
import time as timing
from numba import njit 
from math import sin, cos, sqrt, atan2, radians
from geopy.distance import distance
import seawater as sw
from scipy import interpolate

In [None]:
#---
# configuration: field to average over all years, years to include,
# and whether the result is written to netcdf
#---

vari = 'totChl_emulator'
year_list = np.arange(1997,2018+1)
save_netcdf =True

#------ NO CHANGES NEEDED BELOW THIS LINE

# Name of the variable inside the netcdf files. Only the fields listed here are
# stored under a name that differs from the one used in the file name; every
# other field is read under its own name.
netcdf_varnames = {
    'diat_specific_growth_rate_surf': 'mu_diat',
    'sp_specific_growth_rate_surf': 'mu_sp',
    'totChl_hr': 'totChl',
    'totChl_emulator': 'totChl_masked',
}
vari2 = netcdf_varnames.get(vari, vari)
print(vari,vari2,'...')

# Input directory (relative to `data_root`) and unit string for every supported
# field. A single table replaces the long if/elif chain so that a field name
# that is not listed is reported right here, instead of leaving `path1`/`unit`
# undefined on a fresh kernel or silently stale on a re-used one.
data_root = '/global/cfs/cdirs/m4003/cnissen/'
field_info = {
    # field name: (sub-directory, unit)
    'totChl': ('CESM_chl/', 'mg chl m-3'),
    'totChl_hr': ('CESM_HIGH_RES_chl_regridded/', 'mg chl m-3'),
    'totChl_emulator': ('CESM_totChl_emulator_regridded/', 'mg chl m-3'),
    'FG_CO2_2': ('CESM_fco2/', 'mmol m-3 cm s-1'),
    'ECOSYS_IFRAC_2': ('CESM_sic/', 'n.d.'),
    'MLD': ('CESM_MLD_regridded/', 'cm'),
    'SST': ('CESM_SST_regridded/', 'deg C'),
    'wind_speed': ('JRA_wind_speed/', 'm s-1'),
    'slp': ('JRA_slp/', 'Pa'),
    'photoC_total_surf': ('CESM_total_srf_photoC_regridded/', 'mmol m-3 cm s-1'),
    'photoC_zint': ('CESM_total_int_photoC_regridded/', 'mmol m-3 cm s-1'),
    'phytoC_zint_100m': ('CESM_total_int_phyto_C_biomass_regridded/', 'mmol m-2'),
    'cloudfrac_isccp': ('CESM_cloudfrac_isccp_regridded/', 'n.d.'),
    'PAR_incoming': ('JRA_PAR_incoming/', 'W m-2'),
    'diat_specific_growth_rate_surf': ('CESM_diat_specific_growth_rate_regridded/', 'd-1'),
    'sp_specific_growth_rate_surf': ('CESM_sp_specific_growth_rate_regridded/', 'd-1'),
    'diat_light_lim_surf': ('CESM_diat_light_lim_surf_regridded/', 'n.d.'),
    'sp_light_lim_surf': ('CESM_sp_light_lim_surf_regridded/', 'n.d.'),
    'diat_Fe_lim_surf': ('CESM_diat_Fe_lim_surf_regridded/', 'n.d.'),
    'sp_Fe_lim_surf': ('CESM_sp_Fe_lim_surf_regridded/', 'n.d.'),
    'diat_SiO3_lim_surf': ('CESM_diat_SiO3_lim_surf_regridded/', 'n.d.'),
    'diatChl_SURF': ('CESM_diatChl_SURF_regridded/', 'mg chl m-3'),
    'spChl_SURF': ('CESM_spChl_SURF_regridded/', 'mg chl m-3'),
}

if vari not in field_info:
    raise ValueError('No input path/unit defined for vari='+repr(vari)
                     +'; known fields: '+', '.join(sorted(field_info)))

path1   = data_root+field_info[vari][0]  # directory with the yearly input files
unit    = field_info[vari][1]            # stored as attribute in the output file

# calculate climatology
# NOTE: the grid size is hard-coded; every yearly file is expected to provide
# a field of exactly this shape after squeezing (365 days x 320 lat x 640 lon).
n_days, n_lat, n_lon = 365, 320, 640

# The emulator field has NaNs at different locations in each year, so the years
# cannot simply be added up; all years are kept and averaged with nanmean below.
is_emulator = vari in ['totChl_emulator']

if not is_emulator:
    data_avg = np.zeros([n_days, n_lat, n_lon]) # running sum: days x lat x lon
else:
    # With the default float64 dtype this array needs
    # len(year_list) x 365 x 320 x 640 x 8 bytes (about 13 GB for 22 years).
    data_avg = np.zeros([len(year_list), n_days, n_lat, n_lon]) # years x days x lat x lon

for yy in tqdm(range(0,len(year_list))):
    file1 = vari+'_JRA_grid_'+str(year_list[yy])+'-01-01.nc'
    # the context manager closes the file even if reading the data raises
    with xr.open_dataset(path1+file1) as ff:
        data1 = np.squeeze(ff[vari2]).values
        if yy==0:
            # coordinates are read once, from the first year
            lat = np.squeeze(ff['latitude']).values
            lon = np.squeeze(ff['longitude']).values
            print('Min/Max lon:',np.nanmin(lon),np.nanmax(lon))
    if not is_emulator:
        data_avg = data_avg+data1
    else:
        data_avg[yy,:,:,:] = data1
    del data1

# normalize by number of years
print(np.nanmin(data_avg),np.nanmax(data_avg))
if not is_emulator:
    data_avg = np.divide(data_avg,len(year_list))
else:
    # average over the year axis, ignoring NaNs; a grid point that is NaN in
    # every year stays NaN
    data_avg = np.nanmean(data_avg,axis=0)
print(np.nanmin(data_avg),np.nanmax(data_avg))

         
    

In [None]:
#---
# load climatology of full field
#---
# used to mask land
# used to (later) compare to climatology from emulator

# NOTE(review): this re-assigns `path1`, which the climatology cell above set to
# the input directory of `vari`. The save cell reads `path1` afterwards (for the
# output directory and the `source_file` attribute), so it sees the value set
# here -- confirm that this is intended.
path1 = '/global/cfs/cdirs/m4003/cnissen/CESM_chl/'
clim_file = 'Climatology_totChl_JRA_grid_1997_2018.nc'

# read the field into memory; the file is closed when the block is left
with xr.open_dataset(path1+clim_file) as ff:
    data_clim = ff['totChl'].values
print(data_clim.shape)


In [None]:
#----
# interpolate missing data points
#----
# NOTE: in SH (NH) winter, all values south (north) of a certain latitude are missing
# ---> latitudes at which every longitude is missing are set back to NaN after the interpolation
# interpolation code from here: https://stackoverflow.com/questions/21690608/numpy-inpaint-nans-interpolate-and-extrapolate 

print(data_avg.shape)

# Points at which the interpolant is evaluated: every (lat index, lon index)
# pair of the 2-D grid, in row-major order. They are the same for every day,
# so they are built once here instead of inside the loop.
query_points = np.indices(data_avg.shape[1:]).reshape(2, -1).T

data_avg_filled = np.full_like(data_avg, np.nan)
for dd in tqdm(range(0, data_avg.shape[0])):

    unfilled = data_avg[dd,:,:]  # read-only view of this day's field
    valid_mask = ~np.isnan(unfilled)
    coords = np.array(np.nonzero(valid_mask)).T  # index pairs of valid points
    values = unfilled[valid_mask]
    # linear interpolation on the triangulated valid points; grid points
    # outside the convex hull of the valid points are set to 0 (fill_value)
    it = interpolate.LinearNDInterpolator(coords, values, fill_value=0)
    filled = it(query_points).reshape(unfilled.shape)

    # mask land (assumes data_clim is NaN over land and has the same
    # days x lat x lon layout as data_avg -- TODO confirm)
    filled[np.isnan(data_clim[dd,:,:])] = np.nan

    # mask areas where all longitudes are missing
    all_missing_rows = ~valid_mask.any(axis=1)  # True for latitudes without any valid value
    filled[all_missing_rows,:] = np.nan

    data_avg_filled[dd,:,:] = filled

    if dd in (0, 180): # only plot twice
        f, (ax0, ax1) = plt.subplots(1, 2)
        ax0.imshow(unfilled, cmap='gray', interpolation='nearest')
        ax0.set_title('Unfilled input data')
        ax1.imshow(filled, cmap='gray', interpolation='nearest')
        ax1.set_title('Interpolated data')
        plt.show()

    del filled,unfilled,valid_mask,coords,values,it,all_missing_rows


In [None]:
#-----
# store in new file
#-----
# The gap-filled climatology is written only if the output file does not exist
# yet; an existing file is never overwritten.

if save_netcdf:
    # NOTE(review): `path1` was re-assigned in the cell that loads the full-field
    # climatology, so at this point it no longer holds the directory `file1` was
    # read from. Both the output directory and the `source_file` attribute below
    # use the re-assigned value -- confirm that this is intended.
    savepath = path1
    fv = -999

    netcdf_name = 'Climatology_'+vari+'_JRA_grid_'+str(year_list[0])+'_'+str(year_list[-1])+'_interpolated.nc'
    if not os.path.exists(savepath+netcdf_name):
        print('Create file '+savepath+netcdf_name)

        # Replace NaN by the fill value in the array that is actually written.
        # Done out-of-place so that neither data_avg nor data_avg_filled is
        # modified and the cell can be re-run.
        data_to_write = np.where(np.isnan(data_avg_filled), fv, data_avg_filled)

        # the context manager closes the file even if one of the writes fails
        with Dataset(savepath+netcdf_name, 'w', format='NETCDF4_CLASSIC') as w_nc_fid:
            w_nc_fid.contact = 'Cara Nissen, cara.nissen@colorado.edu'
            w_nc_fid.source_file = path1+file1
            w_nc_fid.script    = '/global/homes/c/cnissen/scripts/save_CESM_daily_climatology_emulator_gap_filled.ipynb'
            # create dimension & variable
            w_nc_fid.createDimension('lon', len(lon))
            w_nc_fid.createDimension('lat', len(lat))
            w_nc_fid.createDimension('Time', data_to_write.shape[0])
            nc_data = w_nc_fid.createVariable(vari2, 'f4',('Time','lat','lon'),fill_value=fv)
            nc_data.description = vari2+' climatology (avg from '+str(year_list[0])+'_'+str(year_list[-1])+')'
            nc_data.unit = unit
            nc_data.note = 'used scipy.interpolate.LinearNDInterpolator to fill gaps due to clouds'
            nc_lat = w_nc_fid.createVariable('lat', 'f4',('lat',),fill_value=fv)
            nc_lat.description = 'Latitude'
            nc_lat.unit = 'deg N'
            nc_lon = w_nc_fid.createVariable('lon', 'f4',('lon',),fill_value=fv)
            nc_lon.description = 'Longitude (0:360)'
            nc_lon.unit = 'deg E'

            # write the data
            nc_data[:] = data_to_write
            nc_lat[:] = lat
            nc_lon[:] = lon

        del data_to_write
    else:
        print('File '+savepath+netcdf_name+' already exists, nothing written')

print('done')       

