In [1]:
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
####################you will need to change some paths here!#####################
# Every path below is an absolute path on an F: drive; edit them to match the
# machine the notebook is run on.
#list of input files
filename_cpr='f:/data/NASA_biophysical/CPR_data/All CPR Sample catalogue.xlsx'
filename_northpac_eddies='F:/data/NASA_biophysical/aviso/eddy_trajectory_19930101_20170106_north_pacific.nc'
filename_cpr_eddy='F:/data/NASA_biophysical/collocated_data/eddy_cpr_data_north_pacific.nc'
filename_eddy='F:/data/NASA_biophysical/collocated_data/eddy_ranking_data_north_pacific.nc'
#output files
# (same catalogue name in two formats: Excel workbook and netCDF)
filename_cpr_expanded='f:/data/NASA_biophysical/collocated_data/All CPR Sample catalogue with eddy info2.xlsx'
filename_cpr_expanded_netcdf='f:/data/NASA_biophysical/collocated_data/All CPR Sample catalogue with eddy info2.nc'
#################################################################################

In [52]:
#define function to get all the data at once, use same years for climatology for all data
def _open_lat_lon_180(zarr_dir):
    """Open a zarr store and return it on a lat/lon grid sorted by longitude.

    The store's ``latitude``/``longitude`` coordinates are renamed to
    ``lat``/``lon`` and longitude is wrapped into [-180, 180) before sorting.
    """
    ds = xr.open_zarr(zarr_dir)
    ds = ds.rename({'latitude':'lat','longitude':'lon'})
    ds.coords['lon'] = (ds.coords['lon'] + 180) % 360 - 180
    return ds.sortby(ds.lon)


def _tag_var_names(ds, prefix):
    """Set the ``var_name`` attribute of every data variable to ``prefix + name``."""
    for var in ds:
        attrs = ds[var].attrs
        attrs['var_name'] = prefix + str(var)
        ds[var].attrs = attrs


def _dayofyear_climatology(ds, cyr1, cyr2):
    """Average ``ds`` over time for each day of year, using only cyr1..cyr2."""
    ds_clim = ds.sel(time=slice(cyr1, cyr2))
    # skipna=False: a missing value anywhere in the period makes that
    # day-of-year/grid-cell mean missing as well
    return ds_clim.groupby('time.dayofyear').mean('time', keep_attrs=True, skipna=False)


def get_data(cyr1='1993-01-01', cyr2='2018-12-31',
             ccmp_dir='F:/data/sat_data/ccmp/zarr/',
             aviso_dir='F:/data/sat_data/aviso/zarr/',
             sst_dir='F:/data/sst/cmc/zarr/'):
    """Open the CCMP, AVISO and CMC SST zarr stores and build their climatologies.

    Each data variable gets a ``var_name`` attribute (``ccmp_<name>``,
    ``aviso_<name>``, or ``cmc_sst`` for ``analysed_sst``). The same
    climatology period is used for all three datasets.

    Parameters
    ----------
    cyr1, cyr2 : str
        First and last date of the climatology period (passed to a time slice).
    ccmp_dir, aviso_dir, sst_dir : str
        Locations of the three zarr stores. The defaults are the original
        hard-coded paths.

    Returns
    -------
    data_dict : dict
        Datasets keyed by ``'aviso'``, ``'wnd'`` and ``'sst'``.
    clim_dict : dict
        Day-of-year climatologies keyed by ``'aviso_clim'``, ``'wnd_clim'``
        and ``'sst_clim'``.
    """
    # CCMP: the 'nobs' variable is removed
    ds_ccmp = _open_lat_lon_180(ccmp_dir).drop(['nobs'])
    _tag_var_names(ds_ccmp, 'ccmp_')
    ds_ccmp_clim = _dayofyear_climatology(ds_ccmp, cyr1, cyr2)

    # AVISO: the bounds, grid-mapping and error variables are removed
    ds_aviso = _open_lat_lon_180(aviso_dir).drop(['lat_bnds','lon_bnds','crs','err'])
    _tag_var_names(ds_aviso, 'aviso_')
    ds_aviso_clim = _dayofyear_climatology(ds_aviso, cyr1, cyr2)

    # CMC SST: opened as stored (no coordinate rename or longitude wrap);
    # only analysed_sst is tagged
    ds_sst = xr.open_zarr(sst_dir)
    ds_sst = ds_sst.drop(['analysis_error','mask','sea_ice_fraction'])
    sst_attrs = ds_sst.analysed_sst.attrs
    sst_attrs['var_name'] = 'cmc_sst'
    ds_sst.analysed_sst.attrs = sst_attrs
    ds_sst_clim = _dayofyear_climatology(ds_sst, cyr1, cyr2)

    #put data into a dictionary
    data_dict={'aviso':ds_aviso,
               'wnd':ds_ccmp,
               'sst':ds_sst}
    clim_dict={'aviso_clim':ds_aviso_clim,
               'wnd_clim':ds_ccmp_clim,
               'sst_clim':ds_sst_clim}

    return data_dict,clim_dict

In [53]:
# Open all gridded datasets once: `data` holds the datasets themselves and
# `clim` the matching day-of-year climatologies (both are dicts).
data,clim = get_data()

In [17]:
# Open the CPR sample catalogue file.
filename='F:/data/NASA_biophysical//collocated_data/All CPR Sample catalogue with eddy info4.nc'
ds_eddy = xr.open_dataset(filename)
# Build one datetime64 timestamp per sample from the separate year/month/day
# variables. int() makes the string form independent of whether the values
# are stored as integers or floats.
tt=np.array(
    [np.datetime64('%04d-%02d-%02d' % (int(yr),int(mon),int(day)))
     for yr,mon,day in zip(ds_eddy.cpr_sample_year.data,
                           ds_eddy.cpr_sample_month.data,
                           ds_eddy.cpr_sample_day.data)],
    dtype='datetime64[ns]')
# Attach the times along the same dimension(s) as the year variable. Assigning
# the bare 1-D array would instead create a new dimension coordinate named
# 'cpr_sample_time'.
ds_eddy['cpr_sample_time']=(ds_eddy.cpr_sample_year.dims,tt)
ds_eddy

In [54]:
# Show the attributes of the `sla` variable of whichever dataset `ds_data`
# currently refers to.
# NOTE(review): `ds_data` is not assigned in any cell above this one; it is
# bound by the collocation cells further down, so this cell relies on earlier
# kernel state.
ds_data.sla.attrs

{'comment': 'The sea level anomaly is the sea surface height above mean sea surface; it is referenced to the [1993, 2012] period; see the product user manual for details',
 'grid_mapping': 'crs',
 'long_name': 'Sea level anomaly',
 'standard_name': 'sea_surface_height_above_sea_level',
 'units': 'm'}

In [60]:
def _collocate_samples(ds_eddy, ds_data, suffix, n_samples, use_dayofyear):
    """Interpolate every variable of ``ds_data`` to the first ``n_samples`` samples.

    For each data variable a NaN-filled placeholder named
    ``<var_name attribute> + suffix`` is added to ``ds_eddy`` (modified in
    place), then element ``i`` of each placeholder is filled with the value
    interpolated to sample ``i``.

    Parameters
    ----------
    ds_eddy : xarray.Dataset
        Sample catalogue; must provide ``cpr_sample_lat``, ``cpr_sample_lon``,
        ``cpr_sample_time`` and ``cpr_sample_ccmp_uwnd`` (used only as the
        shape template for the placeholders).
    ds_data : xarray.Dataset
        Gridded dataset whose variables carry a ``var_name`` attribute.
    suffix : str
        Appended to ``var_name`` to form the output variable name.
    n_samples : int
        Number of leading samples to process.
    use_dayofyear : bool
        If True, select by the sample's day of year (climatology); otherwise
        interpolate in time to the sample time.
    """
    out_names = {}
    for var in ds_data:
        var_tem = ds_data[var].attrs['var_name'] + suffix
        # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0
        ds_eddy[var_tem] = ds_eddy.cpr_sample_ccmp_uwnd.copy(deep=True)*np.nan
        ds_eddy[var_tem].attrs = ds_data[var].attrs
        out_names[var] = var_tem
    print('var',var_tem)
    for i in range(n_samples):
        lat0,lon0 = ds_eddy.cpr_sample_lat[i].data,ds_eddy.cpr_sample_lon[i].data
        lat1,lat2 = lat0-1,lat0+1
        lon1,lon2 = lon0-1,lon0+1
        #pick the time step, then select region around lat/lon to subset before loading data
        if use_dayofyear:
            at_time = ds_data.sel(dayofyear=ds_eddy.cpr_sample_time[i].dt.dayofyear.data)
        else:
            at_time = ds_data.interp(time=ds_eddy.cpr_sample_time[i].data)
        tem = at_time.sel(lat=slice(lat1,lat2),lon=slice(lon1,lon2)).load()
        tem = tem.interp(lat=lat0,lon=lon0)
        for var,var_tem in out_names.items():
            # write element i only; ds_eddy[var_tem]=tem[var] would replace the
            # whole per-sample variable with a single scalar
            ds_eddy[var_tem][i] = tem[var].data


# Only the first two samples are processed (debug limit kept from the original
# cell); set this to ds_eddy.z.size for the full run.
n_samples = 2 #ds_eddy.z.size
for name in data:
    ds_data=data[name]
    print('name',name)
    _collocate_samples(ds_eddy,ds_data,'',n_samples,use_dayofyear=False)
for name in clim:
    ds_data=clim[name]
    print('name',name)
    _collocate_samples(ds_eddy,ds_data,'_clim',n_samples,use_dayofyear=True)
      
        

name aviso
var aviso_vgosa
name wnd
var ccmp_vwnd
name sst
var cmc_sst
name aviso_clim
var aviso_vgosa_clim
name wnd_clim
var ccmp_vwnd_clim
name sst_clim
var cmc_sst_clim


In [29]:
# Draft collocation loop over every dataset in `data`.
# NOTE(review): sbase, cbase, svar and ds_data_clim are not defined in any cell
# above this one; they are assigned in the per-dataset cells further down, so
# this cell depends on earlier kernel state. ds_data_clim also does not change
# with `name` -- confirm which climatology was intended for each dataset.
for name in data:
    ds_data=data[name]
    # placeholders: copies of an existing per-sample variable, one per output name
    for var in ds_data:
        var_tem=sbase+var
        ds_eddy[var_tem]=ds_eddy.cpr_sample_ccmp_uwnd.copy(deep=True)
        var_tem=cbase+var
        ds_eddy[var_tem]=ds_eddy.cpr_sample_ccmp_uwnd.copy(deep=True)
    #collocate data
    for i in range(ds_eddy.z.size):
        # +/- 1 degree box around the sample position
        lat1,lat2=ds_eddy.cpr_sample_lat[i].data-1,ds_eddy.cpr_sample_lat[i].data+1
        lon1,lon2=ds_eddy.cpr_sample_lon[i].data-1,ds_eddy.cpr_sample_lon[i].data+1
        #interp in time and select region around lat/lon to subset before loading data
        tem = ds_data.interp(time=ds_eddy.cpr_sample_time[i].data).sel(lat=slice(lat1,lat2),lon=slice(lon1,lon2)).load()
        tem = tem.interp(lat=ds_eddy.cpr_sample_lat[i].data,lon=ds_eddy.cpr_sample_lon[i].data)
        # NOTE(review): the assignments below replace the whole variable with the
        # value for sample i instead of writing element i -- confirm whether
        # ds_eddy[var_tem][i] was intended.
        for var in svar:
            var_tem=sbase+str(var)
            ds_eddy[var_tem]=tem[var]
        # climatology value for the sample's day of year, same box and interpolation
        tem = ds_data_clim.sel(dayofyear=ds_eddy.cpr_sample_time[i].dt.dayofyear.data).sel(lat=slice(lat1,lat2),lon=slice(lon1,lon2)).load()
        tem = tem.interp(lat=ds_eddy.cpr_sample_lat[i].data,lon=ds_eddy.cpr_sample_lon[i].data)
        for var in svar:
            var_tem=cbase+str(var)
            ds_eddy[var_tem]=tem[var]


<xarray.Dataset>
Dimensions:  (lat: 720, lon: 1440, nv: 2, time: 9508)
Coordinates:
  * lat      (lat) float32 -89.875 -89.625 -89.375 ... 89.375 89.625 89.875
  * lon      (lon) float32 -179.875 -179.625 -179.375 ... 179.625 179.875
  * nv       (nv) int32 0 1
  * time     (time) datetime64[ns] 1993-01-01 1993-01-02 ... 2019-01-12
Data variables:
    adt      (time, lat, lon) float64 dask.array<chunksize=(1000, 180, 180), meta=np.ndarray>
    crs      (time) int32 dask.array<chunksize=(1000,), meta=np.ndarray>
    err      (time, lat, lon) float64 dask.array<chunksize=(1000, 180, 180), meta=np.ndarray>
    sla      (time, lat, lon) float64 dask.array<chunksize=(1000, 180, 180), meta=np.ndarray>
    ugos     (time, lat, lon) float64 dask.array<chunksize=(1000, 180, 180), meta=np.ndarray>
    ugosa    (time, lat, lon) float64 dask.array<chunksize=(1000, 180, 180), meta=np.ndarray>
    vgos     (time, lat, lon) float64 dask.array<chunksize=(1000, 180, 180), meta=np.ndarray>
    vgosa    

In [59]:
# Display ds_eddy for inspection.
ds_eddy

# Collocate data

In [None]:
#create list of variables
sbase = 'cpr_sample_ccmp2_'
cbase = 'cpr_sample_ccmp2_clim_'
ds_data=data['wnd']
#the climatologies are returned by get_data() in the separate `clim` dict;
#`data` has no 'wnd_clim' key
ds_data_clim=clim['wnd_clim']
svar=[var for var in ds_data if var!='nobs']
#create NaN-filled placeholders shaped like an existing per-sample variable,
#so samples that are never filled do not carry copied wind values
for var in svar:
    var_tem=sbase+str(var)
    ds_eddy[var_tem]=ds_eddy.cpr_sample_ccmp_uwnd.copy(deep=True)*np.nan
    var_tem=cbase+str(var)
    ds_eddy[var_tem]=ds_eddy.cpr_sample_ccmp_uwnd.copy(deep=True)*np.nan
#collocate data
for i in range(ds_eddy.z.size):
    lat0,lon0=ds_eddy.cpr_sample_lat[i].data,ds_eddy.cpr_sample_lon[i].data
    lat1,lat2=lat0-1,lat0+1
    lon1,lon2=lon0-1,lon0+1
    #interp in time and select region around lat/lon to subset before loading data
    tem = ds_data.interp(time=ds_eddy.cpr_sample_time[i].data).sel(lat=slice(lat1,lat2),lon=slice(lon1,lon2)).load()
    tem = tem.interp(lat=lat0,lon=lon0)
    for var in svar:
        var_tem=sbase+str(var)
        #write element i only; ds_eddy[var_tem]=tem[var] would replace the
        #whole per-sample variable with a single scalar
        ds_eddy[var_tem][i]=tem[var].data
    #climatology value for the sample's day of year, same box and interpolation
    tem = ds_data_clim.sel(dayofyear=ds_eddy.cpr_sample_time[i].dt.dayofyear.data).sel(lat=slice(lat1,lat2),lon=slice(lon1,lon2)).load()
    tem = tem.interp(lat=lat0,lon=lon0)
    for var in svar:
        var_tem=cbase+str(var)
        ds_eddy[var_tem][i]=tem[var].data


In [None]:
#check answers
#for i in range(4):
#    print(ds_eddy.cpr_sample_ccmp_uwnd[i].data,ds_eddy.cpr_sample_ccmp_uwnd2[i].data)    
#    print(ds_eddy.cpr_sample_ccmp_uwnd_clim[i].data,ds_eddy.cpr_sample_ccmp_uwnd_clim2[i].data)
#    print(ds_eddy.cpr_sample_ccmp_vwnd[i].data,ds_eddy.cpr_sample_ccmp_vwnd2[i].data)    
#    print(ds_eddy.cpr_sample_ccmp_vwnd_clim[i].data,ds_eddy.cpr_sample_ccmp_vwnd_clim2[i].data)

# AVISO data

In [None]:
# AVISO test
dir_pattern_zarr = 'F:/data/sat_data/aviso/zarr/'
# open the store and switch to lat/lon coordinate names
ds = xr.open_zarr(dir_pattern_zarr).rename({'latitude':'lat','longitude':'lon'})
# wrap longitude into [-180, 180) and sort so it is monotonic again
ds.coords['lon'] = (ds.coords['lon'] + 180) % 360 - 180
ds_aviso = ds.sortby(ds.lon)
# day-of-year climatology over 1993-2018 (a missing value gives a missing mean)
ds_aviso_clim = (
    ds_aviso.sel(time=slice('1993-01-01','2018-12-31'))
    .groupby('time.dayofyear')
    .mean('time',keep_attrs=True,skipna=False)
)

In [None]:
#create list of variables
sbase = 'cpr_sample_aviso_'
cbase = 'cpr_sample_aviso_clim_'
#use the AVISO dataset and climatology built in the cell above (the original
#pointed at the CCMP datasets, a copy-paste slip)
ds_data=ds_aviso
ds_data_clim=ds_aviso_clim
svar=[var for var in ds_data if var not in ('lat_bnds','lon_bnds')]
#create NaN-filled placeholders shaped like an existing per-sample variable
for var in svar:
    var_tem=sbase+str(var)
    ds_eddy[var_tem]=ds_eddy.cpr_sample_ccmp_uwnd.copy(deep=True)*np.nan
    var_tem=cbase+str(var)
    ds_eddy[var_tem]=ds_eddy.cpr_sample_ccmp_uwnd.copy(deep=True)*np.nan
#collocate data
for i in range(ds_eddy.z.size):
    lat0,lon0=ds_eddy.cpr_sample_lat[i].data,ds_eddy.cpr_sample_lon[i].data
    lat1,lat2=lat0-1,lat0+1
    lon1,lon2=lon0-1,lon0+1
    #interp in time and select region around lat/lon to subset before loading data
    tem = ds_data.interp(time=ds_eddy.cpr_sample_time[i].data).sel(lat=slice(lat1,lat2),lon=slice(lon1,lon2)).load()
    tem = tem.interp(lat=lat0,lon=lon0)
    for var in svar:
        var_tem=sbase+str(var)
        #write element i only; ds_eddy[var_tem]=tem[var] would replace the
        #whole per-sample variable with a single scalar
        ds_eddy[var_tem][i]=tem[var].data
    #climatology value for the sample's day of year, same box and interpolation
    tem = ds_data_clim.sel(dayofyear=ds_eddy.cpr_sample_time[i].dt.dayofyear.data).sel(lat=slice(lat1,lat2),lon=slice(lon1,lon2)).load()
    tem = tem.interp(lat=lat0,lon=lon0)
    for var in svar:
        var_tem=cbase+str(var)
        ds_eddy[var_tem][i]=tem[var].data


# CMC SST data

In [None]:
# Open the CMC SST zarr store and build its day-of-year climatology.
dir_pattern_zarr = 'F:/data/sst/cmc/zarr/'
ds_sst= xr.open_zarr(dir_pattern_zarr)
# Same 1993-2018 base period as the other climatologies in this notebook
# (this cell previously started in 1992).
ds_sst_clim = ds_sst.sel(time=slice('1993-01-01','2018-12-31'))
ds_sst_clim = ds_sst_clim.groupby('time.dayofyear').mean('time',keep_attrs=True,skipna=False)

In [None]:
#create list of variables
sbase = 'cpr_sample_cmc_sst_'
cbase = 'cpr_sample_cmc_sst_clim_'
#use the SST dataset and climatology built in the cell above (the original
#pointed at the CCMP datasets, a copy-paste slip)
ds_data=ds_sst
ds_data_clim=ds_sst_clim
svar=[var for var in ds_data if var not in ('analysis_error','mask','sea_ice_fraction')]
#create NaN-filled placeholders shaped like an existing per-sample variable
for var in svar:
    var_tem=sbase+str(var)
    ds_eddy[var_tem]=ds_eddy.cpr_sample_ccmp_uwnd.copy(deep=True)*np.nan
    var_tem=cbase+str(var)
    ds_eddy[var_tem]=ds_eddy.cpr_sample_ccmp_uwnd.copy(deep=True)*np.nan
#collocate data
for i in range(ds_eddy.z.size):
    lat0,lon0=ds_eddy.cpr_sample_lat[i].data,ds_eddy.cpr_sample_lon[i].data
    lat1,lat2=lat0-1,lat0+1
    lon1,lon2=lon0-1,lon0+1
    #interp in time and select region around lat/lon to subset before loading data
    tem = ds_data.interp(time=ds_eddy.cpr_sample_time[i].data).sel(lat=slice(lat1,lat2),lon=slice(lon1,lon2)).load()
    tem = tem.interp(lat=lat0,lon=lon0)
    for var in svar:
        var_tem=sbase+str(var)
        #write element i only; ds_eddy[var_tem]=tem[var] would replace the
        #whole per-sample variable with a single scalar
        ds_eddy[var_tem][i]=tem[var].data
    #climatology value for the sample's day of year, same box and interpolation
    tem = ds_data_clim.sel(dayofyear=ds_eddy.cpr_sample_time[i].dt.dayofyear.data).sel(lat=slice(lat1,lat2),lon=slice(lon1,lon2)).load()
    tem = tem.interp(lat=lat0,lon=lon0)
    for var in svar:
        var_tem=cbase+str(var)
        ds_eddy[var_tem][i]=tem[var].data


# Read in ETOPO1

In [None]:
import xarray as xr
import numpy as np
from scipy.interpolate import RectBivariateSpline
#get bathymetry from ETOPO1
fname_topo = 'F:/data/topo/ETOPO1_Ice_g_gmt4.grd'
ds_topo = xr.open_dataset(fname_topo)
# handles on the grid as stored in the file, taken before renaming
x = ds_topo.x  #21601
y = ds_topo.y   #10801
topo = ds_topo.z  #(10801, 21601)
# a single rename changes the x/y dimensions and their coordinates together,
# so no separate rename_dims step is needed
ds_topo = ds_topo.rename({'x':'lon','y':'lat'})
ds_topo

In [None]:
# Quick-look image of an index window of the topography grid
# (rows 7000-9499, columns 0-4499), colour-scaled from -7000 to 1.
topo_window = ds_topo.z[7000:9500,0:4500]
fig, ax1 = plt.subplots(nrows=1, figsize=(6, 5.4))
im = ax1.imshow(
    topo_window,
    interpolation='bilinear',
    vmin=-7000.0,
    vmax=1.0,
    aspect='auto',
    origin='lower',
)
plt.show()

# Find nearest topo measurement for cpr data using xarray interp

In [None]:
# Nearest-neighbour lookup of the ETOPO1 value at every sample position.
# `ds_topo` is the lat/lon-renamed topography dataset opened above; the name
# `ds_topo2` used here before is not defined anywhere in the notebook.
ds_eddy['cpr_sample_ETOPO_depth2']=ds_topo.z.interp(lat=ds_eddy.cpr_sample_lat,lon=ds_eddy.cpr_sample_lon,method='nearest')

In [None]:
# Write ds_eddy, as it stands at this point in the notebook, to a dated netCDF file.
filename_out='F:/data/NASA_biophysical//collocated_data/All CPR Sample catalogue with eddy info_version2020_04_21.nc'
ds_eddy.to_netcdf(filename_out)

# Now the goal is to look up the collocated eddy information and collocate environmental data along each eddy's full history. The steps are:
1. Read in the list of collocated eddies.
2. Create a list of unique eddy IDs.
3. Read in the full eddy database and select each eddy ID.
4. Collocate environmental data for the entire eddy history.
5. Save the file.

In [None]:
#ds_eddy.cpr_eddy_data_track_id
#remove any duplicates
# np.unique with return_index=True returns the sorted unique track ids and the
# index of each id's first occurrence; only the indices are kept here.
# NOTE(review): `index` is not used in this cell and is recomputed from
# `subset` two cells below; the commented-out isel call is incomplete (it has
# no dimension name).
_, index = np.unique(ds_eddy['cpr_eddy_data_track_id'], return_index=True)
#ds_list=ds_eddy.isel(=index)

In [None]:
# Keep only entries whose eddy radius exceeds the distance to the eddy
# (radius - distance > 0); everything else is dropped from the dataset.
radius_margin = ds_eddy.cpr_eddy_data_radius - ds_eddy.cpr_eddy_data_distance
subset = ds_eddy.where(radius_margin > 0, drop=True)
# Plot the remaining margins as a visual check.
subset_margin = subset.cpr_eddy_data_radius - subset.cpr_eddy_data_distance
subset_margin.plot()

In [None]:
# Build the list of distinct eddy track ids present in `subset`.
# np.unique returns the position of each id's first occurrence, which is then
# used to pick one entry per id.
track_ids = subset['cpr_eddy_data_track_id']
_, index = np.unique(track_ids, return_index=True)
eddy_list = track_ids[index]

In [None]:
# Eddy trajectory file from the AVISO website.
# (filename_northpac_eddies from the configuration cell is an alternative
# input that was tried here and left disabled.)
filename_aviso='f:/data/NASA_biophysical/aviso/eddy_trajectory_19930101_20170106.nc'
# Open the file and switch to the lat/lon names used in the rest of the notebook.
ds = xr.open_dataset(filename_aviso).rename({'latitude':'lat','longitude':'lon'})
# Wrap longitude into [-180, 180).
ds = ds.assign(lon=(ds['lon'] + 180) % 360 - 180)
ds_eddy_all = ds
print(ds_eddy_all)


In [None]:
# Display the full eddy trajectory dataset for inspection.
ds_eddy_all

In [None]:
# Draft: collocate gridded data along the history of one eddy from eddy_list.
# NOTE(review): ds_ccmp and ds_ccmp_clim are only assigned inside get_data() in
# the visible cells, so they are not module-level names here -- confirm the
# intended source (data['wnd'] / clim['wnd_clim']?).
ds_data=ds_ccmp
ds_data_clim=ds_ccmp_clim
# NOTE(review): svar is appended to without being reset, and sbase/cbase are
# whatever the previously run collocation cell left behind.
for var in ds_data:
    if (var=='analysis_error' or var=='mask' or var=='sea_ice_fraction'):
        continue
    svar.append(var)
#create subset of eddy data
# NOTE(review): `ieddy` is not defined in any visible cell; this rebinds
# ds_eddy from the CPR catalogue to a subset of the eddy trajectory dataset.
ds_eddy = ds_eddy_all.where(ds_eddy_all.track==eddy_list[ieddy],drop=True)
#create dummy placeholders
for var in svar:
    var_tem=sbase+var
    ds_eddy[var_tem]=ds_eddy.speed_average.copy(deep=True)
    var_tem=cbase+var
    ds_eddy[var_tem]=ds_eddy.speed_average.copy(deep=True)
#collocate data
# NOTE(review): the loop below reads ds_eddy.z and ds_eddy.cpr_sample_time, and
# the topo line reads cpr_sample_lat/lon; these names come from the CPR
# catalogue -- confirm they exist on the trajectory subset.
for i in range(ds_eddy.z.size):
    # +/- 1 degree box around the eddy position
    lat1,lat2=ds_eddy.lat[i].data-1,ds_eddy.lat[i].data+1
    lon1,lon2=ds_eddy.lon[i].data-1,ds_eddy.lon[i].data+1
    #interp in time and select region around lat/lon to subset before loading data
    tem = ds_data.interp(time=ds_eddy.cpr_sample_time[i].data).sel(lat=slice(lat1,lat2),lon=slice(lon1,lon2)).load()
    tem = tem.interp(lat=ds_eddy.lat[i].data,lon=ds_eddy.lon[i].data)
    # NOTE(review): these assignments replace the whole variable with the value
    # for step i instead of writing element i -- confirm the intent.
    for var in svar:
        var_tem=sbase+str(var)
        ds_eddy[var_tem]=tem[var]
    tem = ds_data_clim.sel(dayofyear=ds_eddy.cpr_sample_time[i].dt.dayofyear.data).sel(lat=slice(lat1,lat2),lon=slice(lon1,lon2)).load()
    tem = tem.interp(lat=ds_eddy.lat[i].data,lon=ds_eddy.lon[i].data)
    for var in svar:
        var_tem=cbase+str(var)
        ds_eddy[var_tem]=tem[var]
# topo
    # NOTE(review): this line is indented into the loop body, so it runs on
    # every iteration; ds_topo2 is not defined in any visible cell.
    ds_eddy['ETOPO_depth']=ds_topo2.z.interp(lat=ds_eddy.cpr_sample_lat,lon=ds_eddy.cpr_sample_lon,method='nearest')

In [None]:
# Display `ds`; in the visible cells it was last bound to the eddy trajectory
# dataset (the same object as ds_eddy_all).
ds