In [2]:
import xarray as xr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from math import cos, radians
import xarray as xr
import intake
import dask

#libs for dask gateway
from dask_gateway import Gateway
from dask.distributed import Client
####################you will need to change some paths here!#####################
# Directory that holds every input file; all paths below are built from it.
adir = './../data/'

# list of input files
filename_cpr = adir + 'All CPR Sample catalogue.nc'
filename_northpac_eddies = adir + 'eddy_trajectory_19930101_20170106_north_pacific_2020_10_06a.nc'
filename_cpr_eddy = adir + 'eddy_cpr_data_north_pacific.nc'
filename_eddy = adir + 'eddy_ranking_data_north_pacific.nc'
fname_topo = adir + 'ETOPO1_Ice_g_gmt4.grd'

# output files
# This is a file-name stem: the code that writes the results appends the data
# set name and the '.csv' / '.nc' extension.  `adir` must be prepended exactly
# once (it used to be added twice, giving './../data/./../data/CPR/...').
filename_cpr_expanded = adir + 'CPR/All CPR Sample catalogue with env info_2020_10_05'
#################################################################################


In [2]:
# Start a Dask Gateway cluster and attach a distributed client to it.
# Gateway() is called without arguments, so the gateway address/credentials
# presumably come from the deployment's dask-gateway configuration -- TODO confirm.
gateway = Gateway()
cluster = gateway.new_cluster()
# Adaptive scaling, bounded by the minimum/maximum passed here.
cluster.adapt(minimum=1, maximum=50)
# NOTE(review): creating the Client presumably makes this cluster the default
# scheduler for the dask computations below -- confirm against dask.distributed docs.
client = Client(cluster)
# Bare last expression: renders the cluster widget as the cell output.
cluster

VBox(children=(HTML(value='<h2>GatewayCluster</h2>'), HBox(children=(HTML(value='\n<div>\n<style scoped>\n    …

In [3]:
# Key of the gridded data set to collocate with the CPR samples.  The collocation
# loop skips every entry of the dictionary returned by get_data() whose key differs
# from this value (and always skips 'topo'); get_data() defines 'aviso' and 'wnd'.
input_data = 'aviso'

In [4]:
def _standardise_grid(ds, prefix, drop_names):
    """Rename latitude/longitude to lat/lon, wrap lon to [-180, 180), sort by lon,
    drop `drop_names` and tag every data variable with attrs['var_name'] = prefix + name."""
    ds = ds.rename({'latitude': 'lat', 'longitude': 'lon'})
    ds.coords['lon'] = (ds.coords['lon'] + 180) % 360 - 180
    # After wrapping, longitudes are no longer monotonic, hence the sort.
    ds = ds.sortby(ds.lon).drop_vars(drop_names)
    for var in ds:
        # ds[var] shares its attrs dict with the dataset's variable, so this
        # in-place update is stored on the dataset.
        ds[var].attrs['var_name'] = prefix + str(var)
    return ds


def _dayofyear_climatology(ds, start, end):
    """Mean over `time` for each day of year, using only start..end (inclusive slice)."""
    return (ds.sel(time=slice(start, end))
              .groupby('time.dayofyear')
              .mean('time', keep_attrs=True, skipna=False))


def get_data():
    """Open the CCMP wind and AVISO sea-surface-height data sets and their climatologies.

    Both data sets are opened lazily, their coordinates renamed to ``lat``/``lon``,
    longitudes wrapped to [-180, 180) and sorted, auxiliary variables dropped, and
    every remaining variable gets a ``var_name`` attribute (``ccmp_<name>`` or
    ``aviso_<name>``).

    Relies on the module-level name ``ds_topo`` (the bathymetry dataset), which
    must be defined before this function is called.

    Returns
    -------
    data_dict : dict
        ``{'aviso': ds_aviso, 'wnd': ds_ccmp, 'topo': ds_topo}``
    clim_dict : dict
        ``{'aviso_clim': ..., 'wnd_clim': ...}`` -- day-of-year means over
        1993-01-01 .. 2018-12-31, computed with ``skipna=False``.
    """
    # Imported here because the notebook's import cell does not import gcsfs,
    # although the AVISO store below needs it.
    import gcsfs

    #climatology years
    cyr1, cyr2 = '1993-01-01', '2018-12-31'

    # CCMP wind vectors, from the Pangeo intake catalog
    cat_pangeo = intake.open_catalog("https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/master.yaml")
    ds_ccmp = _standardise_grid(
        cat_pangeo.atmosphere.nasa_ccmp_wind_vectors.to_dask(),
        prefix='ccmp_',
        drop_names=['nobs'],
    )
    ds_ccmp_clim = _dayofyear_climatology(ds_ccmp, cyr1, cyr2)

    # AVISO sea surface height, from a requester-pays zarr store on GCS
    fs = gcsfs.GCSFileSystem(project='pangeo-181919', requester_pays=True)
    zstore = 'gs://pangeo-cmems-duacs/sea_surface_height_clg'
    ds_aviso = _standardise_grid(
        xr.open_zarr(fs.get_mapper(zstore), consolidated=True),
        prefix='aviso_',
        drop_names=['lat_bnds', 'lon_bnds', 'crs', 'err'],
    )
    ds_aviso_clim = _dayofyear_climatology(ds_aviso, cyr1, cyr2)

    #put data into a dictionary
    data_dict = {'aviso': ds_aviso,
                 'wnd': ds_ccmp,
                 'topo': ds_topo}  # ds_topo is a global, see docstring
    clim_dict = {'aviso_clim': ds_aviso_clim,
                 'wnd_clim': ds_ccmp_clim}

    return data_dict, clim_dict


In [None]:
# Read the CPR sample catalogue (a netCDF file) with xarray.
ds_cpr = xr.open_dataset(filename_cpr)

# NOTE(review): this tuple of lon/lat extremes is built and then discarded -- it is
# not the last expression of the cell, so nothing is displayed.
ds_cpr.cpr_sample_lon.min().data,ds_cpr.cpr_sample_lon.max().data,ds_cpr.cpr_sample_lat.min().data,ds_cpr.cpr_sample_lat.max().data

# Eddy trajectory file, with 'Longitude'/'Latitude' renamed to 'lon'/'lat'.
ds_eddy = xr.open_dataset(filename_northpac_eddies).rename({'Longitude':'lon','Latitude':'lat'})

# Eddy/CPR file; it is only opened here, nothing below in this cell reads it.
ds_eddy_cpr = xr.open_dataset(filename_cpr_eddy)

#get bathymetry from ETOPO1
ds = xr.open_dataset(fname_topo)
# Rename the grid's x/y dimensions and variables to lon/lat so they match the
# names used for the other data sets.
ds_topo = ds.rename_dims({'x':'lon','y':'lat'}).rename({'x':'lon','y':'lat'})
#tem = ds_topo.isel(lat=slice(7000,9500),lon=slice(0,4500))
#tem.z.plot()
# Nearest-neighbour lookup of the grid variable z at the sample lat/lon positions.
tt = ds_topo.z.interp(lat=ds_cpr.cpr_sample_lat,lon=ds_cpr.cpr_sample_lon,method='nearest').data
# Store the looked-up values as a new variable along the 'index' dimension.
ds_cpr['ETOPO_depth']= xr.DataArray(tt, coords={'index':ds_cpr.index}, dims=["index"])
# Sample longitude wrapped into [0, 360).
ds_cpr['cpr_sample_lon2'] = np.mod(ds_cpr['cpr_sample_lon'],360)

#plt.scatter(ds_cpr.cpr_sample_lon2,ds_cpr.cpr_sample_lat,c=ds_cpr.ETOPO_depth,cmap='coolwarm',vmin=-8000,vmax=8000)


# Open the gridded data sets and their climatologies.  get_data() reads the global
# ds_topo defined above, so it has to be called after the bathymetry is loaded.
data,clim = get_data()


# In[ ]:


#ds_cpr = xr.open_dataset(filename_bird_out_eddy_netcdf)
# Number of CPR samples (length of the sample longitude array).
ilen_bird1 = len(ds_cpr.cpr_sample_lon)

# Bounding box of all samples; every time window loaded below is cut to this box
# (plus a 0.5 degree margin) so that one load serves all samples of that time.
clonmin,clonmax = ds_cpr.cpr_sample_lon.min().data,ds_cpr.cpr_sample_lon.max().data
clatmin,clatmax = ds_cpr.cpr_sample_lat.min().data,ds_cpr.cpr_sample_lat.max().data

for name in data:
    ds_data = data[name]
    # Only the data set selected by input_data is collocated; 'topo' never is.
    if name == 'topo' or name != input_data:
        continue
    print('name', name)

    # Output column name of every variable, and one NaN-filled buffer per variable.
    # Samples that are skipped below keep NaN.  `np.nan * np.empty(...)` is kept
    # from the original so the resulting dtype is unchanged.
    out_names = {var: ds_data[var].attrs['var_name'] for var in ds_data}
    buffers = {}
    for var, var_tem in out_names.items():
        buffers[var] = np.nan * np.empty(ilen_bird1, dtype=str(ds_data[var].dtype))
        print('var', var_tem)

    # Time coverage of the gridded data: constant, so computed once per data set.
    data_tmin, data_tmax = ds_data.time.min(), ds_data.time.max()

    # Start time of the window currently held in tem2.  Reset for every data set
    # so a window loaded from a previous data set can never be reused.
    t1save = None
    for i in range(ilen_bird1):
        sample_lat = ds_cpr.cpr_sample_lat[i]
        sample_time = ds_cpr.cpr_sample_time[i]
        if np.isnan(sample_lat):
            continue
        if sample_time < data_tmin or sample_time > data_tmax:
            continue
        # +/- 24 h around the sample so the time interpolation has data on both sides.
        t1, t2 = sample_time - np.timedelta64(24, 'h'), sample_time + np.timedelta64(24, 'h')
        if t1save is None or t1 != t1save:
            # New sample time: load the window into memory once; consecutive
            # samples with the same time reuse it.
            tem2 = ds_data.sel(time=slice(t1, t2),
                               lat=slice(clatmin - .5, clatmax + .5),
                               lon=slice(clonmin - .5, clonmax + .5)).load()
            t1save = t1
            print(i, ilen_bird1)
        # Interpolate the loaded window to the sample's time and position.
        tem = tem2.interp(time=sample_time, lat=sample_lat, lon=ds_cpr.cpr_sample_lon[i])
        for var, values in buffers.items():
            values[i] = tem[var].data

    # Attach the collocated values to the CPR dataset, carrying over the source attrs.
    for var, var_tem in out_names.items():
        ds_cpr[var_tem] = xr.DataArray(buffers[var],
                                       coords={'index': ds_cpr.index},
                                       dims='index',
                                       attrs=ds_data[var].attrs)

    #output data
    df_bird = ds_cpr.to_dataframe()
    df_bird.to_csv(filename_cpr_expanded+name+'.csv')
    ds_cpr.to_netcdf(filename_cpr_expanded+name+'.nc')

