In [None]:
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import pandas as pd
####################you will need to change some paths here!#####################
#input file: bird observation CSV, read into a pandas DataFrame further down
filename_bird='f:/data/NASA_biophysical/collocated_data/NPPSD_GOA_allseabird_wide.csv'
#output files: the collocated table is written once as CSV and once as netCDF
filename_bird_out='f:/data/NASA_biophysical/collocated_data/NPPSD_GOA_allseabird_wide_sat_data.csv'
filename_bird_out_netcdf='f:/data/NASA_biophysical/collocated_data/NPPSD_GOA_allseabird_wide_sat_data.nc'
#################################################################################

In [None]:
#read the bird observation csv file into a pandas dataframe
ds_bird = pd.read_csv(filename_bird)
#calculate time: assemble the observation date from the Year/Month/Day columns in one
#vectorized call (no per-row string building); the result is a datetime64[ns] column
ds_bird['time'] = pd.to_datetime({'year':ds_bird.Year,'month':ds_bird.Month,'day':ds_bird.Day})
#show only the first rows rather than the whole table
ds_bird.head()

In [None]:
#quick sanity check: plot the observation positions and confirm they look reasonable
plt.scatter(ds_bird['Lon'], ds_bird['Lat'])

# Define function to read in data and put in a dictionary

In [None]:
#define function to get all the data at once, use same years for climatology for all data
def _standardize_lat_lon(ds):
    """Rename latitude/longitude to lat/lon, wrap lon into [-180, 180) and sort by lon."""
    ds = ds.rename({'latitude':'lat','longitude':'lon'})
    ds.coords['lon'] = (ds.coords['lon'] + 180) % 360 - 180
    return ds.sortby(ds.lon)


def _tag_var_names(ds, prefix):
    """Store prefix + variable name in the 'var_name' attribute of every data variable."""
    for var in ds:
        ds[var].attrs['var_name'] = prefix+str(var)
    return ds


def _dayofyear_climatology(ds, start, end):
    """Average ds over [start, end] for each day of year, keeping variable attributes."""
    ds_clim = ds.sel(time=slice(start,end))
    return ds_clim.groupby('time.dayofyear').mean('time',keep_attrs=True,skipna=False)


def get_data(cyr1='1993-01-01', cyr2='2018-12-31'):
    """Open the CCMP, AVISO, CMC SST and ETOPO1 datasets and build day-of-year climatologies.

    Every data variable gets a 'var_name' attribute, which names the output column
    used when the data are collocated with the bird observations.

    Parameters
    ----------
    cyr1, cyr2 : str
        First and last date (inclusive) of the period used for all climatologies.

    Returns
    -------
    data_dict : dict
        Datasets keyed by 'aviso', 'wnd', 'sst' and 'topo'.
    clim_dict : dict
        Day-of-year climatologies keyed by 'aviso_clim', 'wnd_clim' and 'sst_clim'.
    """
    # CCMP winds
    ds_ccmp = _standardize_lat_lon(xr.open_zarr('F:/data/sat_data/ccmp/zarr/'))
    ds_ccmp = _tag_var_names(ds_ccmp.drop('nobs'), 'ccmp_')
    ds_ccmp_clim = _dayofyear_climatology(ds_ccmp, cyr1, cyr2)

    # AVISO
    ds_aviso = _standardize_lat_lon(xr.open_zarr('F:/data/sat_data/aviso/zarr/'))
    ds_aviso = _tag_var_names(ds_aviso.drop({'lat_bnds','lon_bnds','crs','err'}), 'aviso_')
    ds_aviso_clim = _dayofyear_climatology(ds_aviso, cyr1, cyr2)

    #sst: no lat/lon rename or longitude wrap is applied to this dataset
    ds_sst = xr.open_zarr('F:/data/sst/cmc/zarr/')
    ds_sst = ds_sst.drop({'analysis_error','mask','sea_ice_fraction'})
    ds_sst.analysed_sst.attrs['var_name'] = 'cmc_sst'
    ds_sst_clim = _dayofyear_climatology(ds_sst, cyr1, cyr2)

    #get bathymetry from ETOPO1; rename() renames the x/y coordinates and dimensions together
    ds_topo = xr.open_dataset('F:/data/topo/ETOPO1_Ice_g_gmt4.grd').rename({'x':'lon','y':'lat'})
    ds_topo.z.attrs['var_name'] = 'etopo_depth'

    #put data into a dictionary
    data_dict={'aviso':ds_aviso,
               'wnd':ds_ccmp,
               'sst':ds_sst,
               'topo':ds_topo}
    clim_dict={'aviso_clim':ds_aviso_clim,
               'wnd_clim':ds_ccmp_clim,
               'sst_clim':ds_sst_clim}

    return data_dict,clim_dict

In [None]:
#open every dataset and its climatology once; both are dictionaries keyed by dataset name
data, clim = get_data()

# Collocate all data with bird data

In [None]:
#collocate every time-varying dataset with the bird observations, one row at a time
for name in data:
    ds_data=data[name]
    if name=='topo':
        #topography has no time axis; it is collocated separately below
        continue
    print('name',name)
    #create one NaN-filled output column per variable
    for var in ds_data:
        var_tem=ds_data[var].attrs['var_name']
        ds_bird[var_tem]=np.full(len(ds_bird),np.nan)
        ds_bird[var_tem].attrs=ds_data[var].attrs
        print('var',var_tem)
    #.interp will not work across chunked dimensions, this is a fix.  First re-chunk so that there is
    #no chunk in the time dim.  interp in time.  Then re-chunk so no chunks in space, interp in space.
    #The time re-chunk does not depend on the row, so it is done once per dataset.
    ds_time_contig = ds_data.chunk({'time':len(ds_data.time),'lat':180,'lon':180})
    time_index = ds_data.indexes['time']
    time_min,time_max = time_index.min(),time_index.max()
    for i in range(len(ds_bird)):
        obs_time = ds_bird.time[i]
        #rows outside the dataset's time span cannot be interpolated; leave them as NaN.
        #(checking the dims of the interpolated result does not work: interpolating to a
        #scalar time always yields a 0-d time coordinate)
        if pd.isnull(obs_time) or obs_time<time_min or obs_time>time_max:
            continue
        tem = ds_time_contig.interp(time=obs_time)
        tem = tem.chunk({'lat':len(ds_data.lat),'lon':len(ds_data.lon)})
        tem = tem.interp(lat=ds_bird.Lat[i],lon=ds_bird.Lon[i]).load()
        for var in ds_data:
            var_tem=ds_data[var].attrs['var_name']
            #write this row only; assigning to ds_bird[var_tem] would overwrite the whole column
            ds_bird.loc[i,var_tem]=float(tem[var].values)

#collocate the day-of-year climatologies the same way
for name in clim:
    ds_data=clim[name]
    print('name',name)
    for var in ds_data:
        var_tem=ds_data[var].attrs['var_name']+'_clim'
        ds_bird[var_tem]=np.full(len(ds_bird),np.nan)
        ds_bird[var_tem].attrs=ds_data[var].attrs
        print('var',var_tem)
    for i in range(len(ds_bird)):
        obs_time = ds_bird.time[i]
        if pd.isnull(obs_time):
            continue
        #pick the climatological day matching the observation date, then interpolate in space
        tem = ds_data.sel(dayofyear=obs_time.dayofyear)
        tem = tem.chunk({'lat':len(ds_data.lat),'lon':len(ds_data.lon)})
        tem = tem.interp(lat=ds_bird.Lat[i],lon=ds_bird.Lon[i]).load()
        for var in ds_data:
            var_tem=ds_data[var].attrs['var_name']+'_clim'
            ds_bird.loc[i,var_tem]=float(tem[var].values)

#add topo info
#interp will create a new 2D array, to avoid that put the lat/lon into dataarrays
ds_topo=data['topo']
new_lat = xr.DataArray(ds_bird.Lat.values, dims='new_dim')
new_lon = xr.DataArray(ds_bird.Lon.values, dims='new_dim')
#store the plain numpy values so the column is an ordinary pandas column
ds_bird['ETOPO_depth'] = ds_topo.z.interp(lat=new_lat, lon=new_lon,method='nearest').values

#output data: once as csv, once as netcdf
ds_bird.to_csv(filename_bird_out)
DS_bird = xr.Dataset.from_dataframe(ds_bird)
DS_bird.to_netcdf(filename_bird_out_netcdf)

In [None]:
        #scratch check of a single climatology interpolation.  The inputs are bound explicitly
        #(SST climatology, last observation row) so the cell does not depend on loop variables
        #left over from the collocation cell.
        ds_data = clim['sst_clim']
        i = len(ds_bird)-1
        tem = ds_data.sel(dayofyear=ds_bird.time[i].dayofyear)
        tem = tem.chunk({'lat':len(ds_data.lat),'lon':len(ds_data.lon)})
        tem = tem.interp(lat=ds_bird.Lat[i],lon=ds_bird.Lon[i]).load()
        tem.analysed_sst.data

In [None]:
#test topo: colour each observation position by its collocated ETOPO depth
plt.scatter(ds_bird['Lon'], ds_bird['Lat'], c=ds_bird['ETOPO_depth'])

In [None]:
#colour each observation position by its collocated AVISO sla climatology value
plt.scatter(ds_bird['Lon'], ds_bird['Lat'], c=ds_bird['aviso_sla_clim'], vmin=-0.01, vmax=0.01)

In [None]:
#line plot of the collocated AVISO sla climatology column against the row index
ds_bird['aviso_sla_clim'].plot()