# This is the in situ and SSS collocation code. 


In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
import xarray as xr
import scipy
from glob import glob
import cartopy.crs as ccrs
from pyresample.geometry import AreaDefinition
from pyresample import image, geometry, load_area, save_quicklook, SwathDefinition
from pyresample.kd_tree import resample_nearest
from math import radians, cos, sin, asin, sqrt
def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points 
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a)) 
    # Radius of earth in kilometers is 6371
    km = 6371* c
    return km

# Read in USV data
Read in the Saildrone USV file either from a local disc or using OpenDAP.



In [None]:
#filename_usv='f:/data/cruise_data/saildrone/baja-2018/saildrone-gen_4-baja_2018-sd1002-20180411T180000-20180611T055959-1_minutes-v1.nc'
def read_usv():
    filename_usv='f:/data/cruise_data/saildrone/baja-2018/saildrone-gen_4-baja_2018-sd1002-20180411T180000-20180611T055959-1_minutes-v1.nc'
    ds_usv = xr.open_dataset(filename_usv)
    ds_usv.close()
    ds_usv = ds_usv.isel(trajectory=0).swap_dims({'obs':'time'}).rename({'longitude':'lon','latitude':'lat'})
    ds_usv = ds_usv.sel(time=slice('2018-04-12T02','2018-06-10T18')) #get rid of last part and first part where USV being towed
    ds_usv['lon'] = ds_usv.lon.interpolate_na(dim='time',method='linear') #there are 6 nan values
    ds_usv['lat'] = ds_usv.lat.interpolate_na(dim='time',method='linear')
    ds_usv['wind_speed']=np.sqrt(ds_usv.UWND_MEAN**2+ds_usv.VWND_MEAN**2)
    ds_usv['wind_dir']=np.arctan2(ds_usv.VWND_MEAN,ds_usv.UWND_MEAN)*180/np.pi
    return ds_usv

In [None]:
#plot cruise SSS with coastlines
ax = plt.axes(projection=ccrs.PlateCarree())
cs1 = ax.scatter(ds_usv.lon, ds_usv.lat, s=3.0, c=ds_usv.SAL_MEAN, edgecolor='none', cmap='jet',vmin=33,vmax=34.35)
ax.coastlines()
x1,x2,y1,y2 = -128,-112,25,40
ax.set_xlim(x1,x2)
ax.set_ylim(y1,y2)
ax.set_xticks(np.arange(x1,x2,4))
ax.set_yticks(np.arange(y1,y2,5))
cax = plt.colorbar(cs1)
cax.set_label('Salinity (psu)')
fig_fname = 'C:/Users/gentemann/Google Drive/f_drive/docs/projects/SSS/figures/baja_location.png'
plt.savefig(fig_fname, transparent=False, format='png')

## Here is the search code first step.  Narrow down what orbital files to search in

In [None]:
#search each day of USV data for min/max lat/lon 
#read in orbital files for that day and check to see if any valid data in that box

#intialize grid
area_def = load_area('areas.cfg', 'pc_world')
rlon=np.arange(-180,180,.1)
rlat=np.arange(90,-90,-.1)

#init filelist
file_save=[]

#search usv data
minday,maxday = ds_usv.time[0],ds_usv.time[-1]
usv_day = minday
print(minday.data,maxday.data)
while usv_day<=maxday:
    usv_day += np.timedelta64(1,'D')
    check_day = np.datetime64(str(usv_day.dt.year.data)+'-'+str(usv_day.dt.month.data).zfill(2)+'-'+str(usv_day.dt.day.data).zfill(2))
    usv_day1 = usv_day + np.timedelta64(1,'D')
    check_day1 = np.datetime64(str(usv_day1.dt.year.data)+'-'+str(usv_day1.dt.month.data).zfill(2)+'-'+str(usv_day1.dt.day.data).zfill(2))
    ds_day = ds_usv.sel(time=slice(check_day,check_day1))
    ilen = ds_day.time.size
    print(check_day,check_day1,ilen)
    if ilen<10:
        continue
    minlon,maxlon,minlat,maxlat = ds_day.lon.min().data,ds_day.lon.max().data,ds_day.lat.min().data,ds_day.lat.max().data
#caluclate filelist
    filelist = glob('F:/data/sat_data/smap/SSS/L2/RSS/V3/40km/'
                    +str(usv_day.dt.year.data)+'/'+str(usv_day.dt.dayofyear.data)+'/*.nc')   
    x,y,z = [],[],[]
    for file in filelist:
        ds = xr.open_dataset(file)
        ds.close()
        x = ds.cellon[:,:,0].data #np.append(x,ds.cellon[:,:,0].data)
        y = ds.cellat[:,:,0].data #np.append(y,ds.cellat[:,:,0].data)
        z = ds.sss_smap[:,:,0].data #np.append(z,ds.sss_smap[:,:,0].data)
        lons,lats,data = x,y,z 
        lons = np.mod(lons+180,360)-180
        swath_def = SwathDefinition(lons, lats)
        result1 = resample_nearest(swath_def, data, area_def, radius_of_influence=20000, fill_value=None)
        da = xr.DataArray(result1,name='sss',coords={'lat':rlat,'lon':rlon},dims=('lat','lon'))
        subset = da.sel(lat = slice(maxlat,minlat),lon=slice(minlon,maxlon))
        num_obs = np.isfinite(subset).sum()
        if num_obs>0:
            file_save = np.append(file_save,file)
df = xr.DataArray(file_save,name='filenames')
df.to_netcdf('C:/Users/gentemann/Google Drive/f_drive/docs/projects/SSS/rss40km_filesave.nc')

In [None]:
#now read in orbits that have collocated data
#subset orbit to region USV has data on that day
#need to fill lat/lon nan with value for tree to build
#just picked -89 as it is outside usv observation region
from scipy import spatial
df = xr.open_dataset('C:/Users/gentemann/Google Drive/f_drive/docs/projects/SSS/rss40km_filesave.nc')
for ilook in range(2):
    ds_usv = read_usv()
    ilen = ds_usv.time.shape[0]
    ds_usv['deltaT']=np.ones(ilen)*99999
    ds_usv['smap_SSS']=np.ones(ilen)*999999
    ds_usv['smap_name']=np.empty(ilen,dtype=str)
    ds_usv['smap_ydim']=np.ones(ilen)*999999
    ds_usv['smap_xdim']=np.ones(ilen)*999999
    for file2 in df.filenames.data:
        file = file2
        file.replace('\\','/')
        ds = xr.open_dataset(file)
        ds.close()       
        lons = ds.cellon[:,:,ilook].fillna(-89).data #np.append(x,ds.cellon[:,:,0].data)
        lats = ds.cellat[:,:,ilook].fillna(-89).data #np.append(y,ds.cellat[:,:,0].data)
        inputdata = list(zip(lons.ravel(), lats.ravel()))
        tree = spatial.KDTree(inputdata)
        orbit_time = ds.time[:,:,0].max().data-np.timedelta64(1,'D')
        orbit_time2 = ds.time[:,:,0].max().data+np.timedelta64(1,'D')    
        usv_subset = ds_usv.sel(time=slice(orbit_time,orbit_time2))
        for iusv in range(ilen):
            if (ds_usv.time[iusv]<orbit_time) or (ds_usv.time[iusv]>orbit_time2):
                continue
            pts = np.array([ds_usv.lon[iusv]+360, ds_usv.lat[iusv]])
            tree.query(pts,k=1)
            i=tree.query(pts)[1]
            ii=np.int(np.floor(i/1560))
            jj=np.mod(i,1560)
            deltaTa = ((ds_usv.time[iusv]-ds.time[ii,jj,ilook]).data)/ np.timedelta64(1,'m')
            if np.abs(deltaTa)<np.abs(ds_usv.deltaT[iusv].data/ np.timedelta64(1,'m')):
                ds_usv.deltaT[iusv]=deltaTa
                ds_usv.smap_SSS[iusv]=ds.sss_smap[ii,jj,ilook]
                ds_usv.smap_name[iusv]=file2
                ds_usv.smap_ydim[iusv]=ii
                ds_usv.smap_xdim[iusv]=jj
    fileout = 'C:/Users/gentemann/Google Drive/f_drive/docs/projects/SSS/rss40km'+str(ilook)+'_usv.nc'
    ds_usv.to_netcdf(fileout)

In [40]:
print(ds_usv.deltaT[iusv])
print(deltaTa)
deltaTa = ((ds_usv.time[iusv]-ds.time[ii,jj,ilook]).data)/ np.timedelta64(1,'m')
print(deltaTa)
print(np.abs(ds_usv.deltaT[iusv].data/ np.timedelta64(1,'m')))

<xarray.DataArray 'deltaT' ()>
array(5999940000000000, dtype='timedelta64[ns]')
Coordinates:
    trajectory  float32 1002.0
    deltaT      timedelta64[ns] 69 days 10:39:00
-1374.9180717166666
-1374.9180717166666
99999.0


In [22]:
ds_usv['deltaT']=np.ones(ilen,dtype=np.timedelta64)*99999*6e10 #np.timedelta64(999999,'m')
if np.abs(deltaT)<np.abs(ds_usv.deltaT[iusv].data):
    print(deltaT)

-1375 minutes


In [20]:
print(deltaT,ds_usv.deltaT[iusv].data)

-1375 minutes 999999 nanoseconds


In [None]:
ds_usv.sel(time=orbit_time, method='nearest')

In [None]:
(ds.cellat[:,:,0]-ds.cellat[:,:,1]).plot(vmin=-.1,vmax=.1)

In [None]:
print(ds2.cellat.data,ds2.cellon.data)
print(ds_usv.lat[1000].data,ds_usv.lon[1000].data)

In [None]:
print(tree.query(pts))
print(inputdata[tree.query(pts)[1]])
print([ds_usv.lon[1000].data+360, ds_usv.lat[1000].data])
print(len(inputdata))

In [None]:
print(ds_usv.lon[1000].data,ds_usv.lat[1000].data)
print(jj,ii, ii*1560+jj)
print(ds.cellon[ii,jj,0].data-360,ds.cellat[ii,jj,0].data)
print(lons[ii,jj]-360,lats[ii,jj])


In [None]:
ds = xr.open_dataset(file)
ds.close()
x = ds.cellon[:,:,0].data
y = ds.cellat[:,:,0].data
z = ds.sss_smap[:,:,0].data
ax = plt.axes(projection=ccrs.PlateCarree())
cs1 = ax.scatter(x, y, s=1.0, c=z, edgecolor='none', cmap='jet')
minlon,maxlon,minlat,maxlat = ds_usv.lon.min().data,ds_usv.lon.max().data,ds_usv.lat.min().data,ds_usv.lat.max().data
ax.plot([minlon,maxlon,maxlon,minlon,minlon],[minlat,minlat,maxlat,maxlat,minlat])
ax.plot(ds.cellon[jj,ii,0],ds.cellat[jj,ii,0],'b*')
ax.plot(ds_usv.lon[1000],ds_usv.lat[1000],'ro')
ax.coastlines()
ax.set_xlim(-130,-110)
ax.set_ylim(25,40)


In [None]:
np.nanmax(ds.cellon)

In [None]:
minlon,maxlon,minlat,maxlat = ds_usv.lon.min().data,ds_usv.lon.max().data,ds_usv.lat.min().data,ds_usv.lat.max().data
print(minlon,maxlon,minlat,maxlat)
#cond = (lons>=minlon) & (lons<=maxlon) & (lats>=minlat) & (lats<=maxlat)
#meets_condition = (air_day.air > 22) & (air_day.air < 30)
#subset = ds.where((lons>=minlon) & (lons<=maxlon) & (lats>=minlat) & (lats<=maxlat),drop=True)
subset = ds.where((lons>=minlon) & (lons<=maxlon) & (lats>=minlat) & (lats<=maxlat))
print(subset)

# Read in ACCESS data

In [None]:
filename = 'F:/data/cruise_data/access/rockfish_casts_2011.nc'
ds = xr.open_dataset(filename)
ds = ds.swap_dims({'row':'time'})
ds = ds.rename({'latitude':'lat','longitude':'lon'})
ds_access1 = ds.copy(deep=True)
filename = 'F:/data/cruise_data/access/rockfish_casts_2015.nc'
ds = xr.open_dataset(filename)
ds = ds.swap_dims({'row':'time'})
ds = ds.rename({'latitude':'lat','longitude':'lon'})
ds_access2 = ds.copy(deep=True)
ds_access = xr.concat((ds_access1,ds_access2),dim='time')

In [None]:
ds_access

In [None]:
ds_access.time.dt.month

In [None]:
plt.plot(ds_access.time.dt.year,ds_access.time.dt.dayofyear,'.')
plt.xlabel('Day of Year')
plt.ylabel('Year')
fig_fname = 'C:/Users/gentemann/Google Drive/f_drive/docs/projects/SSS/figures/access_date.png'
plt.savefig(fig_fname, transparent=False, format='png')

In [None]:
ax = plt.axes(projection=ccrs.PlateCarree())
cs1 = ax.scatter(ds_access.lon, ds_access.lat, s=3.0, c=ds_access.salinity, edgecolor='none', cmap='jet',vmin=33,vmax=34.35)
ax.coastlines()
x1,x2,y1,y2 = -128,-112,30,50
ax.set_xlim(x1,x2)
ax.set_ylim(y1,y2)
ax.set_xticks(np.arange(x1,x2,4))
ax.set_yticks(np.arange(y1,y2,5))
cax = plt.colorbar(cs1)
cax.set_label('Salinity (psu)')
fig_fname = 'C:/Users/gentemann/Google Drive/f_drive/docs/projects/SSS/figures/access_location.png'
plt.savefig(fig_fname, transparent=False, format='png')

# Read in 2018 Saildrone West Coast Survey

In [None]:
#https://coastwatch.pfeg.noaa.gov/erddap/
#url = 'https://coastwatch.pfeg.noaa.gov/erddap/tabledap/saildrone_west_coast_survey_2018.nc'
#url = 'https://ferret.pmel.noaa.gov/pmel/erddap/tabledap/saildrone_west_coast_survey_2018.nc'
url = 'https://ferret.pmel.noaa.gov/pmel/erddap/tabledap/saildrone_west_coast_survey_2018'
#url = 'F:/data/cruise_data/saildrone/2018_wcoast/saildrone_west_coast_survey_2018_f374_2e74_3de8.nc'
#url = 'https://ferret.pmel.noaa.gov/pmel/erddap/tabledap/saildrone_west_coast_survey_2018.nc'
ds_usv = xr.open_dataset(url)
ds_usv.close()
ds_usv['lat']=ds_usv['s.latitude']
ds_usv['lon']=ds_usv['s.longitude']
ds_usv['time']=ds_usv['s.time']
ds_usv['trajectory']=ds_usv['s.trajectory']
ds_usv['SAL_MEAN']=ds_usv['s.SAL_MEAN']
ds_usv['TEMP_CTD_MEAN']=ds_usv['s.TEMP_CTD_MEAN']
ds_usv = ds_usv.swap_dims({'s':'time'})
#print(ds_usv.time.min().data,ds_usv.time.max().data)
#ds_usv.load()


In [None]:
#used this code to figure out where the data goes bad
ilen = ds_usv.SAL_MEAN.size
print(ilen)
tem = np.nan(ilen)
for i in range(787066):
    if ds_usv.SAL_MEAN[i]>20:
        tem[i]=ds_usv.SAL_MEAN[i]
tem[i:]=np.nan

In [None]:
#there is something bad in the data file above 776100 so subset the data to just the good part
ds_usv2 = ds_usv.isel(time=slice(None,776100))
xlon =  ds_usv2.lon.copy(deep=True)
xlat =  ds_usv2.lat.copy(deep=True)
salinity = ds_usv2.SAL_MEAN.copy(deep=True)
ax = plt.axes(projection=ccrs.PlateCarree())
cs1 = ax.scatter(xlon, xlat, s=3.0, c=salinity, edgecolor='none', cmap='jet',vmin=32,vmax=34.35)
ax.coastlines()
x1,x2,y1,y2 = -130,-114,30,52
ax.set_xlim(x1,x2)
ax.set_ylim(y1,y2)
ax.set_xticks(np.arange(x1,x2,4))
ax.set_yticks(np.arange(y1,y2,5))
cax = plt.colorbar(cs1)
cax.set_label('Salinity (psu)')
fig_fname = 'C:/Users/gentemann/Google Drive/f_drive/docs/projects/SSS/figures/wcoast_location2.png'
plt.savefig(fig_fname, transparent=False, format='png')


In [None]:
ds_usv2.trajectory[0:5]

In [None]:
url = 'https://ferret.pmel.noaa.gov/pmel/erddap/tabledap/saildrone_west_coast_survey_2018'
#url = 'F:/data/cruise_data/saildrone/2018_wcoast/saildrone_west_coast_survey_2018_f374_2e74_3de8.nc'
#url = 'https://ferret.pmel.noaa.gov/pmel/erddap/tabledap/saildrone_west_coast_survey_2018.nc'
ds_usv = xr.open_dataset(url)
ds_usv.close()
ds_usv

In [None]:
url = 'https://ferret.pmel.noaa.gov/generic/erddap/tabledap/saildrone_arctic_gts'
ds_usv = xr.open_dataset(url)
ds_usv = xr.open_dataset(url)
ds_usv.close()
ds_usv['lat']=ds_usv['s.latitude']
ds_usv['lon']=ds_usv['s.longitude']
ds_usv['time']=ds_usv['s.time']
ds_usv['trajectory']=ds_usv['s.trajectory']
ds_usv['SAL_MEAN']=ds_usv['s.SAL_MEAN']
ds_usv['TEMP_CTD_MEAN']=ds_usv['s.TEMP_CTD_MEAN']
#ds_usv = ds_usv.swap_dims({'s':'time'})


In [None]:
ds_usv

In [None]:
xlon =  ds_usv.lon.copy(deep=True)
xlat =  ds_usv.lat.copy(deep=True)
salinity = ds_usv.SAL_MEAN.copy(deep=True)
ax = plt.axes(projection=ccrs.PlateCarree())
cs1 = ax.scatter(xlon, xlat, s=3.0, c=salinity, edgecolor='none', cmap='jet',vmin=32,vmax=34.35)
ax.coastlines()
x1,x2,y1,y2 = -130,-114,30,52
ax.set_xlim(x1,x2)
ax.set_ylim(y1,y2)
ax.set_xticks(np.arange(x1,x2,4))
ax.set_yticks(np.arange(y1,y2,5))
cax = plt.colorbar(cs1)
cax.set_label('Salinity (psu)')
fig_fname = 'C:/Users/gentemann/Google Drive/f_drive/docs/projects/SSS/figures/arctic_location.png'
plt.savefig(fig_fname, transparent=False, format='png')

In [None]:
url = 'F:/data/cruise_data/saildrone/2018_wcoast/saildrone_west_coast_survey_2018_f374_2e74_3de8.nc'
ds_usv = xr.open_dataset(url)
ds_usv.close()
xlon =  ds_usv.longitude.copy(deep=True)
xlat =  ds_usv.latitude.copy(deep=True)
salinity =  ds_usv.SAL_MEAN.copy(deep=True)
ax = plt.axes(projection=ccrs.PlateCarree())
cs1 = ax.scatter(xlon, xlat, s=3.0, c=salinity, edgecolor='none', cmap='jet',vmin=32,vmax=34.35)
ax.coastlines()
x1,x2,y1,y2 = -128,-112,25,40
ax.set_xlim(x1,x2)
ax.set_ylim(y1,y2)
ax.set_xticks(np.arange(x1,x2,4))
ax.set_yticks(np.arange(y1,y2,5))
cax = plt.colorbar(cs1)
cax.set_label('Salinity (psu)')
fig_fname = 'C:/Users/gentemann/Google Drive/f_drive/docs/projects/SSS/figures/wcoast_location.png'
plt.savefig(fig_fname, transparent=False, format='png')


In [None]:
ds = xr.open_dataset(filelist[8])
ds.close()
x = ds.cellon[:,:,0].data
y = ds.cellat[:,:,0].data
z = ds.sss_smap[:,:,0].data
ax = plt.axes(projection=ccrs.PlateCarree())
cs1 = ax.scatter(x, y, s=1.0, c=z, edgecolor='none', cmap='jet')
minlon,maxlon,minlat,maxlat = ds_usv.lon.min().data,ds_usv.lon.max().data,ds_usv.lat.min().data,ds_usv.lat.max().data
ax.plot([minlon,maxlon,maxlon,minlon,minlon],[minlat,minlat,maxlat,maxlat,minlat])
ax.coastlines()


In [None]:
#create daily data arrays
#grid the data and search within the in situ obs to find which filenames are in right region


In [None]:
#create daily data arrays
rlon=np.arange(-180,180,.25)
rlat=np.arange(90,-90,-.25)
area_def = load_area('areas.cfg', 'pc_world')
itime=0
for ilook in range(1):
    x,y,x2,y2 = [],[],[],[]
    if itime==1:
        z = z2 = np.empty(0, np.int32)
    else:
        z = z2 = np.empty(0, np.datetime64)    
    for file in filelist:
        ds = xr.open_dataset(file)
        ds.close()
        x = np.append(x,ds.cellon[:,:780,ilook].data)
        y = np.append(y,ds.cellat[:,:780,ilook].data)
        if itime==1:
            z = np.append(z,ds.sss_smap[:,:780,ilook].data)
        else:
            z = np.append(z,ds.time[:,:780,ilook].data)
        x2 = np.append(x2,ds.cellon[:,780:,ilook].data)
        y2 = np.append(y2,ds.cellat[:,780:,ilook].data)
        if itime==1:
            z2 = np.append(z2,ds.sss_smap[:,780:,ilook].data)
        else:
            z2 = np.append(z2,ds.time[:,780:,ilook].data)
    lons,lats,data = x,y,z 
    lons = np.mod(lons+180,360)-180
    swath_def = SwathDefinition(lons, lats)
    result1 = resample_nearest(swath_def, data, area_def, radius_of_influence=20000, fill_value=None)
    lons,lats,data = x2,y2,z2 
    lons = np.mod(lons+180,360)-180
    swath_def = SwathDefinition(lons, lats)
    result2 = resample_nearest(swath_def, data, area_def, radius_of_influence=20000, fill_value=None)
    tem = np.stack((result1,result2))
    das = xr.DataArray(tem,name='sss1',coords={'iasc':[0,1],'lat':rlat,'lon':rlon},dims=('iasc','lat','lon'))
    dat = xr.DataArray(tem,name='time',coords={'iasc':[0,1],'lat':rlat,'lon':rlon},dims=('iasc','lat','lon'))
ds_out = xr.Dataset({das,dat})


In [None]:
ds_out = xr.Dataset({das,dat})
z=np.empty(0,np.datetime64)
print(type(z),type(ds.time[0,0,0].data))
z = np.append(z,ds.time[0,0,0].data)

In [None]:
    da0 = xr.DataArray(result,name='sss0',coords={'lat':rlat,'lon':rlon},dims=('lat','lon'))

#iasc=0
#da1 = xr.DataArray(result,name='sss1',coords={'lat':rlat,'lon':rlon,'asc':iasc},dims=('lat','lon','asc'))


In [None]:
da1.plot()

In [None]:
lons = x #ds.cellon[:,:,0].data
lons = np.mod(lons+180,360)-180
lats = y# ds.cellat[:,:,0].data
data = z#ds.sss_smap[:,:,0].data
area_def = load_area('areas.cfg', 'pc_world')
swath_def = SwathDefinition(lons, lats)
result = resample_nearest(swath_def, data, area_def, radius_of_influence=20000, fill_value=None)
save_quicklook('sss.png', area_def, result, num_meridians=0, num_parallels=0, label='Salinity (psu)')

In [None]:
rlon=np.arange(-180,180,.25)
rlat=np.arange(90,-90,-.25)
da = xr.DataArray(result,name='sss0',coords={'lat':rlat,'lon':rlon},dims=('lat','lon'))

In [None]:
da.plot()

In [None]:
plt.contourf(rlon,rlat,result)

In [None]:
#interpolate onto grid
# data coordinates and values
idim = ds.cellon.shape[0]
jdim = ds.cellon.shape[1]
x = ds.cellon[:,:,0].data.reshape(idim*jdim)
y = ds.cellat[:,:,0].data.reshape(idim*jdim)
z = ds.sss_smap[:,:,0].data.reshape(idim*jdim)

mask = np.isfinite(x) & np.isfinite(y)
x = x[mask]
y = y[mask]
z = z[mask]

# target grid to interpolate to
xi,yi = np.arange(0,360,.5), np.arange(-90,90,.5)
xi,yi = np.meshgrid(xi,yi)

## set mask
#mask = (xi > 0.5) & (xi < 0.6) & (yi > 0.5) & (yi < 0.6)

# interpolate
zi = griddata((x,y),z,(xi,yi),method='nearest')

plt.contourf(xi,yi,zi)

In order to use open_mfdataset you need to either provide a path or a list of filenames to input

Here we use the USV cruise start and end date to read in all data for that period

# Read in SSS L2 data
Read in data using open_mfdataset with the option coords='minimal'

The dataset is printed out and you can see that rather than straight xarray data array for each of the data variables open_mfdataset using dask arrays

In [None]:
lon = np.arange(0,360)
np.mod(lon+180,360)-180
np.mod(lon + 180,360) - 180

In [None]:
filename = 'f:/data/sat_data/aquarius/Q2011237.L3m_DAY_SCIA_V5.0.RAIN_MASK_SSS_1deg'
ds = xr.open_dataset(filename)
ds.close()
ds.coords['phony_dim_0']=np.arange(90,-90,-1)
ds.coords['phony_dim_1']=np.arange(-180,180)
ds = ds.rename({'phony_dim_0':'lat','phony_dim_1':'lon'})
ds.l3m_data.plot()

In [None]:
#ds_sat = xr.open_mfdataset(filelist,coords='minimal')
ds_sat = xr.open_dataset(filelist[0])
#ds_sat = ds_sat.isel(depth=0).drop('year').rename({'latitude':'lat'}).rename({'longitude':'lon'})
#ds_sat = ds_sat.sortby('lat').sel(lon=slice(20.0,379.00))
#ds_sat.coords['lon'] = (ds_sat.coords['lon'] + 180) % 360 - 180
#ds_sat = ds_sat.sortby('lon')
ds_sat.close()
print(ds_sat)  #check units 
ds_sat


1. First let's subset the data to make it smaller to deal with by using the cruise lat/lons




In [None]:
#Step 1 from above
subset = ds_sat.sel(lon=slice(ds_usv_subset.lon.min().data,ds_usv_subset.lon.max().data),
                    lat=slice(ds_usv_subset.lat.min().data,ds_usv_subset.lat.max().data))

#now collocate with usv lat and lons
ds_collocated = subset.interp(lat=ds_usv_subset.lat,lon=ds_usv_subset.lon,time=ds_usv_subset.time,method='linear')
ds_collocated_nearest = subset.interp(lat=ds_usv_subset.lat,lon=ds_usv_subset.lon,time=ds_usv_subset.time,method='nearest')


# A larger STD that isn't reflective of uncertainty in the observation
The collocation above will result in multiple USV data points matched with a single satellite
observation.    The USV is sampling every 1 min and approximately few meters, while the satellite
is an average over a footprint that is interpolated onto a daily mean map.  While calculating the mean would results in a valid mean, the STD would be higher and consist of a component that reflects the uncertainty of the USV and the satellite and a component that reflects the natural variability in the region that is sampled by the USV

Below we use the 'nearest' collocation results to identify when multiple USV data are collcated to
a single satellite observation.
This code goes through the data and creates averages of the USV data that match the single CCMP collocated value.


In [None]:
ilen,index = ds_collocated_nearest.dims['time'],0
ds_tem = ds_collocated_nearest.copy(deep=True)
duu, duv1, duv2, dlat, dlon, dut = [],[],[],[],[],np.empty((),dtype='datetime64')
while index <= ilen-2:
    index += 1
    if np.isnan(ds_collocated_nearest.u[index]):
        continue
    if np.isnan(ds_tem.u[index]):
        continue
   # print(index, ilen)
    iend = index + 730
    if iend > ilen-1:
        iend = ilen-1
    ds_tem_subset = ds_tem.u[index:iend]
    ds_tem_subset2 = ds_tem.v[index:iend]
    ds_usv_subset2ucur = ds_usv_subset.vel_east_30m[index:iend]
    ds_usv_subset2vcur = ds_usv_subset.vel_north_30m[index:iend]
    ds_usv_subset2lat = ds_usv_subset.lat[index:iend]
    ds_usv_subset2lon = ds_usv_subset.lon[index:iend]
    ds_usv_subset2time = ds_usv_subset.time[index:iend]
    cond = ((ds_tem_subset==ds_collocated_nearest.u[index]) & (ds_tem_subset2==ds_collocated_nearest.v[index]))
    notcond = np.logical_not(cond)
    #cond = ((ds_tem.analysed_sst==ds_collocated_nearest.analysed_sst[index]))
    #notcond = np.logical_not(cond)
    masked = ds_tem_subset.where(cond)
    if masked.sum().data==0:  #don't do if data not found
        continue
    masked_usvucur = ds_usv_subset2ucur.where(cond,drop=True)
    masked_usvvcur = ds_usv_subset2vcur.where(cond,drop=True)
    masked_usvlat = ds_usv_subset2lat.where(cond,drop=True)
    masked_usvlon = ds_usv_subset2lon.where(cond,drop=True)
    masked_usvtime = ds_usv_subset2time.where(cond,drop=True)
    duu=np.append(duu,masked_usvucur.mean().data)
    duv1=np.append(duv1,masked_usvvcur.mean().data)
    dlat=np.append(dlat,masked_usvlat.mean().data)
    dlon=np.append(dlon,masked_usvlon.mean().data)
    tdif = masked_usvtime[-1].data-masked_usvtime[0].data
    mtime=masked_usvtime[0].data+np.timedelta64(tdif/2,'ns')
#    if mtime>dut.max():
#        print(index,dut.shape[0],masked_usvtime[0].data,masked_usvtime[-1].data-masked_usvtime[0].data)
    dut=np.append(dut,mtime)
    ds_tem.u[index:iend]=ds_tem.u.where(notcond)
    ds_tem.v[index:iend]=ds_tem.v.where(notcond)
dut2 = dut[1:]  #remove first data point which is a repeat from what array defined       
ds_new=xr.Dataset(data_vars={'vel_east': ('time',duu),'vel_north':('time',duv1),
                             'lon': ('time',dlon),
                             'lat': ('time',dlat)},
                  coords={'time':dut2})
ds_new.to_netcdf('F:/data/cruise_data/saildrone/baja-2018/oscar_downsampled_usv_data2.nc')

# redo the collocation
Now, redo the collocation, using 'linear' interpolation using the averaged data.  This will interpolate the data temporally onto the USV sampling which has been averaged to the satellite data grid points

In [None]:
ds_collocated_averaged = subset.interp(lat=ds_new.lat,lon=ds_new.lon,time=ds_new.time,method='linear')
ds_collocated_averaged

In [None]:
ds_collocated_averaged.to_netcdf('F:/data/cruise_data/saildrone/baja-2018/oscar_downsampled_collocated_usv_data3.nc')


In [None]:
ds_collocated_averaged['spd']=np.sqrt(ds_collocated_averaged.u**2+ds_collocated_averaged.v**2)
ds_new['spd'] = np.sqrt(ds_new.vel_east**2+ds_new.vel_north**2)
ds_collocated_averaged['dir']=np.arctan2(ds_collocated_averaged.v,ds_collocated_averaged.u)*180./np.pi
ds_new['dir'] = np.arctan2(ds_new.vel_north,ds_new.vel_east)*180./np.pi

usv_spd = ds_new.spd
sat_spd = ds_collocated_averaged.spd
usv_dir = ds_new.dir
sat_dir = ds_collocated_averaged.dir
dif_spd,dif_dir = usv_spd - sat_spd, usv_dir - sat_dir
cond,cond2 = (dif_dir > 180),(dif_dir < -180)
cond,cond2 = (dif_dir > 180),(dif_dir < -180)
dif_dir[cond]-=360
dif_dir[cond2]+=360
print('mean,std dif speed',[dif_spd.mean().data,dif_spd.std().data])
print('mean,std dir',[dif_dir.mean().data,dif_dir.std().data,dif_spd.shape[0]])

usv_spd = ds_new.spd
sat_spd = ds_collocated_averaged.spd
usv_dir = ds_new.dir
sat_dir = ds_collocated_averaged.dir
dif_spd,dif_dir = usv_spd - sat_spd, usv_dir - sat_dir
cond,cond2 = (dif_dir > 180),(dif_dir < -180)
cond,cond2 = (dif_dir > 180),(dif_dir < -180)
dif_dir[cond]-=360
dif_dir[cond2]+=360

cond = (np.isfinite(usv_spd) & np.isfinite(sat_spd))
usv_spd = usv_spd[cond]
sat_spd = sat_spd[cond]
usv_dir = usv_dir[cond]
sat_dir = sat_dir[cond]

sdif = dif_spd.dropna('time')
sdifcor = np.corrcoef(sat_spd,usv_spd)[0,1]
std_robust = np.nanmedian(np.abs(sdif - np.nanmedian(sdif))) * 1.482602218505602
ilen = sdif.shape[0]
print([sdif.mean().data,sdif.median().data,sdifcor,sdif.std().data,std_robust,
                    np.abs(sdif).mean().data,sdif.shape[0]])
sdif = dif_dir.dropna('time')
sdifcor = np.corrcoef(sat_dir,usv_dir)[0,1]
std_robust = np.nanmedian(np.abs(sdif - np.nanmedian(sdif))) * 1.482602218505602
ilen = sdif.shape[0]
print([sdif.mean().data,sdif.median().data,sdifcor,sdif.std().data,std_robust,
                    np.abs(sdif).mean().data,sdif.shape[0]])


In [None]:
ds_new

In [None]:

# Learn about API authentication here: https://plot.ly/python/getting-started
# Find your api_key here: https://plot.ly/settings/api
# MatPlotlib
import matplotlib.pyplot as plt
from matplotlib import pylab
# Scientific libraries
from numpy import arange,array,ones
from scipy import stats

usv_spd = ds_new.spd
sat_spd = ds_collocated_averaged.spd
usv_dir = ds_new.dir
sat_dir = ds_collocated_averaged.dir
dif_spd,dif_dir = usv_spd - sat_spd, usv_dir - sat_dir
cond,cond2 = (dif_dir > 180),(dif_dir < -180)
cond,cond2 = (dif_dir > 180),(dif_dir < -180)
dif_dir[cond]-=360
dif_dir[cond2]+=360

usv_ucur = ds_new.vel_east
usv_vcur = ds_new.vel_north
sat_ucur = ds_collocated_averaged.u
sat_vcur = ds_collocated_averaged.v
usv_spd  = np.sqrt(usv_ucur**2 + usv_vcur**2)
sat_spd  = np.sqrt(sat_ucur**2 + sat_vcur**2)

cond = (np.isfinite(usv_spd) & np.isfinite(sat_spd))
usv_spd = usv_spd[cond]
sat_spd = sat_spd[cond]
usv_dir = usv_dir[cond]
sat_dir = sat_dir[cond]

xi = usv_spd.data
A = array([ xi, ones(sat_spd.shape[0])])
y = sat_spd.data
# Generated linear fit
slope, intercept, r_value, p_value, std_err = stats.linregress(xi,y)
line = slope*xi+intercept
plt.plot(xi,y,'o', xi, line)
plt.ylim(-.1,1),plt.xlim(-.1,1)
plt.grid()
plt.xlabel('USV speed')
plt.ylabel('OSCAR speed')
print(slope,intercept,r_value,p_value,std_err)

fig_fname='F:/data/cruise_data/saildrone/baja-2018/figs/oscar_usv_big_spd.png'
plt.savefig(fig_fname, transparent=False, format='png')

#pylab.title('Linear Fit with Matplotlib')
#ax = plt.gca()
#ax.set_axis_bgcolor((0.898, 0.898, 0.898))
#fig = plt.gcf()
#py.plot_mpl(fig, filename='linear-Fit-with-matplotlib')



In [None]:
usv_ucur = ds_new.vel_east
usv_vcur = ds_new.vel_north
sat_ucur = ds_collocated_averaged.u
sat_vcur = ds_collocated_averaged.v

cond = (np.isfinite(usv_ucur) & np.isfinite(sat_ucur) & ( usv_vcur<.2))
usv_ucur = usv_ucur[cond]
sat_ucur = sat_ucur[cond]
usv_vcur = usv_vcur[cond]
sat_vcur = sat_vcur[cond]

xi = usv_ucur.data
A = array([ xi, ones(usv_ucur.shape[0])])
y = sat_ucur.data
# Generated linear fit
slope, intercept, r_value, p_value, std_err = stats.linregress(xi,y)
line = slope*xi+intercept
plt.subplot(231)
plt.plot(xi,y,'o', xi, line)
plt.ylim(-.5,.5),plt.xlim(-.5,.5)
plt.grid()
plt.xlabel('USV east speed')
plt.ylabel('SAT east speed')
print(slope,intercept,r_value,p_value,std_err)
xi = usv_vcur.data
A = array([ xi, ones(usv_vcur.shape[0])])
y = sat_vcur.data
# Generated linear fit
slope, intercept, r_value, p_value, std_err = stats.linregress(xi,y)
line = slope*xi+intercept
plt.subplot(232)
plt.plot(xi,y,'o', xi, line)
plt.xlabel('USV north speed')
plt.ylabel('SAT north speed')
print(slope,intercept,r_value,p_value,std_err)
plt.ylim(-.5,.5),plt.xlim(-.5,.5)
plt.grid()
xi = np.sqrt(usv_vcur.data**2 + usv_ucur.data**2)
A = array([ xi, ones(usv_vcur.shape[0])])
y = np.sqrt(sat_vcur.data**2+sat_ucur.data**2)
# Generated linear fit
slope, intercept, r_value, p_value, std_err = stats.linregress(xi,y)
line = slope*xi+intercept
plt.subplot(233)
plt.plot(xi,y,'o', xi, line)
plt.ylim(-.1,.75),plt.xlim(-.1,.75)
plt.grid()
plt.xlabel('USV speed')
plt.ylabel('SAT speed')
print(slope,intercept,r_value,p_value,std_err)
fig_fname='F:/data/cruise_data/saildrone/baja-2018/figs/sat_current_U_V_speed.png'
plt.savefig(fig_fname, transparent=False, format='png')


In [None]:
fig, ax = plt.subplots(figsize=(5,4))
ax.plot(usv_spd,usv_spd - sat_spd,'.')
ax.grid(True)
ax.set_xlabel('USV current speed (ms$^{-1}$)')
ax.set_ylabel('USV - Sat current speed (ms$^{-1}$)')
fig_fname='F:/data/cruise_data/saildrone/baja-2018/figs/sat_current_USV_minus_Sat_fnct_USV.png'
fig.savefig(fig_fname, transparent=False, format='png')


In [None]:
fig, ax = plt.subplots(figsize=(5,4))
ax.plot(usv_spd,usv_dir - sat_dir,'.')
ax.set_xlabel('USV current speed (ms$^{-1}$)')
ax.set_ylabel('USV - Sat current direction (deg)')
fig_fname='F:/data/cruise_data/saildrone/baja-2018/figs/sat_current_both_bias.png'
fig.savefig(fig_fname, transparent=False, format='png')


In [None]:
ds_usv_subset['cur_spd']=np.sqrt(ds_usv_subset.vel_east**2+ds_usv_subset.vel_north**2)
ds_usv_6hr=ds_usv_subset.resample(time='6H').mean()
ds_usv_6hr['cur_spd']=np.sqrt(ds_usv_6hr.vel_east**2+ds_usv_6hr.vel_north**2)


plt.plot(ds_collocated_averaged.time[cond],sat_spd,'.-')
plt.plot(ds_collocated_averaged.time[cond],usv_spd,'.-')
plt.plot(ds_usv_6hr.time,ds_usv_6hr.cur_spd)
plt.legend({'OSCAR','USV','USV-ave'})
fig_fname='F:/data/cruise_data/saildrone/baja-2018/figs/sat_current_timeseries_bias.png'
fig.savefig(fig_fname, transparent=False, format='png')


In [None]:
ds_usv.spd_30m.sel(time=slice('2018-05-05','2018-05-09')).plot()
