In [None]:
import datetime as dt
import os
import sys
import warnings
from glob import glob

import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import cartopy.crs as ccrs
from pyresample.geometry import AreaDefinition
from pyresample.geometry import GridDefinition
from pyresample import image, geometry, load_area, save_quicklook, SwathDefinition, area_def2basemap
from pyresample.kd_tree import resample_nearest
#from scipy import spatial

sys.path.append('../saildrone/subroutines/')
from read_routines import read_all_usv, read_one_usv, add_coll_vars,get_filelist_l2p,get_orbital_data_l2p

warnings.simplefilter('ignore') # filter some warning messages

# Read in All Saildrone cruises downloaded from https://data.saildrone.com/data/sets
- 2017 onwards; note that earlier data will lack some instruments and generally be of poorer quality
- For this code I want to develop a routine that reads in all the different datasets and creates a standardized set
- It may work best to first read each of the files individually into a dictionary 
- then go through each dataset finding all variable names
- I decided to put all SST into TEMP_CTD_MEAN and same for Salinity so there is a single variable name
- this still preserves all the dataset information

In [None]:
# Input location of the downloaded Saildrone netCDF files, plus the glob pattern
# handed to the reader.
dir_data = 'C:/Users/gentemann/Google Drive/public/2019_saildrone/' #'f:/data/cruise_data/saildrone/saildrone_data/'
dir_data_pattern = dir_data + '*.nc'

# Output locations: dir_out receives the linear-interpolation collocations,
# dir_out2 the nearest-neighbour ones.
dir_out = 'F:/data/cruise_data/saildrone/sss/sss_collocations_8day/'
dir_out2 = 'F:/data/cruise_data/saildrone/sss/sss_collocations_8day_nearest/'

# Later cells iterate over data_dict by name and index it with data_dict[name].
# NOTE(review): presumably a dict of xarray Datasets keyed by cruise file name -
# confirm against read_routines.read_all_usv.
data_dict = read_all_usv(dir_data_pattern)
#data_dict = add_coll_vars(data_dict)


# Check on lat/lon range, names, etc.

In [None]:
#RSS test open
#file = 'F:/data/sat_data/smap/SSS/L3/RSS/V4/8day_running/SCI/2016/001/RSS_smap_SSS_L3_8day_running_2016_005_FNL_v04.0.nc'
#ds = xr.open_dataset(file)
#ds.coords['lon'] = (ds.coords['lon'] + 180) % 360 - 180
#ds = ds.sortby(ds.lon)
#ds.close()  


# Collocate SMAP RSS 8day

In [None]:
#RSS
# Collect every RSS SMAP 8-day running-mean file. Without recursive=True each
# '**' behaves like '*', so this matches files exactly two directories below SCI/.
adir = 'F:/data/sat_data/smap/SSS/L3/RSS/V4/8day_running/SCI/**/**/*.nc'
# glob() returns paths in arbitrary order, while combine='nested' concatenates
# in exactly the order given. Sort so that 'time' is built in path order.
# NOTE(review): assumes sorted path order is chronological - confirm against the
# directory layout.
files = sorted(glob(adir))
print('number of file:',len(files))

ds = xr.open_mfdataset(files,combine='nested',concat_dim='time')
# Wrap longitudes into [-180, 180) and re-sort so 'lon' is increasing again.
ds.coords['lon'] = (ds.coords['lon'] + 180) % 360 - 180
ds = ds.sortby(ds.lon)
# NOTE(review): ds is still read lazily by later cells after this close();
# presumably xarray reopens the files on demand - confirm.
ds.close()


# Collocate using .interp linear interpolation

In [None]:
%%time
# Collocate every cruise in data_dict with the satellite dataset `ds` using
# linear interpolation in time/lat/lon, and write one netCDF file per cruise.
#
# Margin (degrees) added around the cruise bounding box before subsetting the
# satellite grid. Without it, track points near the box edge lie outside the
# selected grid coordinates and interp() returns NaN for them.
# NOTE(review): 1 degree presumably spans several satellite grid cells - confirm
# against the product resolution.
pad_deg = 1.0
for iname,name in enumerate(data_dict):
    print(iname)
    ds_usv = data_dict[name].copy(deep=True)
    # Fill gaps in the vehicle position: linear interpolation along time, then
    # forward/backward fill for anything still missing at either end.
    ds_usv['lat'] = ds_usv.lat.interpolate_na(dim='time',method='linear').ffill(dim='time').bfill(dim='time')
    ds_usv['lon'] = ds_usv.lon.interpolate_na(dim='time',method='linear').ffill(dim='time').bfill(dim='time')
    # Time window: cruise start/end widened by 8 days on each side.
    t1 = ds_usv.time.min().data-np.timedelta64(8,'D')
    t2 = ds_usv.time.max().data+np.timedelta64(8,'D')
    # Spatial window: cruise bounding box widened by pad_deg on each side.
    x1,x2 = float(ds_usv.lon.min())-pad_deg,float(ds_usv.lon.max())+pad_deg
    y1,y2 = float(ds_usv.lat.min())-pad_deg,float(ds_usv.lat.max())+pad_deg
    print(t1,t2)
    # Load only the padded space/time box into memory before interpolating.
    ds_sat = ds.sel(time=slice(t1,t2),lat=slice(y1,y2),lon=slice(x1,x2)).load()
    # time, lat and lon indexers all lie along the USV 'time' dimension, so this
    # yields one interpolated value per USV sample.
    ds_interp = ds_sat.interp(time=ds_usv.time,lat=ds_usv.lat,lon=ds_usv.lon,method='linear')

    # Demote the lat/lon coordinates to data variables, then copy every
    # interpolated variable onto the USV dataset with a 'sat_' prefix.
    ds_interp = ds_interp.reset_coords(names={'lat','lon'})
    for var in ds_interp:
        ds_usv['sat_'+var]=ds_interp[var]
    #output
    fout = dir_out+name+'_RSS8dy'+'.nc'
    ds_usv.to_netcdf(fout)
    

# Collocate using .interp nearest neighbor interpolation

In [None]:
%%time
# Collocate every cruise in data_dict with the satellite dataset `ds` using
# nearest-neighbour interpolation in time/lat/lon, and write one netCDF file
# per cruise into dir_out2.
#
# Margin (degrees) added around the cruise bounding box before subsetting the
# satellite grid. Without it, track points near the box edge lie outside the
# selected grid coordinates and interp() returns NaN for them.
# NOTE(review): 1 degree presumably spans several satellite grid cells - confirm
# against the product resolution.
pad_deg = 1.0
for iname,name in enumerate(data_dict):
    print(iname)
    ds_usv = data_dict[name].copy(deep=True)
    # Fill gaps in the vehicle position: linear interpolation along time, then
    # forward/backward fill for anything still missing at either end.
    ds_usv['lat'] = ds_usv.lat.interpolate_na(dim='time',method='linear').ffill(dim='time').bfill(dim='time')
    ds_usv['lon'] = ds_usv.lon.interpolate_na(dim='time',method='linear').ffill(dim='time').bfill(dim='time')
    # Time window: cruise start/end widened by 8 days on each side.
    t1 = ds_usv.time.min().data-np.timedelta64(8,'D')
    t2 = ds_usv.time.max().data+np.timedelta64(8,'D')
    # Spatial window: cruise bounding box widened by pad_deg on each side.
    x1,x2 = float(ds_usv.lon.min())-pad_deg,float(ds_usv.lon.max())+pad_deg
    y1,y2 = float(ds_usv.lat.min())-pad_deg,float(ds_usv.lat.max())+pad_deg
    print(t1,t2)
    # Load only the padded space/time box into memory before interpolating.
    ds_sat = ds.sel(time=slice(t1,t2),lat=slice(y1,y2),lon=slice(x1,x2)).load()
    # time, lat and lon indexers all lie along the USV 'time' dimension, so this
    # yields one nearest-neighbour value per USV sample.
    ds_interp = ds_sat.interp(time=ds_usv.time,lat=ds_usv.lat,lon=ds_usv.lon,method='nearest')

    # Demote the lat/lon coordinates to data variables, then copy every
    # interpolated variable onto the USV dataset with a 'sat_' prefix.
    ds_interp = ds_interp.reset_coords(names={'lat','lon'})
    for var in ds_interp:
        ds_usv['sat_'+var]=ds_interp[var]
    #output
    fout = dir_out2+name+'_RSS8dy'+'.nc'
    ds_usv.to_netcdf(fout)

# Collocation SMAP JPL 8day

In [None]:
#JPL
# Collect every JPL SMAP 8-day running-mean file whose name ends in '4.3.nc'.
# Without recursive=True each '**' behaves like '*', so this matches files
# exactly two directories below 8day_running/.
adir = 'F:/data/sat_data/smap/SSS/L3/JPL/V4.3/8day_running/**/**/*4.3.nc'
# glob() returns paths in arbitrary order, while combine='nested' concatenates
# in exactly the order given. Sort so that 'time' is built in path order.
# NOTE(review): assumes sorted path order is chronological - confirm against the
# directory layout.
files = sorted(glob(adir))
print('number of file:',len(files))

ds = xr.open_mfdataset(files,combine='nested',concat_dim='time')
# Use the same coordinate names as the collocation cells ('lat'/'lon').
ds = ds.rename({'latitude':'lat','longitude':'lon'})
# Make latitude increasing so lat=slice(low, high) selections work.
ds = ds.sortby(ds.lat)
# NOTE(review): ds is still read lazily by later cells after this close();
# presumably xarray reopens the files on demand - confirm.
ds.close()
ds

In [None]:
%%time
# Collocate every cruise in data_dict with the satellite dataset `ds` using
# linear interpolation in time/lat/lon, and write one netCDF file per cruise.
#
# Margin (degrees) added around the cruise bounding box before subsetting the
# satellite grid. Without it, track points near the box edge lie outside the
# selected grid coordinates and interp() returns NaN for them.
# NOTE(review): 1 degree presumably spans several satellite grid cells - confirm
# against the product resolution.
pad_deg = 1.0
for iname,name in enumerate(data_dict):
    print(iname)
    ds_usv = data_dict[name].copy(deep=True)
    # Fill gaps in the vehicle position: linear interpolation along time, then
    # forward/backward fill for anything still missing at either end.
    ds_usv['lat'] = ds_usv.lat.interpolate_na(dim='time',method='linear').ffill(dim='time').bfill(dim='time')
    ds_usv['lon'] = ds_usv.lon.interpolate_na(dim='time',method='linear').ffill(dim='time').bfill(dim='time')
    # Time window: cruise start/end widened by 8 days on each side.
    t1 = ds_usv.time.min().data-np.timedelta64(8,'D')
    t2 = ds_usv.time.max().data+np.timedelta64(8,'D')
    # Spatial window: cruise bounding box widened by pad_deg on each side.
    x1,x2 = float(ds_usv.lon.min())-pad_deg,float(ds_usv.lon.max())+pad_deg
    y1,y2 = float(ds_usv.lat.min())-pad_deg,float(ds_usv.lat.max())+pad_deg
    print(t1,t2)
    # Load only the padded space/time box into memory before interpolating.
    ds_sat = ds.sel(time=slice(t1,t2),lat=slice(y1,y2),lon=slice(x1,x2)).load()
    # time, lat and lon indexers all lie along the USV 'time' dimension, so this
    # yields one interpolated value per USV sample.
    ds_interp = ds_sat.interp(time=ds_usv.time,lat=ds_usv.lat,lon=ds_usv.lon,method='linear')
    # Demote the lat/lon coordinates to data variables, then copy every
    # interpolated variable onto the USV dataset with a 'sat_' prefix.
    ds_interp = ds_interp.reset_coords(names={'lat','lon'})
    for var in ds_interp:
        ds_usv['sat_'+var]=ds_interp[var]

    fout = dir_out+name+'_JPL8dy'+'.nc'
    ds_usv.to_netcdf(fout)

In [None]:
%%time
# Collocate every cruise in data_dict with the satellite dataset `ds` using
# nearest-neighbour interpolation in time/lat/lon, and write one netCDF file
# per cruise into dir_out2.
#
# Margin (degrees) added around the cruise bounding box before subsetting the
# satellite grid. Without it, track points near the box edge lie outside the
# selected grid coordinates and interp() returns NaN for them.
# NOTE(review): 1 degree presumably spans several satellite grid cells - confirm
# against the product resolution.
pad_deg = 1.0
for iname,name in enumerate(data_dict):
    print(iname)
    ds_usv = data_dict[name].copy(deep=True)
    # Fill gaps in the vehicle position: linear interpolation along time, then
    # forward/backward fill for anything still missing at either end.
    ds_usv['lat'] = ds_usv.lat.interpolate_na(dim='time',method='linear').ffill(dim='time').bfill(dim='time')
    ds_usv['lon'] = ds_usv.lon.interpolate_na(dim='time',method='linear').ffill(dim='time').bfill(dim='time')
    # Time window: cruise start/end widened by 8 days on each side.
    t1 = ds_usv.time.min().data-np.timedelta64(8,'D')
    t2 = ds_usv.time.max().data+np.timedelta64(8,'D')
    # Spatial window: cruise bounding box widened by pad_deg on each side.
    x1,x2 = float(ds_usv.lon.min())-pad_deg,float(ds_usv.lon.max())+pad_deg
    y1,y2 = float(ds_usv.lat.min())-pad_deg,float(ds_usv.lat.max())+pad_deg
    print(t1,t2)
    # Load only the padded space/time box into memory before interpolating.
    ds_sat = ds.sel(time=slice(t1,t2),lat=slice(y1,y2),lon=slice(x1,x2)).load()
    # time, lat and lon indexers all lie along the USV 'time' dimension, so this
    # yields one nearest-neighbour value per USV sample.
    ds_interp = ds_sat.interp(time=ds_usv.time,lat=ds_usv.lat,lon=ds_usv.lon,method='nearest')
    # Demote the lat/lon coordinates to data variables, then copy every
    # interpolated variable onto the USV dataset with a 'sat_' prefix.
    ds_interp = ds_interp.reset_coords(names={'lat','lon'})
    for var in ds_interp:
        ds_usv['sat_'+var]=ds_interp[var]
    fout = dir_out2+name+'_JPL8dy'+'.nc'
    ds_usv.to_netcdf(fout)

# remove repeats

In [None]:
# Input: nearest-neighbour collocation files. Output: where the de-duplicated
# versions are written.
data_dir = 'F:/data/cruise_data/saildrone/sss/sss_collocations_8day_nearest/'
data_dir_out = 'F:/data/cruise_data/saildrone/sss/sss_collocations_8day_nearest_norepeat/'
# Every netCDF file directly inside data_dir.
filenames = list(glob(data_dir+'*.nc'))
filenames

In [None]:
# Open the second collocation file to inspect its contents.
example_path = filenames[1]
print(example_path)
ds = xr.open_dataset(example_path)
ds.close()
ds

In [None]:
# Collapse repeated satellite collocations: consecutive USV samples that fall in
# the same satellite cell carry identical satellite values. Each group of samples
# with identical key satellite variables is averaged into one observation ('ob'),
# and the result is written to data_dir_out as '<input stem>norep.nc'.
loc=['lat','lon']
for iname,name in enumerate(filenames):
    print(iname,len(filenames))
    # Build the output name from the file stem using os.path, so it does not
    # depend on the path separator glob() happened to return.
    stem = os.path.splitext(os.path.basename(name))[0]
    fout = data_dir_out + stem + 'norep.nc'
    # Read everything into memory and release the file handle before processing.
    with xr.open_dataset(name) as ds_file:
        ds = ds_file.load()
    # Variables that identify "the same satellite observation"; the first one is
    # also used for the valid-range filter. RSS and JPL files use different names.
    if 'RSS' in name:
        key_vars = ['sat_sss_smap','sat_sss_smap_uncertainty','sat_sss_smap_40km']
    else:
        key_vars = ['sat_smap_sss','sat_anc_sst','sat_anc_sss']
    sss = ds[key_vars[0]]
    # Keep only samples whose satellite salinity lies strictly between 1 and 50.
    ds_tem2 = ds.where((sss<50) & (sss>1),drop=True)
    groups = []
    # Loop until every sample has been assigned to a group (the final remaining
    # sample forms its own group instead of being dropped).
    while len(ds_tem2.time)>0:
        cond = None
        for var in key_vars:
            same = ds_tem2[var]==ds_tem2[var][0]
            cond = same if cond is None else (cond & same)
        # Always include the reference sample itself: if any key variable is NaN
        # there, the equality test is False everywhere and nothing would ever be
        # removed, so the loop would never terminate.
        cond[dict(time=0)] = True
        subset = ds_tem2.where(cond,drop=True)  #repeat obs
        ds_mn = subset.mean(keep_attrs=True,skipna=True)
        ds_mn['time'] = subset.time.mean()
        groups.append(ds_mn.assign_coords({'ob':len(groups)}))
        ds_tem2 = ds_tem2.where(~cond,drop=True)  #data with repeat obs removed
    if not groups:
        # Nothing valid in this file; do not write a result left over from a
        # previous file.
        print('no valid satellite salinity, skipping:',name)
        continue
    # Single concat at the end instead of growing the dataset inside the loop.
    ds_mn2 = xr.concat(groups,dim='ob')
    ds_mn2.to_netcdf(fout)

# TESTING

In [None]:
# Plot the cruise track (lon vs lat) stored in one JPL nearest-neighbour
# collocation file. Both axes come from ds_tem, the file opened here.
fname = 'F:/data/cruise_data/saildrone/sss/sss_collocations_8day_nearest/saildrone-gen_5-arctic_misst_2019-sd1036-20190514T230000-20191011T183000-1_minutes-v1.1575336154680_JPL8dy.nc'
ds_tem = xr.open_dataset(fname)
plt.plot(ds_tem.lon,ds_tem.lat)

In [None]:
#fix remove RSS data from JPL collocation
# Variables to strip from whichever dataset is currently bound to `ds`.
rss_vars = {'sat_nobs','sat_nobs_40km','sat_sss_smap','sat_sss_smap_uncertainty','sat_sss_smap_40km','sat_sss_ref','sat_gland','sat_fland','sat_gice','sat_surtep'}
ds = ds.drop(rss_vars)

In [None]:
# Display `ds_interp` as last assigned by one of the collocation loops.
ds_interp

In [None]:
# Display `ds_usv` as last assigned by one of the collocation loops.
ds_usv

In [None]:
# In-situ salinity as a blue line, collocated satellite salinity as red dots,
# both against USV time on the current axes.
ax = plt.gca()
usv_time = ds_usv.time
ax.plot(usv_time,ds_usv.SAL_CTD_MEAN,'b')
ax.plot(usv_time,ds_usv.sat_smap_sss,'r.')

In [None]:
# Subset `ds` to 2019-08-01 over 30-55 N, 130-110 W, draw the first slice of
# smap_sss along its leading dimension, and overlay the USV track.
tem = ds.sel(time='2019-08-01',lat=slice(30,55),lon=slice(-130,-110))
sss_slice = tem.smap_sss[0,:,:]
ax = plt.gca()
ax.pcolormesh(tem.lon,tem.lat,sss_slice)
ax.plot(ds_usv.lon,ds_usv.lat)

In [None]:
# Open one JPL nearest-neighbour collocation file for inspection.
# (xarray is already imported as xr in the notebook's import cell, so the
# duplicate mid-notebook import is not repeated here.)
file = 'F:/data/cruise_data/saildrone/sss/sss_collocations_8day_nearest/saildrone-gen_5-atomic_eurec4a_2020-sd1026-20200117T000000-20200302T235959-1_minutes-v1.1589306725934_JPL8dy.nc'
ds = xr.open_dataset(file)
ds

In [None]:
# Fetch one entry of data_dict by its key for inspection.
# (To list the available keys: for name in data_dict: print(name))
usv_key = 'saildrone-gen_5-atomic_eurec4a_2020-sd1026-20200117T000000-20200302T235959-1_minutes-v1.1589306725934'
ds2 = data_dict[usv_key]
ds2

In [None]:
# The three output directories written by this notebook.
dir_list = [
    'F:/data/cruise_data/saildrone/sss/sss_collocations_8day/',
    'F:/data/cruise_data/saildrone/sss/sss_collocations_8day_nearest/',
    'F:/data/cruise_data/saildrone/sss/sss_collocations_8day_nearest_norepeat/',
]
# Take the third netCDF file found in the first directory and print its summary.
nc_pattern = dir_list[0]+'*.nc'
files = glob(nc_pattern)
file = files[2]
#if 'JPL' in file:
print(file)
ds = xr.open_dataset(file)
ds.close()
#ds = ds.drop({'sat_nobs','sat_nobs_40km','sat_sss_smap','sat_sss_smap_uncertainty','sat_sss_smap_40km','sat_sss_ref','sat_gland','sat_fland','sat_gice','sat_surtep' })
print(ds)
#ds.to_netcdf(file)

In [None]:
# Display whichever dataset is currently bound to `ds`.
ds