# This is the Saildrone and CCMP collocation code. 


In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
import xarray as xr

def get_ccmp_filename(date, dir_ccmp='F:/data/sat_data/ccmp/v02.0/'):
    """Build the path of the daily CCMP V02.0 wind-analysis file for ``date``.

    The standard file name (``CCMP_Wind_Analysis_...``) is tried first. If that
    file is not on disk, the ``CCMP_RT_Wind_Analysis_...`` name is tried instead.

    Parameters
    ----------
    date : object exposing ``.dt.year.data``, ``.dt.month.data``, ``.dt.day.data``
        The day to look up, e.g. a scalar xarray time DataArray.
    dir_ccmp : str, optional
        Root directory holding the ``Y<yyyy>/M<mm>/`` sub-directories. Defaults
        to the location that used to be hard-coded in this function.

    Returns
    -------
    ccmp_filename : str
        Path of the file that was found. When neither variant exists this is
        the ``CCMP_RT_Wind_Analysis_`` path.
    exists : bool
        True when ``ccmp_filename`` is an existing file.
    """
    # The path is built by string concatenation, so make sure the root ends
    # with a separator.
    if not dir_ccmp.endswith(('/', '\\')):
        dir_ccmp = dir_ccmp + '/'
    syr = str(date.dt.year.data)
    smon = str(date.dt.month.data).zfill(2)
    sdym = str(date.dt.day.data).zfill(2)
    month_dir = dir_ccmp + 'Y' + syr + '/M' + smon + '/'
    name_tail = syr + smon + sdym + '_V02.0_L3.0_RSS.nc'
    ccmp_filename = month_dir + 'CCMP_Wind_Analysis_' + name_tail
    exists = os.path.isfile(ccmp_filename)
    if not exists:
        # Fall back to the "RT" variant of the product name.
        ccmp_filename = month_dir + 'CCMP_RT_Wind_Analysis_' + name_tail
        exists = os.path.isfile(ccmp_filename)
    return ccmp_filename, exists


# Read in USV data
Read in the Saildrone USV file either from a local disc or using OpenDAP.

There are 6 NaN values in the lat/lon data arrays; interpolate across these.

We want to collocate with wind vectors for this example, but the wind vectors are only available every 10 minutes rather than every minute, so use `.dropna` to remove, from every data array in the dataset, all times at which wind vectors aren't available.

In [None]:
# Path for the collocated output (only defined in this cell, not written here).
filename_collocation_data = 'F:/data/cruise_data/saildrone/baja-2018/ccmp_collocation_data.nc'
# Alternative source: the same file served over OpenDAP.
#filename_usv = 'https://podaac-opendap.jpl.nasa.gov/opendap/hyrax/allData/insitu/L2/saildrone/Baja/saildrone-gen_4-baja_2018-sd1002-20180411T180000-20180611T055959-1_minutes-v1.nc'
filename_usv='f:/data/cruise_data/saildrone/baja-2018/saildrone-gen_4-baja_2018-sd1002-20180411T180000-20180611T055959-1_minutes-v1.nc'
# Read the data into memory inside the context manager, so the file is closed only
# after the (lazily loaded) variables have actually been read.
with xr.open_dataset(filename_usv) as ds_usv_file:
    ds_usv = ds_usv_file.load()
# Keep the single trajectory, index by time instead of obs, and use short lat/lon names.
ds_usv = ds_usv.isel(trajectory=0).swap_dims({'obs':'time'}).rename({'longitude':'lon','latitude':'lat'})
ds_usv = ds_usv.sel(time=slice('2018-04-11T18:30',ds_usv.time[-1].data))  #first part of data is when USV being towed, eliminate
ds_usv['lon'] = ds_usv.lon.interpolate_na(dim='time',method='linear') #there are 6 nan values
ds_usv['lat'] = ds_usv.lat.interpolate_na(dim='time',method='linear')
# Wind speed, and the angle of the (UWND_MEAN, VWND_MEAN) vector in degrees from arctan2(V, U).
ds_usv['wind_speed']=np.sqrt(ds_usv.UWND_MEAN**2+ds_usv.VWND_MEAN**2)
ds_usv['wind_dir']=np.arctan2(ds_usv.VWND_MEAN,ds_usv.UWND_MEAN)*180/np.pi
# Drop every time step (in all variables) where UWND_MEAN is NaN.
ds_usv_subset = ds_usv.dropna(dim='time',subset=['UWND_MEAN'])
#print(ds_usv_subset.UWND_MEAN[2000:2010].values)

In order to use open_mfdataset you need to either provide a path or a list of filenames to input

Here we use the USV cruise start and end date to read in all CCMP wind vector data for that period

In [None]:
# Step one day at a time from the first USV sample to one day past the last one,
# keeping the CCMP file of each day that is actually present on disk.
one_day = np.timedelta64(1,'D')
read_date = ds_usv_subset.time.min()
end_date = ds_usv_subset.time.max()
filelist = []
while read_date <= (end_date + one_day):
    tem_filename, exists = get_ccmp_filename(read_date)
    if exists:
        filelist += [tem_filename]
    read_date = read_date + one_day
#print(filelist)

# Read in CCMP data
Read in data using open_mfdataset with the option coords='minimal'

CCMP data uses long names for lat/lon, so rename them and then change the longitude coordinate from 0 to 360 to -180 to 180.

The dataset is printed out, and you can see that, rather than a plain xarray data array for each data variable, open_mfdataset uses dask arrays.

In [None]:
# Open all daily CCMP files as a single dataset.
ds_sat = xr.open_mfdataset(filelist, coords='minimal')
# Use the same short coordinate names as the USV dataset.
ds_sat = ds_sat.rename({'longitude': 'lon', 'latitude': 'lat'})
# Wrap longitude from 0..360 to -180..180, then sort along both axes.
lon_wrapped = ((ds_sat.lon + 180) % 360) - 180
ds_sat = ds_sat.assign_coords(lon=lon_wrapped)
ds_sat = ds_sat.sortby('lon')
ds_sat = ds_sat.sortby('lat')
ds_sat

# Xarray interpolation won't run on chunked dimensions.  
1. First let's subset the data to make it smaller to deal with by using the cruise lat/lons

2. Now load the data into memory (de-Dask-ify it)


In [None]:
#Step 1 from above
# Pad the cruise bounding box by one grid cell on every side. Interpolation needs the
# grid points that bracket each USV position; a box cut exactly at the min/max position
# drops the outer bracketing points and leaves NaN at the extremes of the cruise track.
lon_pad = float(np.abs(ds_sat.lon.diff('lon')).max())
lat_pad = float(np.abs(ds_sat.lat.diff('lat')).max())
lon_min, lon_max = float(ds_usv_subset.lon.min()), float(ds_usv_subset.lon.max())
lat_min, lat_max = float(ds_usv_subset.lat.min()), float(ds_usv_subset.lat.max())
subset = ds_sat.sel(lon=slice(lon_min - lon_pad, lon_max + lon_pad),
                    lat=slice(lat_min - lat_pad, lat_max + lat_pad))
#Step 2 from above
subset = subset.load()
#now collocate with usv lat and lons
# lat, lon and time are all indexed by the USV time axis, so each USV sample gets one
# interpolated CCMP value.
ds_collocated = subset.interp(lat=ds_usv_subset.lat,lon=ds_usv_subset.lon,time=ds_usv_subset.time,method='linear')
ds_collocated_nearest = subset.interp(lat=ds_usv_subset.lat,lon=ds_usv_subset.lon,time=ds_usv_subset.time,method='nearest')


In [None]:
# Add wind speed and the arctan2(v, u) angle (in degrees) to both collocated datasets.
for ds_col in (ds_collocated, ds_collocated_nearest):
    u_sat, v_sat = ds_col.uwnd, ds_col.vwnd
    ds_col['wind_speed'] = np.sqrt(u_sat**2 + v_sat**2)
    ds_col['wind_dir'] = np.arctan2(v_sat, u_sat)*180/np.pi


In [None]:
# Mean and standard deviation of (CCMP minus USV) for u, v and wind speed.
variable_pairs = (('uwnd', 'UWND_MEAN'), ('vwnd', 'VWND_MEAN'), ('wind_speed', 'wind_speed'))
for sat_name, usv_name in variable_pairs:
    difference = ds_collocated[sat_name] - ds_usv_subset[usv_name]
    print(difference.mean().data, difference.std().data)


In [None]:
# Wind-speed difference over time: linearly interpolated CCMP minus USV.
speed_diff_linear = ds_collocated.wind_speed - ds_usv_subset.wind_speed
plt.plot(ds_collocated.time, speed_diff_linear, '.-')
plt.xticks(rotation='vertical')

In [None]:
# Wind-speed difference over time: nearest-neighbour CCMP minus USV.
speed_diff_nearest = ds_collocated_nearest.wind_speed - ds_usv_subset.wind_speed
plt.plot(ds_collocated_nearest.time, speed_diff_nearest, '.-')
plt.xticks(rotation='vertical')

In [None]:
#the idea here is that the USV data is being collocated repeatedly to the same CCMP data
#rather than just interpolate the CCMP data onto the USV data, use nearest to interpolate the nearest CCMP value
#now you can tell where the repeated data points are being collocated and this code
#goes through the data and creates averages of the USV data that match the single CCMP collocated value
#
# Results, one entry per distinct CCMP (uwnd, vwnd) pair:
#   dus / dvs : CCMP u / v value
#   duu / dvu : mean USV UWND_MEAN / VWND_MEAN over all samples collocated to that value
#   dut       : time of the first USV sample collocated to that value
ilen = ds_collocated_nearest.sizes['time']
ds_tem = ds_collocated_nearest.copy(deep=True)  # working copy; consumed samples are set to NaN
dus, duu, dvs, dvu, dut = [],[],[],[],[]
for index in range(ilen):
    u_sat = ds_collocated_nearest.uwnd[index].values
    v_sat = ds_collocated_nearest.vwnd[index].values
    if np.isnan(u_sat) or np.isnan(v_sat):
        continue  # no CCMP value at this sample
    # Samples not yet consumed that were collocated to this same CCMP value.
    same_value = (ds_tem.uwnd == u_sat) & (ds_tem.vwnd == v_sat)
    if not bool(same_value.any()):
        continue  # this sample was already averaged into an earlier entry
    dus.append(float(u_sat))
    duu.append(float(ds_usv_subset.UWND_MEAN.where(same_value).mean()))
    dvs.append(float(v_sat))
    dvu.append(float(ds_usv_subset.VWND_MEAN.where(same_value).mean()))
    dut.append(ds_collocated_nearest.time[index].values)
    ds_tem = ds_tem.where(~same_value)  #you have used values, so set to nan
    


In [None]:
#testing code above
# Spot check of the matching logic for one sample, printing a window of values around it.
check_index = 1055
window = slice(1050, 1150)
ds_tem = ds_collocated_nearest.copy(deep=True)
ref_u = ds_collocated_nearest.uwnd[check_index]
ref_v = ds_collocated_nearest.vwnd[check_index]
print(ref_u.data)
print(ds_collocated_nearest.uwnd[window].data)
# Keep only the samples whose CCMP u and v both equal the reference sample's.
is_same_value = (ds_collocated_nearest.uwnd==ref_u)&(ds_collocated_nearest.vwnd==ref_v)
test = ds_collocated_nearest.where(is_same_value)
test = test/test
print(test.uwnd[window].data)
# Blank out the matched samples in the working copy.
ds_tem = ds_tem.where(np.isnan(test), np.nan)
print(ds_tem.uwnd[window].data)
print((ds_usv_subset.UWND_MEAN*test.uwnd).mean())
print((ds_usv_subset.VWND_MEAN*test.vwnd).mean())


In [None]:
# Display the USV dataset restricted to the time steps where UWND_MEAN is not NaN.
ds_usv_subset

In [None]:
# Print the first five values and the first grid step of each CCMP axis.
for axis in (ds_sat.lon, ds_sat.lat):
    print(axis[0:5])
    print(axis[1]-axis[0])

In [None]:
# Copy of the USV subset whose lon/lat are snapped to a 0.25-degree grid.
# NOTE(review): the offsets presumably correspond to the first CCMP grid point
# (-179.875, -78.375) — confirm against the grid spacing printed from ds_sat.
grid_step = .25
lon_offset, lat_offset = 179.875, 78.375
ds_usv_subset_test = ds_usv_subset.copy(deep=True)
lon_cell = np.round((ds_usv_subset.lon+lon_offset)/grid_step+1)-1
lat_cell = np.round((ds_usv_subset.lat+lat_offset)/grid_step+1)-1
ds_usv_subset_test['lon'] = (lon_cell*grid_step)-lon_offset
ds_usv_subset_test['lat'] = (lat_cell*grid_step)-lat_offset

In [None]:
# Compare the first 500 USV positions (line with dots) with their grid-snapped versions (dots).
first_samples = slice(0, 500)
plt.plot(ds_usv_subset.lon[first_samples], ds_usv_subset.lat[first_samples], '.-')
plt.plot(ds_usv_subset_test.lon[first_samples], ds_usv_subset_test.lat[first_samples], '.')

In [None]:
# Experiment: interpolate the USV dataset onto the grid-snapped longitudes
# (the latitude argument is commented out).
# NOTE(review): ds_usv_subset is indexed by time (swap_dims when the file is read), and
# interp presumably needs lon to be a dimension — confirm this call runs as intended.
test = ds_usv_subset.interp(lon = ds_usv_subset_test.lon) #,lat = ds_usv_subset_test.lat)

In [None]:
# Pull the USV variables out of the dataset as plain numpy arrays.
# The coordinates were renamed to lat/lon when the file was read, so the original
# latitude/longitude names no longer exist on ds_usv.
lats_usv=ds_usv.lat.values
lons_usv=ds_usv.lon.values
skin=ds_usv.TEMP_IR_UNCOR_MEAN.values
tair=ds_usv.TEMP_AIR_MEAN.values
bulk1=ds_usv.TEMP_CTD_MEAN.values
bulk2=ds_usv.TEMP_O2_MEAN.values
uspd=ds_usv.UWND_MEAN.values
vspd=ds_usv.VWND_MEAN.values
wing=ds_usv.HDG_WING.values
wing_ang=ds_usv.WING_ANGLE.values
pitch=ds_usv.PITCH.values
# NOTE(review): heading reads HDG_WING, the same variable as wing above — confirm this is intended.
heading=ds_usv.HDG_WING.values
yaw_heading=ds_usv.HDG.values
roll=ds_usv.ROLL.values
tdim=len(skin)
# Angle of the (u, v) wind vector in degrees. Vectorised with numpy: atan2 was never
# imported in this notebook, and np.pi replaces the truncated constant 3.14159.
wdir=np.arctan2(vspd.astype(float),uspd.astype(float))*180/np.pi
wspd=(uspd**2+vspd**2)**.5

In [None]:
# Rebinds ds_sat to whatever get_ccmp returns for (2018, 170).
# NOTE(review): get_ccmp is not defined in the visible part of this notebook (only
# get_ccmp_filename is); the arguments look like (year, day-of-year) — confirm.
ds_sat = get_ccmp(2018,170)


In [None]:
# Legacy collocation loop: for each USV sample, find the nearest satellite grid indices and
# record a satellite value plus a running collocation counter.
# NOTE(review): this cell looks adapted from a GOES SST collocation notebook and cannot run
# as written — get_ccmp, idy_sv, sat_sst and nc are not defined in the visible notebook.
#get lat/lon from random file, it doesn't change
ds_sat = get_ccmp(2003,1)
lats_sat = ds_sat.lat
lons_sat = ds_sat.lon
ds_sat.close()
#initialize variables
latli_sv=-99
lonli_sv=-99
# NOTE(review): ihr_sv is initialised here and updated in the loop, but the loop condition
# compares idy_sv, which is never assigned.
ihr_sv=-99
col_count=0  #initialize
gsst=np.zeros(tdim)
gsst_num=np.zeros(tdim)
#get goes data collocated with usv positions and times
# Only the first 10 samples are processed; the full range (tdim) is commented out.
for i in range(0,10): #tdim):
    # Indices of the grid latitude/longitude closest to this USV position.
    latli = np.argmin( np.abs( lats_sat - lats_usv[i] ) )
    lonli = np.argmin( np.abs( lons_sat - lons_usv[i] ) )
    idy = ds_usv.time[i].dt.day.data        
    if (latli!=latli_sv) or (lonli_sv!=lonli) or (idy_sv!=idy):  #need to read in new data
        # NOTE(review): the dataset read here is not used again inside the loop.
        ds_sat = get_ccmp(ds_usv.time[i].dt.year.data,ds_usv.time[i].dt.dayofyear.data)
        col_count += 1
        # NOTE(review): sat_sst is never assigned in the visible code.
        gsst[i] = sat_sst
        gsst_num[i] = col_count  #this is to keep track of when a new file read in or new collocation point
        # NOTE(review): nc is never defined in the visible code.
        nc.close()
        latli_sv=latli
        lonli_sv=lonli
        ihr_sv=ds_usv.time[i].dt.hour.data
        print(i,tdim,lonli.data, latli.data,ihr_sv,gsst[i])
    else:  #collocation is to same gsst point
        gsst[i] = sat_sst 
        gsst_num[i] = col_count  #this is to keep track of when a new file read in or new collocation point


In [None]:
# Put the collocation results into an xarray Dataset similar in format to the USV data,
# with dimensions (trajectory, obs).
# NOTE(review): ds_usv.latitude / ds_usv.longitude are read below, but those variables were
# renamed to lat / lon when the USV file was read, and obs was swapped for time — confirm
# this cell still matches the current ds_usv.
gsst2 = np.zeros((1,tdim))
gsst_tem = np.zeros((1,tdim))
gsst2[0,:] = gsst
traj_obs_dims = ('trajectory', 'obs')
data_goes = xr.DataArray(gsst2,
                         coords={'trajectory': ds_usv.trajectory,'obs': ds_usv.obs},
                         dims=traj_obs_dims)
gsst_tem[0,:] = gsst_num
num_goes = xr.DataArray(gsst_tem,
                        coords={'trajectory': ds_usv.trajectory,'obs': ds_usv.obs},
                        dims=traj_obs_dims)
dataset_variables = {'goes_sst': data_goes, 'collocation_index': num_goes}
dataset_coords = {'trajectory':ds_usv.trajectory, 'time':ds_usv.time, 'latitude':ds_usv.latitude, 'longitude':ds_usv.longitude}
xr_gsst = xr.Dataset(dataset_variables, coords=dataset_coords)


In [None]:
# Display the Dataset holding goes_sst and collocation_index.
xr_gsst



In [None]:
#np.save(filename_goes_sst, gsst)