In [1]:
import os
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import datetime as dt
import xarray as xr
from math import atan2, log

In [2]:
#input filename
dir_in='f:/data/cruise_data/saildrone/baja-2018/daily_files/sd-1002/2018/'
dir_out='f:/data/cruise_data/saildrone/baja-2018/daily_files/sd-1002/'
filename_in='f:/data/cruise_data/saildrone/baja-2018/daily_files/sd-1002/l3r_temp.nc'

#definitions and things you might have to set differently for each file
itow_mask1=45 #see code just a bit below for figure to determine where to set flags here
itow_mask2=-110 #see code just a bit below for figure to determine where to set flags here
ISDP = 'Saildrone'
SST_type = 'SSTdepth'
Annex_version = '01.1'
File_version = '01.0'
astr_platform='SD1002'
astr_title = 'Data from Saildrone cruise from SF to Guadalupe Island April-June 2018'
astr_uuid = '0f410de6-4ba5-4f79-af20-8a57a445f454'
droplist=['WWND_STDDEV', 'CHLOR_MEAN','RH_MEAN','WWND_MEAN','O2_CONC_STDDEV','CDOM_STDDEV',
                          'TEMP_O2_STDDEV','BARO_PRES_MEAN','TEMP_O2_MEAN','SAL_STDDEV','TEMP_AIR_MEAN',
                          'CDOM_MEAN','SAL_MEAN','O2_SAT_MEAN','CHLOR_STDDEV', 'COND_STDDEV', 'COND_MEAN',
                          'BKSCT_RED_MEAN', 'TEMP_IR_MEAN', 'O2_SAT_STDDEV','O2_CONC_MEAN', 'TEMP_AIR_STDDEV', 
                          'BARO_PRES_STDDEV', 'TEMP_IR_STDDEV', 'VWND_STDDEV','RH_STDDEV', 'GUST_WND_STDDEV', 'GUST_WND_MEAN',
                          'BKSCT_RED_STDDEV', 'UWND_STDDEV','HDG_WING','WING_ANGLE']

In [3]:
#read data back in, into two arrays one with time encoding and one without
dataset=xr.open_dataset(filename_in,decode_times=False)
dataset_decodetime=xr.open_dataset(filename_in)

In [4]:
mint=dataset_decodetime.TIME.min().data
maxt=dataset_decodetime.TIME.max().data
dataset.attrs['time_coverage_start']=str(np.datetime64(mint,'ms'))+'Z'
dataset.attrs['time_coverage_end']=str(np.datetime64(maxt,'ms'))+'Z'
ilen=(len(dataset.LAT['obs']))
lats_usv=dataset.LAT[0,:].values
lons_usv=dataset.LON[0,:].values
dates_usv64=dataset_decodetime.TIME[0,:].values
dates_usv=pd.to_datetime(dates_usv64, unit='ns')
ilen

85681

In [5]:
#calculate the average distance between observations as the spatial resolution global attribute
#import math
#from math import cos
# approximate radius of earth in km
R = 6373.0 #km
lat1 = np.deg2rad(lats_usv[1:ilen])
lon1 = np.deg2rad(lons_usv[1:ilen])
lat2 = np.deg2rad(lats_usv[0:ilen-1])
lon2 = np.deg2rad(lons_usv[0:ilen-1])
dlon = lon2 - lon1
dlat = lat2 - lat1
a = np.sin(dlat / 2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2)**2
c = 2 * np.arctan2(a**.5, (1 - a)**.5)
distance = R * c

In [6]:
indicative_date_time=dates_usv[0].strftime("%Y%m%d%H%M%S")
Product_string = str(dataset.TEMP_CTD_MEAN.model_number) + '_' + str(dataset.TEMP_CTD_MEAN.serial_number)
filename_L3R = dir_out + indicative_date_time + \
    '-' + ISDP + '-' + 'L3R' + '-' + SST_type + '-' +Product_string+ '-v' +Annex_version+ '-fv' +File_version+ '.nc'
filename_L3R

'f:/data/cruise_data/saildrone/baja-2018/daily_files/sd-1002/20180411180000-Saildrone-L3R-SSTdepth-CTD-NH_2632-v01.1-fv01.0.nc'

In [7]:
#add global attributes that are missing
#some of these will need to be changed for new cruises
dataset.attrs['title'] = astr_title
dataset.attrs['summary'] = 'none'
dataset.attrs['references'] = 'none'
dataset.attrs['institution'] = 'Saildrone'
dataset.attrs['history'] = 'Saildrone 6-hourly v1 files were used to create this file'
dataset.attrs['comment'] = 'none'
dataset.attrs['license'] = 'free and open'
dataset.attrs['id'] = 'SSTdepth'
dataset.attrs['naming_authority'] = 'org.shipborne-radiometer'
dataset.attrs['product_version'] = '1.0'
dataset.attrs['uuid'] = astr_uuid 
dataset.attrs['l2r_version_id'] = '1.1' 
dataset.attrs['netcdf_version_id'] = '4.6.1'
dataset.attrs['date_created'] = dt.datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ") #yyyy-mm-ddThh:mm:ssZ
dataset.attrs['file_quality_level'] = 3
dataset.attrs['spatial_resolution'] = str(distance.mean()*1000)+' m'
dataset.attrs['start_time'] = dates_usv[0].strftime("%Y-%m-%dT%H:%M:%SZ")
dataset.attrs['time_coverage_start'] = dates_usv[0].strftime("%Y-%m-%dT%H:%M:%SZ")
dataset.attrs['stop_time'] = dates_usv[-1].strftime("%Y-%m-%dT%H:%M:%SZ")
dataset.attrs['time_coverage_end'] = dates_usv[-1].strftime("%Y-%m-%dT%H:%M:%SZ")
dataset.attrs['northernmost_latitude'] = lats_usv.max()
dataset.attrs['geospatial_lat_max'] = lats_usv.max()
dataset.attrs['southernmost_latitude'] = lats_usv.min()
dataset.attrs['geospatial_lat_min'] = lats_usv.min()
dataset.attrs['easternmost_longitude'] = lons_usv.max()
dataset.attrs['geospatial_lon_max'] = lons_usv.max()
dataset.attrs['westernmost_longitude'] = lons_usv.min()
dataset.attrs['geospatial_lon_min'] = lons_usv.min()
dataset.attrs['geospatial_lat_units'] = 'degrees_north'
dataset.attrs['geospatial_lon_units'] = 'degrees_east'
dataset.attrs['source'] = 'SSTdepth, wind_speed'
dataset.attrs['platform'] = astr_platform
dataset.attrs['sensor'] = str(dataset.TEMP_CTD_MEAN.sensor_description + '_' +\
            dataset.TEMP_CTD_MEAN.model_number + '_' + dataset.TEMP_CTD_MEAN.serial_number + ', ' + \
            dataset.UWND_MEAN.sensor_description + '_' + dataset.UWND_MEAN.model_number + '_' + \
                    dataset.UWND_MEAN.serial_number)
dataset.attrs['metadata_link'] = 'TBD'
dataset.attrs['keywords'] = 'Oceans > Ocean Temperature > Sea Surface Temperature'
dataset.attrs['keywords_vocabulary'] = 'NASA Global Change Master Directory (GCMD) Science Keywords'
dataset.attrs['acknowledgment'] = 'The Schmidt Family Foundation, Saildrone, NASA Physical Oceanography'
dataset.attrs['project'] = 'International Shipborne Radiometer Network'
dataset.attrs['publisher_name'] = 'The ISRN Project Office'
dataset.attrs['publisher_url'] = 'http://www.shipborne.radiometer.org'
dataset.attrs['publisher_email'] = 'info@shipborne-radiometer.org'
dataset.attrs['processing_level'] = '1.0'
del dataset.attrs['nodc_template_version']

In [8]:
#copy the attributes from uwnd_mean cause they are nice, add required CF ones too
#create wind_speed varible
dataset_tem=dataset.copy(deep=True)
attrs = dataset.UWND_MEAN.attrs.copy()
attrs['standard_name'] = 'wind_speed'
attrs['long_name'] = 'wind_speed'
attrs['valid_min'] = 0
attrs['valid_max'] = 100
attrs['source'] = 'anemometer'
attrs['comment'] = 'Instrument located at to of Saildrone mast at ' +\
                str(dataset.UWND_MEAN.installed_height)+' m' + '.  This was adjusted ' +\
                'to 10 m as ws_10m = ws*log(10./1e-4))/log(WS_height/1e-4'
attrs['height'] = '10 m' #str(str(dataset.UWND_MEAN.installed_height)+' m')
WS=(dataset['UWND_MEAN']**2+dataset['VWND_MEAN']**2)**.5
WS_height=int(dataset.UWND_MEAN.installed_height)
WS_10m = (WS*log(10./1e-4))/log(WS_height/1e-4)
#dataset2['wind_speed']=
dataset['wind_speed']=WS_10m
dataset.wind_speed.attrs=attrs

#copy the attributes from uwnd_mean cause they are nice, add required CF ones too
#create wind_direction varible
attrs = dataset.UWND_MEAN.attrs.copy()
attrs['standard_name'] = 'wind_to_direction'
attrs['long_name'] = 'local wind direction'
attrs['valid_min'] = 0
attrs['valid_max'] = 360
attrs['units'] = 'degrees'
attrs['source'] = 'anemometer'
attrs['height'] = str(str(dataset.UWND_MEAN.installed_height)+' m')
WD=np.arctan2(dataset.VWND_MEAN,dataset.UWND_MEAN)
dataset['wind_direction']=WD
dataset.wind_direction.attrs=attrs

In [9]:
#rename into that names that are CF compliant
#need to check with JFP about _mean vs inst ob
dataset_tem=dataset.copy(deep=True)
dataset_tem2=dataset.copy(deep=True)

dataset_tem2['TEMP_CTD_MEAN'] = dataset_tem['TEMP_CTD_MEAN'].astype(np.float32,copy=False)
attrs = dataset.TEMP_CTD_MEAN.attrs.copy()
dataset_tem2.TEMP_CTD_MEAN.attrs=attrs
dataset_tem2['LAT'] = dataset_tem['LAT'].astype(np.float32,copy=False)
attrs = dataset.LAT.attrs.copy()
dataset_tem2.LAT.attrs=attrs
dataset_tem2['LON'] = dataset_tem['LON'].astype(np.float32,copy=False)
attrs = dataset.LON.attrs.copy()
dataset_tem2.LON.attrs=attrs
dataset_tem2['TEMP_CTD_MEAN'] = dataset_tem['TEMP_CTD_MEAN'].astype(np.float32,copy=False)
attrs = dataset.TEMP_CTD_MEAN.attrs.copy()
dataset_tem2.TEMP_CTD_MEAN.attrs=attrs
dataset_tem2['COG'] = dataset_tem['COG'].astype(np.float32,copy=False)
attrs = dataset.COG.attrs.copy()
dataset_tem2.COG.attrs=attrs
dataset_tem2['HDG'] = dataset_tem['HDG'].astype(np.float32,copy=False)
attrs = dataset.HDG.attrs.copy()
dataset_tem2.HDG.attrs=attrs
dataset_tem2['ROLL'] = dataset_tem['ROLL'].astype(np.float32,copy=False)
attrs = dataset.ROLL.attrs.copy()
dataset_tem2.ROLL.attrs=attrs
dataset_tem2['PITCH'] = dataset_tem['PITCH'].astype(np.float32,copy=False)
attrs = dataset.PITCH.attrs.copy()
dataset_tem2.PITCH.attrs=attrs
dataset_tem2['SOG'] = dataset_tem['SOG'].astype(np.float32,copy=False)
attrs = dataset.SOG.attrs.copy()
dataset_tem2.SOG.attrs=attrs
dataset_tem2['TEMP_CTD_STDDEV'] = dataset_tem['TEMP_CTD_STDDEV'].astype(np.float32,copy=False)
attrs = dataset.TEMP_CTD_STDDEV.attrs.copy()
dataset_tem2.TEMP_CTD_STDDEV.attrs=attrs
dataset_tem2['wind_speed'] = dataset_tem['wind_speed'].astype(np.float32,copy=False)
attrs = dataset.wind_speed.attrs.copy()
dataset_tem2.wind_speed.attrs=attrs
dataset_tem2['wind_direction'] = dataset_tem['wind_direction'].astype(np.float32,copy=False)
attrs = dataset.wind_direction.attrs.copy()
dataset_tem2.wind_direction.attrs=attrs
dataset_tem2['TIME'] = dataset_tem['TIME'].astype(np.float32,copy=False)
attrs = dataset.TIME.attrs.copy()
dataset_tem2.TIME.attrs=attrs

dataset2 = dataset_tem2.copy(deep=True)
dataset2 = dataset_tem2.rename(
    {'TEMP_CTD_MEAN': 'sea_water_temperature',
     'COG': 'course_over_ground',
     'LAT': 'lat',
     'LON': 'lon',
     'TIME': 'time',
     'HDG': 'true_bearing',
     'ROLL': 'platform_roll',
     'PITCH': 'platform_pitch',
     'SOG': 'speed_over_ground',
    'TEMP_CTD_STDDEV':'sst_total_uncertainty'})

In [10]:
#add missing attributes to sea_water_temperature
dataset2['sea_water_temperature'].values=dataset2['sea_water_temperature'].values+273.15 #change to kelvin
attrs = dataset.TEMP_CTD_MEAN.attrs.copy()
dataset2.sea_water_temperature.attrs=attrs
dataset2.sea_water_temperature.attrs['valid_min']=260.0
dataset2.sea_water_temperature.attrs['valid_max']=320.0
dataset2.sea_water_temperature.attrs['units']='kelvin'
dataset2.sea_water_temperature.attrs['long_name']='sea surface depth temperature at 0.6m'
dataset2.time.attrs['standard_name']='time'
dataset2.time.attrs['long_name']='time'
dataset2.lon.attrs['standard_name']='longitude'
dataset2.lon.attrs['long_name']='longitude'
dataset2.lat.attrs['long_name']='latitude'
dataset2.lat.attrs['standard_name']='latitude'
dataset2.true_bearing.attrs['long_name']='platform true bearing'
dataset2.true_bearing.attrs['standard_name']='platform_orientation'
dataset2.speed_over_ground.attrs['long_name']='platform speed over ground'
dataset2.sst_total_uncertainty.attrs['standard_name']='sea_water_temperature standard error'
dataset2.sst_total_uncertainty.attrs['long_name']=' sea water temperature total uncertainty'
dataset2.sst_total_uncertainty.attrs['valid_min']=0.0
dataset2.sst_total_uncertainty.attrs['valid_max']=2.0
dataset2.sst_total_uncertainty.attrs['units']='kelvin'

In [11]:
flag_bytes=np.byte((0,1,2,3,4,5)) #bytearray([0,1,2,3,4,5])
iobs=dataset2.dims['obs']
iqual_byte = np.ones(shape=(iobs,1), dtype='b')*5  #change byte to b1
iqual_byte[:itow_mask1] = 2  #set at top of file from looking at data
iqual_byte[itow_mask2:] = 2
attrs = {'long_name': 'measurement quality value','coordinates': 'time',
         'flag_meanings': 'no_data bad_data worst_quality low_quality acceptable_quality best_quality',
         'flag_values': flag_bytes }
({'obs': ('obs', [0, 1, 2, 3], attrs)})
dataset2['quality_level'] = (('trajectory', 'obs'), iqual_byte.T)
dataset2.quality_level.attrs=attrs
dataset2.quality_level

<xarray.DataArray 'quality_level' (trajectory: 1, obs: 85681)>
array([[2, 2, 2, ..., 2, 2, 2]], dtype=int8)
Coordinates:
  * trajectory  (trajectory) float32 1002.0
Dimensions without coordinates: obs
Attributes:
    flag_meanings:  no_data bad_data worst_quality low_quality acceptable_qua...
    coordinates:    time
    flag_values:    [0 1 2 3 4 5]
    long_name:      measurement quality value

In [12]:
#dataset2 = dataset2.drop(['UWND_MEAN','VWND_MEAN','trajectory'])
dataset2 = dataset2.drop(['UWND_MEAN','VWND_MEAN'])
dataset2=dataset2.squeeze()
dataset2 = dataset2.rename({'obs':'time'})

In [13]:
dataset2['sea_water_temperature'].attrs['coordinates']='time'
dataset2['sst_total_uncertainty'].attrs['coordinates']='time'
dataset2['speed_over_ground'].attrs['coordinates']='time'
dataset2['course_over_ground'].attrs['coordinates']='time'
dataset2['platform_roll'].attrs['coordinates']='time'
dataset2['platform_pitch'].attrs['coordinates']='time'
dataset2['true_bearing'].attrs['coordinates']='time'
dataset2['wind_speed'].attrs['coordinates']='time'
dataset2['wind_direction'].attrs['coordinates']='time'
dataset2['quality_level'].attrs['coordinates']='time'

In [14]:
dataset2['platform_pitch']

<xarray.DataArray 'platform_pitch' (time: 85681)>
array([0.7, 0.8, 1. , ..., 1.4, 1.3, 1.2], dtype=float32)
Coordinates:
    trajectory  float32 1002.0
Dimensions without coordinates: time
Attributes:
    standard_name:  platform_pitch_angle
    long_name:      Pitch
    coordinates:    time
    units:          degree

In [15]:

#JF If I # these lines out it won't write out
#If I del the coordinates then writes out, but without coordinates

#del dataset2['sea_water_temperature'].attrs['coordinates']
#del dataset2['sst_total_uncertainty'].attrs['coordinates']
#del dataset2['speed_over_ground'].attrs['coordinates']
#del dataset2['course_over_ground'].attrs['coordinates']
#del dataset2['platform_roll'].attrs['coordinates']
#del dataset2['platform_pitch'].attrs['coordinates']
#del dataset2['true_bearing'].attrs['coordinates']
#del dataset2['wind_speed'].attrs['coordinates']
#del dataset2['wind_direction'].attrs['coordinates']
#del dataset2['quality_level'].attrs['coordinates']

In [16]:
#xarray to_netcdf function doesn't handle encoding vs set attributes very well.  for some of
#the variables, there is secret encoding that doesn't print out when you look at the variable.
#So, if you set an attribute that conflicts with encodings, when you try to to_netcdf output the file
#you get a really odd error about overwriting attributes.  In order to see the encodings you have to look here:
#dataset3.platform_pitch.encoding
#you can either set the encoded values directly dataset3.platform_pitch.encoding['_FillValue']=-1.0
#or you set it when writing the file out as done below
dataset2.quality_level.attrs['_FillValue']=-128
dataset2.sst_total_uncertainty.attrs['_FillValue']=-9.96921e+36
dataset2.sea_water_temperature.attrs['_FillValue']=-9.96921e+36
dataset2.speed_over_ground.attrs['_FillValue']=-9.96921e+36
dataset2.platform_roll.attrs['_FillValue']=-9.96921e+36
dataset2.true_bearing.attrs['_FillValue']=-9.96921e+36
dataset2.course_over_ground.attrs['_FillValue']=-9.96921e+36
dataset2.platform_pitch.attrs['_FillValue']=-9.96921e+36
dataset2.lon.attrs['_FillValue']=-9.96921e+36
dataset2.wind_speed.attrs['_FillValue']=-9.96921e+36
dataset2.wind_direction.attrs['_FillValue']=-9.96921e+36
dataset2.time.attrs['_FillValue']=-9.96921e+36
dataset2.lat.attrs['_FillValue']=-9.96921e+36
dataset2.lat.attrs['standard_name']='latitude'
dataset2.lon.attrs['standard_name']='longitude'
#comp = dict(_FillValue=-9.96921e+36)
#encoding = {var: comp for var in dataset4.data_vars}
#encoding['quality_level']=-128
#print(encoding)
#comp = dict(coordinates='time')
#encoding = {var: comp for var in dataset2.data_vars}
dataset2.to_netcdf(filename_L3R) #, encoding=encoding)
filename_L3R

ValueError: cannot serialize coordinates because variable sea_water_temperature already has an attribute 'coordinates'