In [1]:
from datetime import datetime
import fsspec
import os
import matplotlib.pyplot as plt
import multiprocessing
import numpy as np
import xarray as xr

In [2]:
# Local-filesystem handle used by the glob calls that build the per-sensor file lists.
local = fsspec.filesystem('file')
# Root directory of the daily sensor files. Set the PCO2_SKQ_DATA_DIR environment
# variable to point the notebook at another copy of the data; when it is unset or
# empty, the original hardcoded location is used.
data_dir = os.environ.get('PCO2_SKQ_DATA_DIR') or 'C:/Users/Ian/Box/pco2_skq/data/daily'

In [98]:
def _glob_daily(pattern):
    """Return the local paths under ``data_dir`` that match ``pattern``."""
    return local.glob(os.path.join(data_dir, pattern))


# One file list per sensor; each pattern is matched one directory level below data_dir.
apollo_files = _glob_daily('*/*APOLLO*.nc')
wind_files = _glob_daily('*/*GILL75-1*.nc')
sst_files = _glob_daily('*/*SBE38-1*.nc')
met_files = _glob_daily('*/*MET*.nc')
gps_files = _glob_daily('*/*GPS-POSITION*.nc')
sbe45_files = _glob_daily('*/*SBE45-1*.nc')
oxy_files = _glob_daily('*/*OPTODE*.nc')
trip_files = _glob_daily('*/*TRIPLET*.nc')
nit_files = _glob_daily('*/*SUNA*.nc')

In [5]:
def combine_sensor_files(filepaths):
    """Open every file in ``filepaths`` and merge the results into one dataset.

    Parameters
    ----------
    filepaths : iterable of str
        Paths handed one by one to ``xr.open_dataset``.

    Returns
    -------
    The combined dataset. ``xr.combine_by_coords`` is tried first; if it
    raises, the datasets are concatenated along ``time`` instead and
    duplicate ``time`` values are dropped.
    """
    # Open the files in parallel worker processes, one dataset per path.
    with multiprocessing.Pool() as pool:
        ds_list = pool.map(xr.open_dataset, filepaths)
    try:
        ds = xr.combine_by_coords(ds_list, combine_attrs='drop_conflicts')
    except Exception:
        # Deliberately not a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit still propagate instead of silently triggering the fallback.
        ds = xr.concat(ds_list, dim='time', combine_attrs='drop_conflicts')
        ds = ds.drop_duplicates(dim='time')
    return ds

In [6]:
def med_filt(da: xr.DataArray, window_size: int = 11) -> xr.DataArray:
    """Return a centered rolling median of ``da`` along its ``time`` dimension.

    Parameters
    ----------
    da : xr.DataArray
        Array to smooth; it must have a ``time`` dimension.
    window_size : int, default 11
        Number of samples in each rolling window.

    Returns
    -------
    xr.DataArray
        The rolling median. Windows need only one valid sample
        (``min_periods=1``) and NaN values are skipped.
    """
    window = {'time': window_size}
    rolled = da.rolling(window, center=True, min_periods=1)
    return rolled.median(skipna=True)

def stdev_filt(da: xr.DataArray, multiplier: float = 3) -> xr.DataArray:
    """Replace values far from the mean with NaN.

    Parameters
    ----------
    da : xr.DataArray
        Array to filter.
    multiplier : float, default 3
        Half-width of the accepted band, in standard deviations of ``da``.

    Returns
    -------
    xr.DataArray
        ``da`` with every value outside the open interval
        ``(mean - multiplier * std, mean + multiplier * std)`` set to NaN.
        The comparisons are strict, so values exactly on a bound are masked too.
    """
    # Reduce once each; the original evaluated da.mean() and da.std() twice.
    center = da.mean()
    half_width = multiplier * da.std()
    within_band = (da > center - half_width) & (da < center + half_width)
    return da.where(within_band, np.nan)

In [93]:
# Load the TRIPLET files, order them by time and pull out the two fluorometric variables.
ecotrip = combine_sensor_files(trip_files)
ecotrip = ecotrip.sortby('time')
chl = ecotrip.fluorometric_chlorophyll_a
cdom = ecotrip.fluorometric_cdom

# 59-sample rolling median of each variable.
chlr = med_filt(chl, window_size = 59)
# Filter cdom here: passing chl again would make cdomr a duplicate of chlr.
cdomr = med_filt(cdom, window_size = 59)

In [96]:
# SBE45 files, combined and ordered by time.
sbe45 = combine_sensor_files(sbe45_files).sortby('time')
pracsal = sbe45.sea_water_practical_salinity
# Rolling median over 13 * 5 = 65 samples.
pracsalr = med_filt(pracsal, window_size=13 * 5)

In [46]:
# Combine the SBE38 files, order by time, rename the temperature variable and
# keep only that variable as a DataArray.
sst = (
    combine_sensor_files(sst_files)
    .sortby('time')
    .rename({'sea_water_temperature': 'sea_surface_temperature'})
    .sea_surface_temperature
)
# 29-sample rolling median.
sstr = med_filt(sst, window_size=29)

In [73]:
# OPTODE files, combined and ordered by time.
oxy = combine_sensor_files(oxy_files).sortby('time')
doxy = oxy.dissolved_oxygen
# Rolling median over 13 * 5 = 65 samples.
doxyr = med_filt(doxy, window_size=13 * 5)

In [24]:
# GILL75-1 files, combined and ordered by time.
wind = combine_sensor_files(wind_files).sortby('time')
wind_dir = wind.true_wind_direction
wind_spd = wind.true_wind_speed
# Direction is smoothed with an 11-sample window, speed with a 29-sample window.
# NOTE(review): a plain rolling median does not account for wrap-around in a
# direction series (presumably degrees, 0/360) - confirm this is acceptable.
wind_dirr = med_filt(wind_dir, window_size=11)
wind_spdr = med_filt(wind_spd, window_size=29)

In [43]:
# MET files, combined and ordered by time.
met = combine_sensor_files(met_files).sortby('time')
barop = met.barometric_pressure
air_temp = met.air_temperature
# Pressure is smoothed with a 59-sample window, air temperature with a 29-sample window.
baropr = med_filt(barop, window_size=59)
air_tempr = med_filt(air_temp, window_size=29)

In [51]:
# GPS-POSITION files, combined and ordered by time; no filtering is applied here.
gps = combine_sensor_files(gps_files).sortby('time')
lat, lon = gps.latitude, gps.longitude

In [104]:
# Combine the SUNA files, order by time and keep only the nitrate variable.
nit = combine_sensor_files(nit_files).sortby('time').nitrate
# 5-sample rolling median.
nitr = med_filt(nit, window_size=5)

In [110]:
apollo = combine_sensor_files(apollo_files)
# Split the records by the text in sample_source.
asw = apollo.where(apollo.sample_source.str.contains('Seawater'), drop = True)
# aair previously reused the 'Seawater' filter and was therefore identical to asw.
# TODO confirm: 'Air' is assumed to be the sample_source label for air samples;
# check the values actually present in apollo.sample_source.
aair = apollo.where(apollo.sample_source.str.contains('Air'), drop = True)

In [111]:
# Show the subset of apollo whose sample_source contains 'Seawater'.
asw

In [105]:
# Overlay the raw nitrate series and its median-filtered version so the effect
# of the filter can be judged by eye.
fig, ax = plt.subplots(1, 1)
ax.plot(nit.time, nit, label='raw')
ax.plot(nitr.time, nitr, label='median filtered')
ax.set_xlabel('time')
ax.set_ylabel('nitrate')
ax.set_title('Nitrate: raw vs. median filtered')
ax.legend();  # trailing semicolon suppresses the repr echo in the notebook