In [1]:
import pandas as pd
import numpy as np
import glob
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from scipy import stats


In [2]:
def ooi_kd_calc(df):
    """Estimate the diffuse attenuation coefficient (Kd) from one PAR profile.

    A straight line is fitted to ln(PAR) versus depth with
    ``scipy.stats.linregress``; the magnitude of its slope is reported as Kd.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``'z'`` column (depth; only its absolute value is
        used) and a ``'par'`` column. The frame is left untouched.

    Returns
    -------
    kd : float
        Absolute value of the fitted slope.
    rval : float
        Correlation coefficient of the fit (``fit.rvalue``).
    stderr : float
        ``exp(fit.stderr)``, the exponential of the slope's standard error.
    """
    # depth as a positive-down magnitude (np.abs already returns a new array)
    z = np.abs(df['z'].to_numpy())

    # Work on a private float copy of PAR. Writing into the array returned by
    # ``to_numpy()`` either alters the caller's DataFrame or, when pandas hands
    # back a read-only array, raises on the assignment below.
    par = np.array(df['par'], dtype=float)

    # Non-positive and missing readings cannot be log-transformed; they are
    # floored to 1 so that ln(par) becomes 0 for those samples.
    # NOTE(review): flooring (rather than dropping) these samples pulls the
    # fit towards ln(PAR) = 0 -- confirm this is the intended treatment.
    par[(par <= 0) | np.isnan(par)] = 1
    log_par = np.log(par)

    # Kd is taken from the log-linear slope of the light decay with depth
    fit = stats.linregress(x=z, y=log_par)
    kd = np.abs(fit.slope)
    rval = fit.rvalue
    # NOTE(review): the standard error is exponentiated before being returned;
    # kept as-is so stored results stay comparable -- confirm this is wanted.
    stderr = np.exp(fit.stderr)

    return kd, rval, stderr

In [3]:
# Calculate Kd for every OOI profile file and collect one summary row per file.
# NOTE(review): the Gulf of Mexico cell uses this very same glob pattern, so
# both cells scan the same directory listing -- confirm that is intended.
FF = glob.glob('data/data*.csv')
FF.sort()

# Every match starts with 'data/data' and ends with '.csv'; the station label
# is whatever lies between the two.
OOI_PREFIX_LEN = len('data/data')
CSV_SUFFIX_LEN = len('.csv')

ooi_rows = []
for jj, fpath in enumerate(FF):
    df = pd.read_csv(fpath,
                     index_col=0,
                     header=0,
                     usecols=list(range(6)),
                     names=['', 'time', 'lat', 'lon', 'z', 'par']
                    )

    try:
        kd, rval, stderr = ooi_kd_calc(df)
    except Exception as err:
        # Best effort: a profile that cannot be fitted is kept as a NaN row,
        # but the reason is reported instead of being silently discarded.
        print(f'[{jj}] Kd fit failed for {fpath}: {err!r}')
        kd = rval = stderr = np.nan

    # Metadata is read from the row labelled 1 in the file's own index column.
    ooi_rows.append({'time': df.loc[1, 'time'],
                     'lat': df.loc[1, 'lat'],
                     'lon': df.loc[1, 'lon'],
                     'kd': kd,
                     'rval': rval,
                     'stderr': stderr,
                     'stn': fpath[OOI_PREFIX_LEN:-CSV_SUFFIX_LEN]})

# Build the table once from the collected records (columns fixed explicitly so
# an empty file list still yields the expected header).
ooi_kd_array = pd.DataFrame(ooi_rows,
                            columns=['time','lat','lon','kd','rval','stderr','stn']
                           )

ooi_kd_array.to_csv('results/ooi_kdcalculations.csv')
ooi_kd_array

Unnamed: 0,time,lat,lon,kd,rval,stderr,stn
0,2019-06-03T07:59:00Z,44.6589,-124.09595,0.140367,-0.223163,1.106053,_ooi-ce01issp-sp001-10-paradj000_2019-06-03T06...
1,2019-08-06T20:00:00Z,44.6589,-124.09595,0.395541,-0.98714,1.015657,_ooi-ce01issp-sp001-10-paradj000_2019-08-06T15...
2,2020-08-04T02:20:00Z,44.6589,-124.09595,0.192614,-0.578791,1.040824,_ooi-ce01issp-sp001-10-paradj000_2020-08-03T22...
3,2021-07-04T08:00:00Z,44.6589,-124.09595,0.127788,-0.254607,1.084256,_ooi-ce01issp-sp001-10-paradj000_2021-07-04T06...
4,2021-08-02T08:03:00Z,44.6589,-124.09595,0.000062,-0.874119,1.000008,_ooi-ce01issp-sp001-10-paradj000_2021-08-02T05...
...,...,...,...,...,...,...,...
360,2023-12-21T23:33:00Z,45.816712,-129.754055,0.0,0.0,1.0,_ooi-rs03axps-sf03a-3c-parada301_2023-12-21 23...
361,2023-12-28T11:31:00Z,45.816712,-129.754055,0.004414,-0.139372,1.000633,_ooi-rs03axps-sf03a-3c-parada301_2023-12-28 11...
362,2024-03-21T19:11:00Z,45.816712,-129.754055,0.010257,-0.34197,1.000575,_ooi-rs03axps-sf03a-3c-parada301_2024-03-21 19...
363,2024-03-28T07:11:00Z,45.816712,-129.754055,0.004171,-0.156913,1.000544,_ooi-rs03axps-sf03a-3c-parada301_2024-03-28 07...


In [3]:
# Calculate Kd for all Gulf of Mexico CTD profiles, one summary row per file.
# NOTE(review): this glob pattern is identical to the one in the OOI cell, so
# both cells scan the same directory listing -- confirm that is intended.
FF = glob.glob('data/data*.csv')
FF.sort()

# The station label is the file path without its first 12 characters and
# without the 4-character '.csv' extension.
# NOTE(review): 12 is 3 more than len('data/data'); presumably the files carry
# an extra 3-character tag after 'data' -- verify against the file names.
WS_PREFIX_LEN = 12
CSV_SUFFIX_LEN = len('.csv')

ws_rows = []
for jj, fpath in enumerate(FF):
    df = pd.read_csv(fpath,
                     index_col=0,
                     header=0,
                     usecols=[0, 2, 3, 4, 5, 6, 28],
                     names=['','prof', 'time', 'lat', 'lon', 'z', 'par']
                    )

    try:
        kd, rval, stderr = ooi_kd_calc(df)
    except Exception as err:
        # Best effort: a profile that cannot be fitted is kept as a NaN row,
        # but the reason is reported instead of being silently discarded.
        print(f'[{jj}] Kd fit failed for {fpath}: {err!r}')
        kd = rval = stderr = np.nan

    # Metadata is read from the row labelled 1 in the file's own index column.
    ws_rows.append({'time': df.loc[1, 'time'],
                    'lat': df.loc[1, 'lat'],
                    'lon': df.loc[1, 'lon'],
                    'kd': kd,
                    'rval': rval,
                    'stderr': stderr,
                    'stn': fpath[WS_PREFIX_LEN:-CSV_SUFFIX_LEN]})

# Build the table once from the collected records (columns fixed explicitly so
# an empty file list still yields the expected header).
ws_kd_array = pd.DataFrame(ws_rows,
                           columns=['time','lat','lon','kd','rval','stderr','stn']
                          )

pname = 'results/'
ws_kd_array.to_csv(pname+'ws_kdcalculations.csv')
ws_kd_array

Unnamed: 0,time,lat,lon,kd,rval,stderr,stn
0,2018-12-18T09:10:05Z,24.671667,-81.203833,0.486118,-0.944229,1.004017,18351_WS18351_STA16_2018-12-18 03:10:05+00:00
1,2018-12-18T08:19:41Z,24.597167,-81.181333,0.681439,-0.740025,1.010011,18351_WS18351_STA18_2018-12-18 02:19:41+00:00
2,2020-08-20T00:54:19Z,25.404333,-82.2105,0.000122,-0.005332,1.000413,20231_WS20231_STA_30_2020-08-19 18:54:19+00:00
3,2020-10-09T09:36:26Z,26.632167,-82.5595,1.281848,-0.801846,1.017906,20279_WS20279_WS20278_Stn_BG4_2020-10-09 03:36...
4,2020-10-09T07:06:32Z,26.577167,-82.499167,1.95134,-0.903889,1.018198,20279_WS20279_WS20278_Stn_RP4_2020-10-09 01:06...
5,2021-04-04T20:59:47Z,25.347667,-81.655167,0.095027,-0.426382,1.003679,21093_WS21093_STN_57_3_2021-04-04 14:59:47+00:00
6,2022-05-25T12:17:32Z,27.617333,-83.2995,0.007681,-0.327918,1.000349,22141_WS22141_WS22141_Stn_TB5_2022-05-25 06:17...
7,2022-10-13T07:38:11Z,25.731833,-81.574667,0.322964,-0.621938,1.007433,22281_WS22281_Stn041_2022-10-13 01:38:11+00:00
8,2022-12-06T16:47:57Z,27.494333,-82.752333,0.073917,-0.479937,1.002323,22337_WS22337_WS22337_Stn_AMI1_2022-12-06 10:4...
9,2022-12-06T15:46:50Z,27.458333,-82.8465,0.033525,-0.399525,1.001226,22337_WS22337_WS22337_Stn_AMI2_2022-12-06 09:4...


In [4]:
# Process BCODMO data: reduce each file to one row per unique timestamp
# (these are the entries used to search ICESat-2) and pickle the result.
FF = sorted(glob.glob('bcodmo/*.csv'))

for jj, csv_path in enumerate(FF):
    # Row label 0 is removed before de-duplicating on 'time'; the index is
    # then renumbered from zero.
    df = (
        pd.read_csv(csv_path, header=0)
        .drop(0)
        .drop_duplicates(subset=['time'])
        .reset_index(drop=True)
    )

    print(csv_path)
    print(df.head())

    # same location and base name as the CSV, with '.csv' swapped for '_unique.pkl'
    fname = csv_path[:-4] + '_unique.pkl'
    df.to_pickle(fname)



bcodmo/bcodmo_dataset_518582_2e2e_e9a0_6c3e.csv
  cast latitude  longitude depth_w                  time     par
0   01   21.391  -158.2908    1323  2013-07-02T04:14:31Z  383.36
1   01    22.75  -158.0002    4743  2013-07-02T21:55:50Z  3588.1
2   02    22.75  -158.0003    4742  2013-07-03T13:53:21Z  2.2892
3   01  24.0882   -157.999    4496  2013-07-03T21:56:40Z  1001.3
4   01  27.0658  -157.9975    5392  2013-07-04T13:59:16Z  1.9284
bcodmo/bcodmo_dataset_757784_f8fc_ca99_ecce.csv
  Station       Site                  time depth     Par  latitude  longitude
0     001  Shakedown  2018-05-07T17:46:00Z   3.0  1460.0  11.24758  -56.32338
1     001  Shakedown  2018-05-07T17:47:00Z   7.0   169.0  11.24768  -56.32336
2     001  Shakedown  2018-05-07T17:48:00Z  30.0    45.1  11.24788  -56.32324
3     001  Shakedown  2018-05-07T17:49:00Z  56.0    14.8  11.24816  -56.32307
4     001  Shakedown  2018-05-07T17:50:00Z  85.0    3.71  11.24842  -56.32296
bcodmo/bcodmo_dataset_774958_89bb_d996_f2f0.cs

In [20]:
# Process BCODMO data: inspect a single file from the sorted listing
FF = sorted(glob.glob('bcodmo/*.csv'))

# position (in the sorted listing) of the file to look at
jj = 1

# one row per unique timestamp; row 0 is NOT dropped here
df = pd.read_csv(FF[jj], header=0).drop_duplicates(subset=['time'])
df.head()

Unnamed: 0,Station,Site,time,depth,Par,latitude,longitude
0,unitless,unitless,UTC,m,micromoles photons per square meter per second...,degrees_north,degrees_east
1,001,Shakedown,2018-05-07T17:46:00Z,3.0,1460.0,11.24758,-56.32338
5,001,Shakedown,2018-05-07T17:47:00Z,7.0,169.0,11.24768,-56.32336
28,001,Shakedown,2018-05-07T17:48:00Z,30.0,45.1,11.24788,-56.32324
54,001,Shakedown,2018-05-07T17:49:00Z,56.0,14.8,11.24816,-56.32307
