In [257]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm
%matplotlib notebook
import matplotlib as mpl
import matplotlib.patches as patches
from datetime import datetime, timedelta

In [227]:
# List the column names of the 2017-12-19 Casco Bay CSV (read with default header handling).
pd.read_csv('FLO_L3_CascoBay20171219_revised.csv').columns

Index(['/fields=year', 'month', 'day', 'secDay', 'Lat', 'Lon', 'heading',
       'speed', 'Wt', 'cond', 'sal', 'stimfCDOM', 'stimf', 'c660',
       'SBE43_oxygen', 'pressure_atm', 'L1_fCO2', 'L2_fCO2', 'omega_Ar',
       'omega_Ca', 'bottom_flag'],
      dtype='object')

In [228]:
# Load the cleaned CAB1409 tidal-current prediction time series for 2017-05-02.
curr_df = pd.read_csv('Tidal_Current Data/CAB1409_prediction_Timeseries_2017-05-02_cleaned.csv')

In [229]:
def _seconds_into_day(stamp):
    """Return how many seconds past midnight a 'YYYY-MM-DD HH:MM:SS AM/PM' string is."""
    since_epoch = datetime.strptime(stamp, '%Y-%m-%d %I:%M:%S %p') - datetime(1970, 1, 1)
    # Only the time of day matters, so discard whole days.
    return since_epoch.total_seconds() % (24 * 60 * 60)

curr_df['secDay'] = curr_df['Date_Time (GMT)'].apply(_seconds_into_day)


In [230]:
# Preview the first rows, including the derived secDay column.
curr_df.head()

Unnamed: 0,Date_Time (GMT),Speed (cm/sec),Direction (degrees relative to North),secDay
0,2017-05-02 12:00:00 AM,-20.8,179,0.0
1,2017-05-02 12:06:00 AM,-18.9,179,360.0
2,2017-05-02 12:12:00 AM,-17.1,179,720.0
3,2017-05-02 12:18:00 AM,-15.4,179,1080.0
4,2017-05-02 12:24:00 AM,-13.7,179,1440.0


In [263]:
# Load the table of predicted high/low tides.
tidal_df = pd.read_csv('Tidal_data.csv')

In [264]:
# Display the raw tide table.
tidal_df

Unnamed: 0,Date,Day,Time (EST),Pred,High/Low
0,2017/03/17,Fri,02:51,9.36,H
1,2017/03/17,Fri,09:08,0.39,L
2,2017/03/17,Fri,15:20,8.73,H
3,2017/03/17,Fri,21:21,0.98,L
4,2017/05/02,Tue,04:23,10.47,H
5,2017/05/02,Tue,10:53,-0.53,L
6,2017/05/02,Tue,17:14,9.44,H
7,2017/05/02,Tue,23:14,0.71,L
8,2017/12/19,Tue,05:24,0.95,L
9,2017/12/19,Tue,11:33,9.77,H


In [273]:
# Cleans the tide table: builds EST and GMT timestamp strings, converts the
# predicted height from feet to meters, and derives the GMT second-of-day.

raw_fmt = '%Y/%m/%d %H:%M'
stamp_fmt = '%Y-%m-%d %I:%M:%S %p'

# Parse each row's date and time once, then derive everything from the datetime objects.
est_stamps = [datetime.strptime(day + ' ' + clock, raw_fmt)
              for day, clock in zip(tidal_df['Date'], tidal_df['Time (EST)'])]
# NOTE(review): a fixed +5 h offset is applied to every row; confirm the source times are standard time year-round.
gmt_stamps = [stamp + timedelta(hours=5) for stamp in est_stamps]

tidal_df['datetime (EST)'] = [stamp.strftime(stamp_fmt) for stamp in est_stamps]
tidal_df['datetime (GMT)'] = [stamp.strftime(stamp_fmt) for stamp in gmt_stamps]
tidal_df['Pred (m)'] = tidal_df['Pred'] * 0.3048 # converts ft to meters

# Only the number of seconds into the (GMT) day is kept.
tidal_df['secDay'] = [(stamp - datetime(1970, 1, 1)).total_seconds() % (24 * 60 * 60)
                      for stamp in gmt_stamps]


In [332]:
# Keeps only the tides whose EST and GMT timestamps fall on the same calendar
# date, then renumbers the rows from zero.
stamp_fmt = '%Y-%m-%d %I:%M:%S %p'
est_dates = tidal_df['datetime (EST)'].apply(lambda s: datetime.strptime(s, stamp_fmt).date())
gmt_dates = tidal_df['datetime (GMT)'].apply(lambda s: datetime.strptime(s, stamp_fmt).date())
tidal_df = tidal_df[est_dates == gmt_dates].reset_index(drop=True)
tidal_df

Unnamed: 0,Date,Day,Time (EST),Pred,High/Low,datetime (EST),datetime (GMT),Pred (m),secDay
0,2017/03/17,Fri,02:51,9.36,H,2017-03-17 02:51:00 AM,2017-03-17 07:51:00 AM,2.852928,28260.0
1,2017/03/17,Fri,09:08,0.39,L,2017-03-17 09:08:00 AM,2017-03-17 02:08:00 PM,0.118872,50880.0
2,2017/03/17,Fri,15:20,8.73,H,2017-03-17 03:20:00 PM,2017-03-17 08:20:00 PM,2.660904,73200.0
3,2017/05/02,Tue,04:23,10.47,H,2017-05-02 04:23:00 AM,2017-05-02 09:23:00 AM,3.191256,33780.0
4,2017/05/02,Tue,10:53,-0.53,L,2017-05-02 10:53:00 AM,2017-05-02 03:53:00 PM,-0.161544,57180.0
5,2017/05/02,Tue,17:14,9.44,H,2017-05-02 05:14:00 PM,2017-05-02 10:14:00 PM,2.877312,80040.0
6,2017/12/19,Tue,05:24,0.95,L,2017-12-19 05:24:00 AM,2017-12-19 10:24:00 AM,0.28956,37440.0
7,2017/12/19,Tue,11:33,9.77,H,2017-12-19 11:33:00 AM,2017-12-19 04:33:00 PM,2.977896,59580.0
8,2017/12/19,Tue,17:58,-0.03,L,2017-12-19 05:58:00 PM,2017-12-19 10:58:00 PM,-0.009144,82680.0
9,2018/03/28,Wed,02:54,0.27,L,2018-03-28 02:54:00 AM,2018-03-28 07:54:00 AM,0.082296,28440.0


In [333]:
# Mean predicted height (m) per date, separately for high ('H') and low ('L') tides.
heights = tidal_df.groupby(['Date', 'High/Low'])['Pred (m)'].mean()
heights

Date        High/Low
2017/03/17  H           2.756916
            L           0.118872
2017/05/02  H           3.034284
            L          -0.161544
2017/12/19  H           2.977896
            L           0.140208
2018/03/28  H           3.108960
            L          -0.047244
2018/05/07  H           2.667000
            L           0.490728
Name: Pred (m), dtype: float64

In [434]:
# Phase shift per date: the GMT second-of-day of that date's latest high tide.
# The 'H' entries are selected by label rather than by taking every other row,
# so a date that lacks a high or a low tide cannot silently shift the values
# of every later date onto the wrong tide type.
phase_shifts = (
    tidal_df.groupby(['Date', 'High/Low'])['secDay']
    .max()
    .xs('H', level='High/Low')
    .values
)
phase_shifts

array([ 73200.,  80040.,  59580.,  51000.,  49680.])

In [435]:
# Seconds between consecutive tides within each date; the first tide of every date has no predecessor and is NaN.
df_temp = tidal_df.groupby(['Date'])['secDay'].diff()

In [436]:
# Row labels where the within-date difference is missing, i.e. where each date's rows begin.
is_first_of_date = df_temp.isnull()
null_indices = df_temp[is_first_of_date].index.values
null_indices

array([ 0,  3,  6,  9, 12])

In [437]:
# Split the tide-to-tide gaps into one segment per date. Each segment runs from
# one NaN marker up to the next; the last segment runs to the end of the series
# (after=None means "no upper bound"), which replaces the former bare `except:`
# that used an IndexError as loop control and would have hidden other failures.
segment_bounds = list(null_indices) + [None]
split_dfs = [
    # truncate() is inclusive at both ends; dropna() removes the NaN markers.
    df_temp.truncate(before=start, after=stop).dropna()
    for start, stop in zip(segment_bounds[:-1], segment_bounds[1:])
]
# Consecutive tides are half a cycle apart, so the full period is twice the mean gap.
periods = [2 * x.mean() for x in split_dfs]
periods

[44940.0, 46260.0, 45240.0, 45540.0, 45480.0]

In [464]:
def predict_wave(t, max_wave_height, min_wave_height, period, phase_shift):
    """Evaluate a cosine tide model at time ``t``.

    The curve oscillates between ``min_wave_height`` and ``max_wave_height``
    with the given ``period`` and reaches its maximum at ``t == phase_shift``.

    Parameters
    ----------
    t : scalar or array-like
        Time(s) at which to evaluate, in the same units as ``period`` and
        ``phase_shift``.
    max_wave_height, min_wave_height : scalar or array-like
        Heights at the crest and trough of the curve.
    period : scalar or array-like
        Length of one full cycle.
    phase_shift : scalar or array-like
        Time at which the crest occurs.

    Returns
    -------
    Modeled height(s), broadcast over the inputs.
    """
    amp = np.abs(max_wave_height - min_wave_height) / 2
    # The midline keeps its sign: taking its absolute value would lift the whole
    # curve whenever the mean level is below the datum (negative).
    vert_shift = (max_wave_height + min_wave_height) / 2
    w = 2 * np.pi / period
    return amp * np.cos(w * t - w * phase_shift) + vert_shift

In [465]:
# Spot check of the model at t = 32880 s using the first date's period; the heights and phase shift are hand-entered values.
predict_wave(32880, 2.75, 0.21, periods[0], 10260)

0.21027927619978271

In [466]:
# Show the first tidal-current record as a dict of column -> value.
dict(curr_df.iloc[0])

{'Date_Time (GMT)': '2017-05-02 12:00:00 AM',
 'Direction (degrees relative to North)': 179,
 'Speed (cm/sec)': -20.800000000000001,
 'secDay': 0.0}

In [467]:
# Assemble one row of cosine-model parameters per date.
by_tide_type = heights.unstack(level='High/Low')
tides_df = pd.concat([by_tide_type['H'], by_tide_type['L']], axis=1)
tides_df = tides_df.rename(index=str, columns={'H': 'max_wave_height', 'L': 'min_wave_height'})
# periods and phase_shifts are attached by position, so they must be in the same date order as the rows.
tides_df['period'] = periods
tides_df['phase_shift'] = phase_shifts
tides_df

Unnamed: 0_level_0,max_wave_height,min_wave_height,period,phase_shift
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017/03/17,2.756916,0.118872,44940.0,73200.0
2017/05/02,3.034284,-0.161544,46260.0,80040.0
2017/12/19,2.977896,0.140208,45240.0,59580.0
2018/03/28,3.10896,-0.047244,45540.0,51000.0
2018/05/07,2.667,0.490728,45480.0,49680.0


In [468]:
# Save the per-date model parameters.
# NOTE(review): index=False omits the Date index, so the saved rows are identified only by position - confirm this is intended.
tides_df.to_csv('tides_df.csv', index=False)

In [475]:
# Evaluate the model at the tide times of the i-th date, using that date's fitted parameters.
# NOTE(review): the 3*i slicing assumes exactly three tide rows per date in tidal_df - verify before changing i.
i = 2
predict_wave(tidal_df['secDay'][3*i:3*(i+1)], **dict(tides_df.iloc[i]))

6    0.143360
7    2.977896
8    0.143360
Name: secDay, dtype: float64

In [476]:
# Tabulated predictions (m) for the same rows, for comparison with the modeled values above.
tidal_df['Pred (m)'][3*i:3*(i+1)]

6    0.289560
7    2.977896
8   -0.009144
Name: Pred (m), dtype: float64

## Load the dataframes for each date

In [212]:
# Read the three FLO L3 files (one per date), skipping the line right after the header in each.
df1, df2, df3 = (
    pd.read_csv(csv_name, skiprows=[1])
    for csv_name in (
        'FLO_L3_20170317.csv',
        'FLO_L3_20170502.csv',
        'FLO_L3_CascoBay20171219_revised.csv',
    )
)

In [290]:
# Column names of the 2017-03-17 file.
df1.columns

Index(['Julian Day', 'secDay', 'Lat', 'Lon', 'Heading', 'Speed (m/s)',
       'Water temp (C)', 'Cond (mmho/cm)', 'Salinity (PSU)', 'CDOM (ppb)',
       'Chl a (mg/m3)', 'Attenuation (c660)', 'O2 umol/L', '% sat O2',
       'Pressure (hPa)', 'sw xCO2 m (umol/mol)', 'sw xCO2 dry (umol/mol)',
       'sw pCO2 dry (uatm)', 'sw pCO2 (uatm)', 'sw fCO2 (uatm)',
       'air xCO2 (uatm)', 'air fCO2 (uatm)', 'air xCO2 (uatm).1',
       'air fCO2 (uatm).1', 'solar zenith (deg)', 'depth (m)',
       'Nitrate (umol/L)', 'Pressure (hPa).1', 'Phyco (ppb)', 'pH',
       'O2 umol/L.1', '% sat O2.1', 'air temp', 'PAR'],
      dtype='object')