In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib widget
import os
import numpy as np
import pandas as pd
import time
import seaborn as sns
sns.set()
from matplotlib import pyplot as plt
from matplotlib.widgets import MultiCursor
from scipy.signal import argrelmin, argrelmax
from scipy.interpolate import InterpolatedUnivariateSpline, interp1d
from sklearn.linear_model import LinearRegression
from joblib import Parallel, delayed
import statsmodels.api as sm

os.chdir("..")
from src import features, models
from src import definitions as defs

In [2]:
def lm_tides(data, x, y, label):
    from collections import namedtuple
    Result = namedtuple("Result", ["lm", "preds", "coef", "intercept", "eq"])
    lm = LinearRegression()
    x = eval("data.{}".format(x))
    x = x.values.reshape(-1, 1)
    y = data[y].values.reshape(-1, 1)
    lm.fit(x, y)
    preds = lm.predict(x).reshape(-1)
    coef = lm.coef_[0][0] 
    intercept = lm.intercept_[0]

    eq = "$\zeta_{{{}}}={:.4f}t {:+.4f}$".format(label, coef, intercept)
    return Result(lm, preds, coef, intercept, eq)

In [3]:
wdir = defs.ROOT / "data"

In [4]:
data = pd.read_csv(wdir / "raw" / "mongla_tides.csv").drop_duplicates().set_index("datetime")
data = data.set_index(pd.to_datetime(data.index))
data = data.reindex(pd.date_range(start=data.index[0], end=data.index[-1], freq="1H"))

data = data.rename(columns={"height": "elevation"})
data.elevation = data.elevation / 100.0

In [47]:
data[["high", "low"]] = False
data["elapsed_sec"] = (data.index - data.index[0]).total_seconds().astype(int)
data["elapsed_yr"] = data.elapsed_sec / pd.Timedelta(days=365).total_seconds()
data["rollmean"] = data.elevation.rolling(window=pd.Timedelta(days=30*6)).mean()

In [48]:
highs = data.loc[data.index[argrelmax(data.elevation.values + np.random.normal(0, 1e-9, len(data)), order=1)[0]]].copy()
lows = data.loc[data.index[argrelmin(data.elevation.values + np.random.normal(0, 1e-9, len(data)), order=1)[0]]].copy()

highs = highs.loc[highs.elevation > highs.rollmean][["elevation", "elapsed_yr"]]
lows = lows.loc[lows.elevation < lows.rollmean][["elevation", "elapsed_yr"]]

highs["rollmean"] = highs.elevation.rolling(window=pd.Timedelta(days=365)).mean()
lows["rollmean"] = lows.elevation.rolling(window=pd.Timedelta(days=365)).mean()

data.loc[highs.index, "high"] = True
data.loc[lows.index, "low"] = True

In [8]:
num_years = 3
lowess = sm.nonparametric.lowess(highs.elevation.values, highs.elapsed_yr.values, frac=(365.25 * 24 / 12.25) * num_years / len(highs), is_sorted=True)
highs["loess"] =  list(zip(*lowess))[1]
lowess = sm.nonparametric.lowess(lows.elevation.values, lows.elapsed_yr.values, frac=(365.25 * 24 / 12.25) * num_years / len(lows), is_sorted=True)
lows["loess"] =  list(zip(*lowess))[1]
lowess = sm.nonparametric.lowess(data.elevation.values, data.elapsed_yr.values, frac=(365.25 * 24) * num_years / len(data), is_sorted=True)
data.loc[data.elevation.notnull(), "loess"] = list(zip(*lowess))[1]

In [9]:
plt.close("all")
n = pd.Timedelta(minutes=60)
subset = data.dropna().resample(n).first()

data_lm = lm_tides(data=data.dropna(), x="elapsed_yr", y="elevation", label="MW")
highs_lm = lm_tides(data=highs, x="elapsed_yr", y="elevation", label="MHW")
lows_lm = lm_tides(data=lows, x="elapsed_yr", y="elevation", label="MLW")

fig = plt.figure(figsize=(10, 5))
ax = plt.axes()

sns.scatterplot(ax=ax, x=subset.index, y=subset.elevation, color="cornflowerblue", marker=".", edgecolor=None, alpha=0.3, s=1)
sns.scatterplot(ax=ax, x=highs.index, y=highs.elevation, marker=".", color="green", edgecolor=None, s=3, alpha=0.7)
sns.scatterplot(ax=ax, x=lows.index, y=lows.elevation, marker=".", color="red", edgecolor=None, s=3, alpha=0.7)

# sns.lineplot(ax=ax, x=data.index, y=data.rollmean, color="blue", linestyle="--", zorder=101)
# sns.lineplot(ax=ax, x=highs.index, y=highs_lm.preds, color="darkgreen", linestyle="--", zorder=101, label=highs_lm.eq)
# sns.lineplot(ax=ax, x=[data.index[0], data.index[-1]], y=[data_lm.preds[0], data_lm.preds[-1]], color="blue", linestyle="--", zorder=100, label=data_lm.eq)
# sns.lineplot(ax=ax, x=lows.index, y=lows_lm.preds, color="darkred", linestyle="--", zorder=102, label=lows_lm.eq)

sns.lineplot(ax=ax, x=highs.index, y=highs.loess, color="darkgreen", linestyle="--", zorder=101)
sns.lineplot(ax=ax, x=lows.index, y=lows.loess, color="darkred", linestyle="--", zorder=101)
sns.lineplot(ax=ax, x=data.index, y=data.loess, color="blue", linestyle="--", zorder=101)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:ylabel='elevation'>

In [59]:
plt.close("all")

fig = plt.figure(figsize=(10, 5))
ax = plt.axes()

pdata = data.loc["1977-03-14":"1977-03-15"]

sns.lineplot(ax=ax, x=pdata.index, y=pdata.elevation, color="cornflowerblue", alpha=0.3)
sns.lineplot(ax=ax, x=pdata.index, y=pdata.rollmean, color="red", alpha=0.3)
sns.scatterplot(ax=ax, x=pdata.loc[pdata.high == True].index, y=pdata.loc[pdata.high == True].elevation, color="green")
sns.scatterplot(ax=ax, x=pdata.loc[pdata.low == True].index, y=pdata.loc[pdata.low == True].elevation, color="red")

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:ylabel='elevation'>

In [50]:
combined = data.loc[(data.high == True) | (data.low == True)].sort_index()

In [56]:
combined[combined.low.diff() == False]

Unnamed: 0,elevation,high,low,elapsed_sec,elapsed_yr,rollmean
1977-03-15 12:00:00,2.55,True,False,6346800,0.201256,1.823243
1977-04-30 01:00:00,2.62,True,False,10281600,0.326027,1.882226
1977-06-01 08:00:00,0.73,False,True,13071600,0.414498,1.963260
1977-07-24 18:00:00,3.54,True,False,17686800,0.560845,2.150870
1977-07-25 09:00:00,3.63,True,False,17740800,0.562557,2.152868
...,...,...,...,...,...,...
2011-09-06 02:00:00,0.48,False,True,1094346000,34.701484,2.648889
2011-09-06 15:00:00,0.56,False,True,1094392800,34.702968,2.644551
2011-09-07 03:00:00,0.54,False,True,1094436000,34.704338,2.640271
2011-09-07 17:00:00,0.52,False,True,1094486400,34.705936,2.635190


In [45]:
combined

Unnamed: 0,elevation,high,low,elapsed_sec,elapsed_yr,rollmean
1977-01-01 06:00:00,1.01,False,True,18000,0.000571,1.686667
1977-01-01 12:00:00,2.38,True,False,39600,0.001256,1.775833
1977-01-01 18:00:00,0.79,False,True,61200,0.001941,1.662222
1977-01-02 00:00:00,2.80,True,False,82800,0.002626,1.741667
1977-01-02 01:00:00,2.80,True,False,86400,0.002740,1.784000
...,...,...,...,...,...,...
2011-12-30 17:00:00,3.90,True,False,1104336000,35.018265,2.652903
2011-12-31 00:00:00,1.07,False,True,1104361200,35.019064,2.653678
2011-12-31 05:00:00,3.90,True,False,1104379200,35.019635,2.652329
2011-12-31 13:00:00,0.88,False,True,1104408000,35.020548,2.652625
