In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib widget
%config Completer.use_jedi = False
import os
import time

import numpy as np
import pandas as pd
import seaborn as sns

sns.set()
import statsmodels.api as sm
from joblib import Parallel, delayed
from matplotlib import pyplot as plt
from matplotlib.widgets import MultiCursor
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter
from scipy.interpolate import InterpolatedUnivariateSpline, interp1d
from scipy.signal import argrelmax, argrelmin, find_peaks
from sklearn.linear_model import LinearRegression
from sidecar import Sidecar

os.chdir("..")
from src import definitions as defs
from src import features, models

In [2]:
def lm_tides(data, label=None):
    """Fit a straight line to a datetime-indexed series of values.

    Time is measured from the first sample of ``data`` and expressed in
    365-day years, so ``coef`` is the change in value per such year.

    Parameters
    ----------
    data : pandas.Series
        Values indexed by a DatetimeIndex. Must contain at least one row
        (``data.index[0]`` is used as the time origin).
    label : str, optional
        Short tag used as the subscript in the LaTeX equation string.

    Returns
    -------
    Result
        Named tuple ``(lm, index, preds, coef, intercept, label, eq)``:
        the fitted ``LinearRegression``, the index of ``data``, the fitted
        values as a 1-D array, the slope, the intercept, the label passed in,
        and a LaTeX string describing the fitted line.
    """
    from collections import namedtuple

    Result = namedtuple("Result", ["lm", "index", "preds", "coef", "intercept", "label", "eq"])
    lm = LinearRegression()
    index = data.index
    # Elapsed whole seconds since the first sample, converted to 365-day years.
    x = (data.index - data.index[0]).total_seconds().astype(int) / pd.Timedelta(days=365).total_seconds()
    x = x.values.reshape(-1, 1)
    y = data.values.reshape(-1, 1)
    lm.fit(x, y)
    preds = lm.predict(x).reshape(-1)
    coef = lm.coef_[0][0]
    intercept = lm.intercept_[0]

    # Raw string: "\z" is not a valid escape sequence in an ordinary literal
    # (SyntaxWarning on recent Python versions). The resulting text is unchanged.
    eq = r"$\zeta_{{{}}}={:.4f}t {:+.4f}$".format(label, coef, intercept)
    return Result(lm, index, preds, coef, intercept, label, eq)

In [3]:
# Data directory under the project root; the raw CSV is read from wdir / "raw".
wdir = defs.ROOT / "data"

In [4]:
# Read the raw gauge record; rows that are exact duplicates are dropped before indexing.
data = pd.read_csv(wdir / "raw" / "mongla_tides.csv").drop_duplicates().set_index("datetime")
data.index = pd.to_datetime(data.index)
# Put the record on a gap-free hourly grid; timestamps absent from the file become NaN rows.
# ("h" replaces the deprecated "H" frequency alias.)
data = data.reindex(pd.date_range(start=data.index[0], end=data.index[-1], freq="h"))
data = data.rename(columns={"height": "elevation"})
data.index.name = "datetime"
# Scale by 1/100 -- presumably centimetres to metres (later plots label elevation in m); confirm against the source file.
data["elevation"] = data["elevation"] / 100.0

In [5]:
# Display the loaded frame as a quick visual check of the index and the elevation column.
data

Unnamed: 0_level_0,elevation
datetime,Unnamed: 1_level_1
1977-01-01 01:00:00,2.50
1977-01-01 02:00:00,2.16
1977-01-01 03:00:00,1.80
1977-01-01 04:00:00,1.46
1977-01-01 05:00:00,1.19
...,...
2011-12-31 20:00:00,3.17
2011-12-31 21:00:00,2.57
2011-12-31 22:00:00,2.04
2011-12-31 23:00:00,1.60


In [None]:
# Flag local maxima / minima of the elevation series as high / low water.
data[["high", "low"]] = False
distance = 8  # minimum spacing, in samples, between two accepted peaks
elevation = data.elevation.values
hi = find_peaks(x=elevation, distance=distance)[0]
lo = find_peaks(x=elevation * -1, distance=distance)[0]  # minima are the peaks of the negated series
data.loc[data.index[hi], "high"] = True
data.loc[data.index[lo], "low"] = True

In [None]:
window = pd.Timedelta("24h 50min") * 3 # three tidal cycles
# Centred rolling mean over the flagged extrema only, then spread back onto the
# full index and filled in both directions by time-weighted interpolation.
for kind in ("high", "low"):
    flagged = data.elevation[data[kind] == True]
    rolled = flagged.rolling(window=window, center=True).mean()
    data[kind + "_roll"] = rolled.reindex(index=data.index).interpolate(method="time", limit_direction="both")

In [None]:
# Seconds elapsed since the first row; used as the (already sorted) x-axis for LOWESS.
dt = (data.index - data.index[0]).total_seconds().astype(int)
# LOWESS span as a fraction of all rows: (24 h 50 min expressed in hours) / number of rows.
frac = (24 * 60 + 50) / 60 / len(data)
for kind in ("high", "low"):
    fitted = sm.nonparametric.lowess(data[kind + "_roll"].values, dt, frac=frac, is_sorted=True)
    # lowess returns (x, fitted y) pairs; keep the fitted values.
    data[kind + "_smooth"] = fitted[:, 1]

# Envelope height: smoothed high-water curve minus smoothed low-water curve.
data["amp"] = data.high_smooth - data.low_smooth

In [None]:
# Flag maxima of the amplitude curve as "spring" and minima as "neap".
data[["spring", "neap"]] = False
distance = 12 * 24  # minimum spacing between accepted peaks: 288 samples
amplitude = data.amp.values
hi = find_peaks(x=amplitude, distance=distance)[0]
lo = find_peaks(x=amplitude * -1, distance=distance)[0]  # minima are the peaks of the negated series
data.loc[data.index[hi], "spring"] = True
data.loc[data.index[lo], "neap"] = True

In [None]:
span = "90D"
start = pd.to_datetime("1977-01-15")
pdata = data.loc[pd.to_datetime(start) - pd.Timedelta(span):pd.to_datetime(start) + pd.Timedelta(span)].reset_index()

fig = plt.figure(figsize=(10, 5))
ax = fig.subplots()

sns.lineplot(data=pdata, x="datetime", y="elevation", color="cornflowerblue", alpha=0.3, ax=ax)
sns.lineplot(data=pdata, x="datetime", y="amp", color="black", alpha=0.3, ax=ax)
sns.scatterplot(data=pdata.loc[pdata.high == True], x="datetime", y="elevation", color="green", s=15, ax=ax)
sns.scatterplot(data=pdata.loc[pdata.low == True], x="datetime", y="elevation", color="red", s=15, ax=ax)

sns.scatterplot(data=pdata.loc[pdata.spring_high == True], x="datetime", y="elevation", color="green", marker="^", s=50, ax=ax)
sns.scatterplot(data=pdata.loc[pdata.neap_high == True], x="datetime", y="elevation", color="red", marker="s", s=50, ax=ax)

sns.scatterplot(data=pdata.loc[pdata.spring_low == True], x="datetime", y="elevation", color="green", marker="s", s=50, ax=ax)
sns.scatterplot(data=pdata.loc[pdata.neap_low == True], x="datetime", y="elevation", color="red", marker="s", s=50, ax=ax)

for i in range(0, pdata.loc[pdata.spring == True].shape[0]):
    xmin = pdata.loc[pdata.spring == True].datetime.iloc[i] - pd.Timedelta("6H")
    xmax = pdata.loc[pdata.spring == True].datetime.iloc[i] + pd.Timedelta("6H")
    plt.axvspan(xmin=xmin, xmax=xmax, color="green", alpha=0.4)

for i in range(0, pdata.loc[pdata.neap == True].shape[0]):
    xmin = pdata.loc[pdata.neap == True].datetime.iloc[i] - pd.Timedelta("6H")
    xmax = pdata.loc[pdata.neap == True].datetime.iloc[i] + pd.Timedelta("6H")
    plt.axvspan(xmin=xmin, xmax=xmax, color="red", alpha=0.4)

title = "Mongla Tidal Data"
xlabel = ""
ylabel = "Elevation (m)"
rotation = 0

locator = locator = mdates.AutoDateLocator(minticks=3, maxticks=7)
formatter = mdates.ConciseDateFormatter(locator)

ax.xaxis.set(major_locator=locator, major_formatter=formatter)
ax.tick_params(axis='x', labelrotation=rotation)
ax.set(title=title, xlabel=xlabel, ylabel=ylabel)
fig.tight_layout()

In [None]:
spring_locs = data[data.spring == True].index
neap_locs = data[data.neap == True].index

data[["spring_high", "spring_low", "neap_high", "neap_low"]] = False
window = pd.Timedelta("15H")
for loc in spring_locs:
    hloc = data[data.high == True].elevation.loc[loc-window:loc+window].idxmax()
    data.loc[hloc, "spring_high"] = True
    lloc = data[data.low == True].elevation.loc[loc-window:loc+window].idxmin()
    data.loc[lloc, "spring_low"] = True

for loc in neap_locs:
    hloc = data[data.high == True].elevation.loc[loc-window:loc+window].idxmin()
    data.loc[hloc, "neap_high"] = True
    lloc = data[data.low == True].elevation.loc[loc-window:loc+window].idxmax()
    data.loc[lloc, "neap_low"] = True

In [None]:
# NOTE: this repeats the lm_tides definition from the second cell of the notebook;
# being the later one, it is the definition in effect for the cells below.
def lm_tides(data, label=None):
    """Fit a straight line to a datetime-indexed series of values.

    Time is measured from the first sample of ``data`` and expressed in
    365-day years, so ``coef`` is the change in value per such year.

    Parameters
    ----------
    data : pandas.Series
        Values indexed by a DatetimeIndex. Must contain at least one row
        (``data.index[0]`` is used as the time origin).
    label : str, optional
        Short tag used as the subscript in the LaTeX equation string.

    Returns
    -------
    Result
        Named tuple ``(lm, index, preds, coef, intercept, label, eq)``:
        the fitted ``LinearRegression``, the index of ``data``, the fitted
        values as a 1-D array, the slope, the intercept, the label passed in,
        and a LaTeX string describing the fitted line.
    """
    from collections import namedtuple

    Result = namedtuple("Result", ["lm", "index", "preds", "coef", "intercept", "label", "eq"])
    lm = LinearRegression()
    index = data.index
    # Elapsed whole seconds since the first sample, converted to 365-day years.
    x = (data.index - data.index[0]).total_seconds().astype(int) / pd.Timedelta(days=365).total_seconds()
    x = x.values.reshape(-1, 1)
    y = data.values.reshape(-1, 1)
    lm.fit(x, y)
    preds = lm.predict(x).reshape(-1)
    coef = lm.coef_[0][0]
    intercept = lm.intercept_[0]

    # Raw string: "\z" is not a valid escape sequence in an ordinary literal
    # (SyntaxWarning on recent Python versions). The resulting text is unchanged.
    eq = r"$\zeta_{{{}}}={:.4f}t {:+.4f}$".format(label, coef, intercept)
    return Result(lm, index, preds, coef, intercept, label, eq)

In [None]:
# Linear trends of each water-level class from 2000 onwards, drawn over the raw record.
subset = data.loc["2000":]

data_lm = lm_tides(data=subset.elevation.dropna(), label="MW")
highs_lm = lm_tides(data=subset[subset.high == True].elevation.dropna(), label="HW")
lows_lm = lm_tides(data=subset[subset.low == True].elevation.dropna(), label="LW")

spring_highs = lm_tides(data=subset[subset.spring_high == True].elevation, label="SHW")
spring_lows = lm_tides(data=subset[subset.spring_low == True].elevation, label="SLW")

neap_highs = lm_tides(data=subset[subset.neap_high == True].elevation, label="NHW")
neap_lows = lm_tides(data=subset[subset.neap_low == True].elevation, label="NLW")

fig, ax = plt.subplots(figsize=(10, 5))

sns.scatterplot(data=subset, x="datetime", y="elevation", color="cornflowerblue", marker=".", edgecolor=None, alpha=0.3, s=5, ax=ax)
sns.scatterplot(data=subset.loc[subset.high == True], x="datetime", y="elevation", marker=".", color="green", edgecolor=None, s=10, alpha=0.3, ax=ax)
sns.scatterplot(data=subset.loc[subset.low == True], x="datetime", y="elevation", marker=".", color="red", edgecolor=None, s=10, alpha=0.3, ax=ax)

sns.lineplot(ax=ax, x=highs_lm.index, y=highs_lm.preds, color="black", linestyle="dashed", zorder=100)
sns.lineplot(ax=ax, x=lows_lm.index, y=lows_lm.preds, color="black", linestyle="dashed", zorder=100)

sns.lineplot(ax=ax, x=spring_highs.index, y=spring_highs.preds, color="black", linestyle="dashdot", zorder=100)
sns.lineplot(ax=ax, x=neap_highs.index, y=neap_highs.preds, color="black", linestyle="dotted", zorder=100)
sns.lineplot(ax=ax, x=spring_lows.index, y=spring_lows.preds, color="black", linestyle="dashdot", zorder=102)
sns.lineplot(ax=ax, x=neap_lows.index, y=neap_lows.preds, color="black", linestyle="dotted", zorder=102)

# Annotate the right-hand end of every trend line with its slope (coef * 100).
# The fit objects are listed directly instead of being looked up by name with eval().
lms = [highs_lm, lows_lm, spring_highs, spring_lows, neap_highs, neap_lows]
x = data.index[-1]
for lm in lms:
    # Raw string: "\z" and "\c" are not valid escape sequences in an ordinary literal.
    ax.text(x=x, y=lm.preds[-1], s=r"$\zeta_{{{}}}$ $({:+.2} cm \cdot yr^{{-1}})$".format(lm.label, lm.coef * 100), fontsize="medium")

title = "Mongla Tidal Data from {} to {}".format(subset.index[0].strftime("%b %Y"), subset.index[-1].strftime("%b %Y"))
xlabel = ""
ylabel = "Elevation (m)"

locator = mdates.AutoDateLocator(minticks=3, maxticks=7)
formatter = mdates.ConciseDateFormatter(locator)

ax.xaxis.set(major_locator=locator, major_formatter=formatter)
ax.set(title=title, xlabel=xlabel, ylabel=ylabel)

fig.tight_layout()

In [None]:
def lm_grouped(group):
    """Fit the seven water-level trend lines for one group of rows.

    Parameters
    ----------
    group : tuple
        ``(group_label, frame)`` pair as produced by iterating a groupby.
        ``frame`` must carry a ``datetime`` column plus ``elevation`` and the
        flag columns ``high``, ``low``, ``spring_high``, ``spring_low``,
        ``neap_high`` and ``neap_low``.

    Returns
    -------
    tuple
        ``({label: slopes}, frame_indexed)`` where ``slopes`` maps each series
        name to the ``coef`` of its ``lm_tides`` fit and ``frame_indexed`` is
        the group re-indexed by ``datetime``. When the group spans more than
        one calendar year the label is replaced by ``"<first>-<last>"``.
    """
    group_label, frame = group
    frame = frame.set_index("datetime")

    first_year = frame.index[0].year
    last_year = frame.index[-1].year
    if first_year != last_year:
        group_label = "{}-{}".format(first_year, last_year)

    # (output key, flag column or None for the full record, lm_tides label, drop NaN first?)
    specs = [
        ("all", None, "MW", True),
        ("highs", "high", "HW", True),
        ("spring_highs", "spring_high", "SHW", False),
        ("neap_highs", "neap_high", "NHW", False),
        ("lows", "low", "LW", True),
        ("spring_lows", "spring_low", "SLW", False),
        ("neap_lows", "neap_low", "NLW", False),
    ]

    out = {}
    for key, flag, tag, drop_missing in specs:
        if flag is None:
            series = frame.elevation
        else:
            series = frame[frame[flag] == True].elevation
        if drop_missing:
            series = series.dropna()
        out[key] = lm_tides(data=series, label=tag).coef

    return {group_label: out}, frame

In [None]:
# Fit the trend lines separately for every `group_freq` block of the record.
group_freq = "5YS"
grouping = data.reset_index().groupby(by=pd.Grouper(key="datetime", freq=group_freq))
g = dict()
gg = []
for key, frame in grouping:
    # Ignore blocks with at most 14 * 24 rows (14 days of hourly rows).
    if frame.shape[0] <= 14 * 24:
        continue
    d, dd = lm_grouped((key, frame))
    g.update(d)
    gg.append(dd)

# One row per block, one column per fitted series.
trends = pd.DataFrame.from_dict(g).transpose()
trends.index.name = "years"

In [None]:
# Collect every row carrying at least one extremum flag and give it a single type code.
flag_columns = ["high", "low", "spring_high", "spring_low", "neap_high", "neap_low"]
extrema = data[data[flag_columns].eq(True).any(axis=1)].copy()

# (flag column, code written, code that may be overwritten silently).
# Rules are applied in this order, so later codes replace earlier ones; the row's
# timestamp is printed whenever a code replaces anything other than the allowed one.
type_rules = [
    ("high", "H", None),
    ("low", "L", None),
    ("spring_high", "SH", "H"),
    ("spring_low", "SL", "L"),
    ("neap_high", "NH", "H"),
    ("neap_low", "NL", "L"),
]

type_codes = []
for row in extrema.itertuples():
    code = None
    for flag, new_code, allowed_previous in type_rules:
        if getattr(row, flag) == True:
            if code is not None and code != allowed_previous:
                print(row.Index)
            code = new_code
    type_codes.append(code)

extrema["type"] = type_codes
extrema = extrema[["elevation", "type"]]

In [None]:
# Monthly mean / min / max elevation per extremum type, indexed by (year, month, type).
# The yearly tables (yearly_ext, yearly_amp, yearly) are built in the following cell;
# the earlier draft of them here referenced `yearly` before it was defined and
# raised NameError on a fresh kernel, so it has been removed.
monthly = extrema.groupby(by=[pd.Grouper(freq="1YS"), pd.Grouper(freq="1MS"), "type"]).agg({"elevation": ["mean", "min", "max"]})
monthly.index = pd.MultiIndex.from_tuples([(i.strftime('%Y'), j.strftime('%b'), k) for (i, j, k) in monthly.index], names=["year", "month", "type"])

In [None]:
# Yearly mean / min / max elevation for every extremum type.
yearly_ext = (
    extrema.groupby(by=[pd.Grouper(freq="1YS"), "type"])
    .agg(func=["mean", "min", "max"])
    .elevation.reset_index()
    .set_index("datetime")
)
# Yearly mean / min / max of the amplitude curve (rows up to and including 2011), tagged as type "amp".
yearly_amp = (
    data[:"2011"].amp.to_frame()
    .groupby(by=[pd.Grouper(freq="1YS")])
    .agg(["mean", "min", "max"])
    .amp.reset_index()
    .set_index("datetime")
)
yearly_amp["type"] = "amp"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
yearly = pd.concat([yearly_ext, yearly_amp]).sort_index().reset_index()
yearly = yearly.set_index(keys=["datetime", "type"])

In [None]:
# Three stacked panels: yearly maxima of the highs, yearly maxima of the lows,
# and the yearly mean amplitude.
fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(13, 8))

# Split the yearly table on the "type" index level.
kinds = yearly.index.get_level_values("type")
highs = yearly[kinds.str.contains("H")]
lows = yearly[kinds.str.contains("L")]
amp = yearly[kinds == "amp"]

high_order = ["H", "SH", "NH"]
low_order = ["L", "SL", "NL"]
sns.lineplot(data=highs.reset_index(), x="datetime", y="max", hue="type", hue_order=high_order, style="type", style_order=high_order, ax=ax[0])
sns.lineplot(data=lows.reset_index(), x="datetime", y="max", hue="type", hue_order=low_order, style="type", style_order=low_order, ax=ax[1])
sns.lineplot(data=amp.reset_index(), x="datetime", y="mean", hue="type", style="type", ax=ax[2])

locator = mdates.AutoDateLocator(minticks=3, maxticks=7)
formatter = mdates.ConciseDateFormatter(locator)

# xlabel / ylabel are reused from the earlier trend-figure cell.
for panel, panel_title in zip(ax, ["Highs", "Lows", "Amplitude"]):
    panel.xaxis.set(major_locator=locator, major_formatter=formatter)
    panel.set(title=panel_title, xlabel=xlabel, ylabel=ylabel)

# Only the bottom panel keeps its date tick labels.
for panel in ax[:2]:
    panel.set(xticklabels=[])

ax[1].get_legend().remove()

fig.tight_layout()

In [None]:
# Fitted slopes per period: spring/neap highs, spring/neap lows, and the overall series.
fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(13, 8))

trend_table = trends.reset_index()

# Per panel: title and the (column, colour, line style, legend label) of each line, in drawing order.
panels = [
    ("Highs", [("spring_highs", "green", "dotted", "SHW"), ("neap_highs", "red", "dotted", "NHW")]),
    ("Lows", [("neap_lows", "red", "dotted", "NLW"), ("spring_lows", "green", "dotted", "SLW")]),
    ("All", [("all", "black", "solid", "All"), ("highs", "green", "dashed", "HW"), ("lows", "red", "dashed", "LW")]),
]

# xlabel / ylabel are reused from the earlier trend-figure cell.
for panel, (panel_title, lines) in zip(ax, panels):
    for column, line_color, line_style, legend_label in lines:
        sns.lineplot(data=trend_table, x="years", y=column, color=line_color, linestyle=line_style, label=legend_label, ax=panel)
    panel.set(title=panel_title, xlabel=xlabel, ylabel=ylabel)

# Only the bottom panel keeps its tick labels.
for panel in ax[:2]:
    panel.set(xticklabels=[])

ax[1].get_legend().remove()

fig.tight_layout()