In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

from ipywidgets import interact, interact_manual, IntRangeSlider
from matplotlib import pyplot as plt
from scipy.stats import spearmanr, pearsonr

In [2]:
# Load the shipwreck records; the later cells read this module-level frame.
wrecks_df = pd.read_excel("data/shipwrecks_with_bermuda.xlsx")
# Bare expression on the last line so the notebook renders a table preview.
wrecks_df

Unnamed: 0,date,lat,lon,near,in_bermuda
0,1996-05-13 05:50:00,40.876665,-91.031665,United States of America,False
1,1996-05-13 05:50:01,40.876665,-91.031665,United States of America,False
2,1996-05-13 05:50:01,40.876665,-91.031665,United States of America,False
3,1996-05-13 05:50:02,40.876665,-91.031665,United States of America,False
4,1996-05-13 05:50:02,40.876665,-91.031665,United States of America,False
...,...,...,...,...,...
106261,2015-06-22 14:10:01,37.310490,-89.513615,United States of America,False
106262,2015-06-22 16:35:00,25.760800,-79.956670,United States of America,True
106263,2015-06-22 16:35:00,25.760800,-79.956670,United States of America,True
106264,2015-06-24 13:52:00,29.732314,-95.127879,United States of America,False


In [3]:
# Check column dtypes: the per-year plot below compares against `date` and
# combines the result with `in_bermuda` as a boolean mask.
wrecks_df.dtypes

date          datetime64[ns]
lat                  float64
lon                  float64
near                  object
in_bermuda              bool
dtype: object

In [4]:
def _to_percent(counts: np.ndarray) -> np.ndarray:
    """Return each entry of ``counts`` as a percentage of the array's sum (all zeros when the sum is 0)."""
    total = counts.sum()
    if total == 0:
        # Nothing to normalize by; avoid a 0/0 division that would yield NaN.
        return np.zeros(counts.shape, dtype=float)
    return 100.0 * counts / total


def inspect_wrecks_by_year(from_to: tuple[int, int], show_wrecks_in_Bermuda_Triangle: bool, show_total_wrecks: bool, show_wrecks_outside_Bermuda_Triangle: bool, normalized_by_counts: bool) -> None:
    """Plot yearly wreck counts inside/outside the Bermuda Triangle and print their correlation.

    Reads the module-level ``wrecks_df`` (columns ``date`` and ``in_bermuda``).

    Parameters
    ----------
    from_to
        Inclusive ``(first_year, last_year)`` range to inspect.
    show_wrecks_in_Bermuda_Triangle
        Draw the series of wrecks flagged ``in_bermuda``.
    show_total_wrecks
        Draw the series of all wrecks.
    show_wrecks_outside_Bermuda_Triangle
        Draw the series of wrecks not flagged ``in_bermuda``.
    normalized_by_counts
        If true, each series is shown as the percentage of that series' own
        total over the selected range (so every drawn series sums to 100),
        instead of raw counts.

    Returns
    -------
    None
        The correlations are printed and the figure is drawn as side effects.
    """
    from_year, to_year = from_to
    years = np.arange(from_year, to_year + 1)

    # Extract the calendar year once instead of re-filtering the whole frame
    # for every year; rows without a date cannot be assigned to any year.
    dated = wrecks_df.dropna(subset=["date"])
    wreck_year = dated["date"].dt.year
    in_bermuda = dated["in_bermuda"].astype(bool)

    # reindex() keeps years with no wrecks in the series as explicit zeros.
    y_total = wreck_year.value_counts().reindex(years, fill_value=0).to_numpy()
    y_bermuda = wreck_year[in_bermuda].value_counts().reindex(years, fill_value=0).to_numpy()
    y_not_bermuda = y_total - y_bermuda

    if len(years) < 2:
        # pearsonr raises on fewer than two observations; report instead of
        # crashing so a single-year selection still produces a plot.
        print("Pearson corr. (Bermuda vs outside): n/a (need at least two years)")
        print("Spearman rank corr. (Bermuda vs outside): n/a (need at least two years)")
    else:
        # Tuple unpacking works for both the legacy tuple return value and the
        # newer result objects.
        pearson_corr, _ = pearsonr(y_bermuda, y_not_bermuda)
        spearman_corr, _ = spearmanr(y_bermuda, y_not_bermuda)
        print(f"Pearson corr. (Bermuda vs outside): {pearson_corr:.4f}")
        print(f"Spearman rank corr. (Bermuda vs outside): {spearman_corr:.4f}")

    if normalized_by_counts:
        # Scale to real percentages so the values agree with the "% of ..." labels.
        y_total = _to_percent(y_total)
        y_bermuda = _to_percent(y_bermuda)
        y_not_bermuda = _to_percent(y_not_bermuda)

    _, ax = plt.subplots()

    if show_total_wrecks:
        ax.plot(years, y_total, label="% of total wrecks" if normalized_by_counts else "total wrecks")
    if show_wrecks_in_Bermuda_Triangle:
        ax.plot(years, y_bermuda, label="% of wrecks in Bermuda Triangle" if normalized_by_counts else "wrecks in Bermuda Triangle")
    if show_wrecks_outside_Bermuda_Triangle:
        ax.plot(years, y_not_bermuda, label="% of wrecks outside Bermuda Triangle" if normalized_by_counts else "wrecks outside Bermuda Triangle")

    ax.set_xlabel("year")
    ax.set_ylabel("% of Wrecks" if normalized_by_counts else "# of Wrecks")
    ax.set_title("wrecks per year normalized by subpopulation" if normalized_by_counts else "wrecks per year per subpopulation")

    # Only build a legend when at least one series was drawn; an empty legend
    # triggers a matplotlib warning.
    handles, _ = ax.get_legend_handles_labels()
    if handles:
        ax.legend()

# Interactive view: a year-range slider spanning the dataset plus one checkbox
# per boolean keyword (the values given here are the initial widget states).
interact(
    inspect_wrecks_by_year,
    from_to=IntRangeSlider(
        min=wrecks_df["date"].min().year,
        max=wrecks_df["date"].max().year,
        step=1,
        value=(wrecks_df["date"].min().year, wrecks_df["date"].max().year),
    ),
    show_wrecks_in_Bermuda_Triangle=True,
    show_total_wrecks=False,
    show_wrecks_outside_Bermuda_Triangle=True,
    normalized_by_counts=True,
);

interactive(children=(IntRangeSlider(value=(1996, 2015), description='from_to', max=2015, min=1996), Checkbox(…

In [5]:
def inspect_export() -> None:
    """Write the ``date`` and ``in_bermuda`` columns of ``wrecks_df`` to ``site/data/timeseries.csv``.

    The target directory is created when missing, so the export also works on
    a fresh checkout. An existing file at that path is overwritten.
    """
    out_path = Path("site/data/timeseries.csv")
    # to_csv raises OSError when the parent directory does not exist.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    # Column selection already yields a new frame, so no explicit copy is needed.
    wrecks_df[["date", "in_bermuda"]].to_csv(out_path, index=False)

# interact_manual renders a button; the export only runs when it is clicked.
interact_manual(inspect_export);

interactive(children=(Button(description='Run Interact', style=ButtonStyle()), Output()), _dom_classes=('widge…