# Goal: Get some interesting baseline stats.

- Quantify monthly/annual increases at particular sites, e.g. "Twice the number here that we saw last year."

In [None]:
import polars as pl
import sqlite3
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns

from bimodal.cli import YEAR_FILES, load_and_clean_raw

In [None]:
# Inspect the rows for the "Basin Reserve" site recorded in August 2021.
# NOTE(review): `data` is assigned in a later cell of this notebook, so that
# cell has to be executed before this one.
(
    data.with_columns(pl.col("record_time").str.to_datetime())
    .filter(pl.col("site_name") == "Basin Reserve")
    .filter(pl.col("record_time").dt.year() == 2021)
    .filter(pl.col("record_time").dt.month() == 8)
)

In [None]:
# Directory the raw counter files are read from (a relative path, so it is
# resolved against the current working directory).
RAW_DATA_PATH = Path("../data/raw_counter")

In [None]:
# Build the full path of every file listed in YEAR_FILES, then load and clean
# them in one call.
year_paths = [RAW_DATA_PATH / file_name for file_name in YEAR_FILES.values()]
data = load_and_clean_raw(year_paths)

In [None]:
# Monthly incoming/outgoing totals for the "Basin Reserve" site from 2021
# onwards, with extra `year` and `month` (three-letter name) columns so the
# result can be plotted month-by-month with one line per year.

# Month number -> abbreviated month name.
month_abbreviations = {
    1: "Jan",
    2: "Feb",
    3: "Mar",
    4: "Apr",
    5: "May",
    6: "Jun",
    7: "Jul",
    8: "Aug",
    9: "Sep",
    10: "Oct",
    11: "Nov",
    12: "Dec",
}

basin_monthly = (
    # `record_time` is parsed from string to datetime before any date logic.
    data.with_columns(pl.col("record_time").str.to_datetime())
    .filter(pl.col("site_name") == "Basin Reserve")
    .filter(pl.col("record_time").dt.year() >= 2021)
    # Sorted by time before the dynamic (calendar-month) grouping.
    .sort("record_time")
    .groupby_dynamic("record_time", every="1mo")
    .agg(
        [
            pl.col("count_incoming").sum(),
            pl.col("count_outgoing").sum(),
        ]
    )
)

x = basin_monthly.with_columns(
    pl.col("record_time").dt.year().alias("year"),
    pl.col("record_time")
    .dt.month()
    .map_dict(month_abbreviations, return_dtype=str)
    .alias("month"),
)

In [None]:
# Display the monthly aggregate built in the previous cell.
x

In [None]:
# Display the same monthly aggregate converted to a pandas DataFrame.
x.to_pandas()

In [None]:
# Six colours from seaborn's "crest" palette, passed as `palette=` to the
# relplot calls further down; the type is echoed as the cell output.
c = sns.color_palette(palette="crest", n_colors=6)
type(c)

In [None]:
# Line plot of the monthly incoming counts at Basin Reserve, one line per year.

# One set_theme() call applies the default theme together with the
# figure-size override. `sns.set` is just an alias of `set_theme`, so calling
# both back-to-back initialised the theme twice.
sns.set_theme(rc={"figure.figsize": (10, 6)})

# Rows whose monthly incoming total is not positive are left out of the plot.
incoming = x.filter(pl.col("count_incoming") > 0).to_pandas()

sns.relplot(
    data=incoming,
    x="month",
    y="count_incoming",
    hue="year",
    kind="line",
    palette=c,
    height=4,
    aspect=2,
).set(title="Bike count: Basin Reserve (Incoming)")

plt.xlabel("Month")
plt.ylabel("Count")
plt.ylim(0, 6000)  # fixed y-axis range

plt.show()

In [None]:
# Per-year line plot of the monthly outgoing counts; rows whose outgoing
# total is not positive are left out. The trailing semicolon suppresses the
# cell's output value.
outgoing = x.filter(pl.col("count_outgoing") > 0).to_pandas()
sns.relplot(
    data=outgoing,
    x="month",
    y="count_outgoing",
    hue="year",
    kind="line",
    palette=c,
);

In [None]:
# Monthly totals for the "Basin Reserve" site with no year filter, followed by
# a plain line plot of the outgoing totals from row 45 onwards.
all_months = (
    data.with_columns(pl.col("record_time").str.to_datetime())
    .filter(pl.col("site_name") == "Basin Reserve")
    .sort("record_time")
    .groupby_dynamic("record_time", every="1mo")
    .agg(
        [
            pl.col("count_incoming").sum(),
            pl.col("count_outgoing").sum(),
        ]
    )
)
plt.plot(all_months["count_outgoing"][45:])