In [None]:
import sys
sys.path.append("..")

In [None]:
import datetime
import hashlib

import numpy as np
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
pd.options.plotting.backend = "plotly"

from src.parking import *

In [None]:
# load every snapshot row (the index is read below as date / id / name)
df_all = parking_dataframe(per_snapshot=True, with_name=True)
# "<year>-<week of year>" label per row, used for the weekly grouping further down
snapshot_dates = df_all.index.get_level_values("date")
df_all["week"] = snapshot_dates.strftime("%Y-%W")
# space id (2nd index level) -> name (3rd index level); entries whose name is not a string are skipped
id_2_name = {
    space_id: space_name
    for space_id, space_name in zip(
        df_all.index.get_level_values(1), df_all.index.get_level_values(2)
    )
    if isinstance(space_name, str)
}
# median capacity per id as int; ids without any capacity value get 0
median_capacity = df_all["capacity"].groupby("id").median()
id_2_capacity = median_capacity.fillna(0).astype(int).to_dict()

In [None]:
# translate the "category" code into a number (stored as "num_free");
# codes without an entry in MAPPING (NaN included) get no value
MAPPING = {1.: 5, 2.: 15, 3.: 35, 4.: 55}
df_all["num_free"] = df_all["category"].map(MAPPING.get)

In [None]:
# add "mean_capacity" to each row.
# NOTE: despite the column name this is the MEDIAN capacity of the row's space (id)
# within the row's calendar week.
#
# Only the "capacity" column is aggregated: running median() over a frame that still
# carries the string "name" column raises a TypeError on pandas >= 2.0.
mean_cap = (
    df_all.reset_index()
    .groupby(["week", "id"])[["capacity"]]
    .median()
    .dropna()  # (week, id) pairs without any capacity value are simply absent
)
mean_lookup = mean_cap["capacity"].to_dict()
# vectorised lookup of every row's (week, id) key; keys without a median yield NaN
row_keys = pd.MultiIndex.from_arrays(
    [df_all["week"], df_all.index.get_level_values("id")]
)
df_all["mean_capacity"] = mean_cap["capacity"].reindex(row_keys).to_numpy()
# show a preview only instead of dumping the whole frame
df_all.head()

In [None]:
# get a fixed color for each space ID: the first six hex digits of the MD5 digest of the
# ID's first six characters, so a space keeps its color across all plots and reruns.
# (hashlib is imported in the notebook's import cell, so this cell runs on a fresh kernel.)
id_2_color = {
    space_id: "#" + hashlib.md5(space_id[:6].encode("utf-8")).hexdigest()[:6]
    for space_id in sorted(df_all.index.get_level_values("id").unique())
}
# the aggregated "all" series is always drawn in black
id_2_color["all"] = "#000000"

In [None]:
# number of listed spaces per day
# df_all["listed"].unstack("id").resample("1d", level="date").sum().clip(0, 1).sum(axis=1).plot.bar()

# num listed, num valid, num "active"

In [None]:
# num listed and num valid
RS = "1d"
flags = df_all.droplevel("name")[["listed", "valid"]].unstack("id")
flags_daily = flags.resample(RS).mean()
# Add up the daily per-space means over all spaces. Summing each top-level column group
# directly avoids `groupby(axis=1)`, which is deprecated in recent pandas.
df = pd.DataFrame({col: flags_daily[col].sum(axis=1) for col in ["listed", "valid"]})

# a space counts as "active" on a day when the daily mean of its hour-to-hour
# category differences is non-zero
df_act = df_all["category"].unstack("id").resample("1h", level="date").mean().diff().resample(RS).mean()
# Zero the gaps BEFORE comparing: `NaN != 0` is True, so comparing first counted every
# space without data as active and turned the later NaN replacement into a no-op.
df_act = (df_act.fillna(0) != 0).sum(axis=1)
df["active"] = df_act
# the first and last rows are left out of the plot (presumably incomplete days)
df.iloc[1:-1].plot(
    title="Number of listed/valid/active parking lots per day",
    labels={"value": "Number of spaces", "variable": "category"},
)

## mean category per id

In [None]:
# mean category per ID
# (hashlib is no longer imported here: it lives in the notebook's import cell, ahead of its first use)
# NOTE(review): "1m" is meant as a monthly frequency (see the plot title) -- confirm the
# alias is still accepted by the pandas version in use.
monthly = df_all["category"].unstack("id").resample("1m", level="date").mean()
# filter only the active ones: spaces whose bucket-to-bucket change averages above 0.1
df_mean = monthly.diff().abs().mean()
# copy so that adding the "all" column below does not write into a slice of `monthly`
df = monthly.loc[:, (df_mean > .1)].copy()
df.columns = df.columns.map(lambda c: f"{c} {id_2_name[c]}")
# extra series: mean over the selected spaces
df["all"] = df.mean(axis=1)
df.plot(
    title="Mean \"category\" per month and station (selection) (1: <= 10, 2: >10, 3: >30, 4: >50)",
    labels={"value": "category", "id": "parking space"},
    # the first six characters of each label are the color key ("all" for the aggregate)
    color_discrete_sequence=df.columns.map(lambda n: id_2_color[n[:6]]),
    # TODO: this is not working as expected
    # line_dash_sequence=df.columns.map(lambda n: "dot" if n == "all" else "solid").to_list(),
)

# analysis of changes

In [None]:
# average absolute hour-to-hour change of "category", aggregated per week
hourly_category = df_all.droplevel("name")["category"].unstack("id").resample("1h").mean()
weekly_change = hourly_category.diff().abs().resample("1w").mean()
# drop first and last incomplete buckets
df = weekly_change.iloc[1:-1]
# label every column as "<id> <name>"
df.columns = df.columns.map(lambda space_id: f"{space_id} {id_2_name[space_id]}")
bar_colors = df.columns.map(lambda label: id_2_color[label[:6]])
df.plot.bar(
    title="Changes of \"category\" from one hour to the next per week and station",
    labels={"value": "changes", "id": "parking space"},
    color_discrete_sequence=bar_colors,
)

In [None]:
# mean absolute change of "category" between consecutive 30-minute buckets, per hour-of-day
half_hourly = df_all.droplevel("name")["category"].unstack("id").resample("30min").mean()
bucket_change = half_hourly.diff().abs()
bucket_change["hour"] = bucket_change.index.get_level_values("date").hour
df = bucket_change.groupby("hour").mean()
# label every column as "<id> <name>"
df.columns = df.columns.map(lambda space_id: f"{space_id} {id_2_name[space_id]}")
bar_colors = df.columns.map(lambda label: id_2_color[label[:6]])
df.plot.bar(
    title="Changes of \"category\" from one 30 minutes to the next per hour-of-day and station",
    labels={"value": "changes", "id": "parking space", "hour": "hour of day"},
    color_discrete_sequence=bar_colors,
)


In [None]:
def get_per_hour(min_date, max_date, data=None):
    """
    Mean absolute 30-minute change of "category" per hour-of-day and weekday.

    The "category" values with ``min_date <= date < max_date`` are spread into one
    column per space id, averaged in 30-minute buckets and turned into absolute
    bucket-to-bucket differences. Those are averaged per (hour, weekday) for each
    space and finally averaged over all spaces.

    :param min_date: inclusive lower bound, comparable with the "date" index level
    :param max_date: exclusive upper bound, comparable with the "date" index level
    :param data: frame with "date", "id" and "name" index levels and a "category"
        column; defaults to the notebook-wide ``df_all``
    :return: DataFrame indexed by "hour" with one column per weekday, labelled
        ``"%w %A"`` (e.g. ``"1 Monday"``)
    """
    if data is None:
        data = df_all
    category = data.droplevel("name")["category"]
    dates = category.index.get_level_values("date")
    # a single combined mask instead of two successive filter passes
    category = category[(dates >= min_date) & (dates < max_date)]
    changes = category.unstack("id").resample("30min").mean().diff().abs()
    buckets = changes.index
    # mean per space within each (hour, weekday) first, then the mean over spaces
    per_space = changes.groupby(
        [buckets.hour.rename("hour"), buckets.strftime("%w %A").rename("weekday")]
    ).mean()
    return per_space.mean(axis=1).unstack("weekday")

# first day of every quarter from 2020 Q1 to 2021 Q4; each quarter ends where the next begins
quarter_starts = [
    datetime.datetime(year, month, 1)
    for year in (2020, 2021)
    for month in (1, 4, 7, 10)
]
# one frame per quarter (2020 Q1 .. 2021 Q3) ...
dfs = [
    get_per_hour(quarter_begin, quarter_end)
    for quarter_begin, quarter_end in zip(quarter_starts[:-1], quarter_starts[1:])
]
# ... followed by one frame spanning the whole period
dfs.append(get_per_hour(quarter_starts[0], quarter_starts[-1]))

In [None]:
# 2 x 4 grid: one panel per frame in `dfs`, all sharing the y axis
panel_titles = [
    "2020 Q1", "2020 Q2", "2020 Q3", "2020 Q4", "2021 Q1", "2021 Q2", "2021 Q3",
    "all (2020 Q1 - 2021 Q3)"
]
fig = make_subplots(
    rows=2,
    cols=4,
    vertical_spacing=0.12,
    horizontal_spacing=0.01,
    shared_yaxes="all",
    subplot_titles=panel_titles,
)
fig.update_layout(
    title="Activity per hour-of-day and weekday, for each quarter of year",
    height=700,
)
# one color per weekday column
weekday_colors = ["#777", "#c66", "#6c6", "#66c", "#6cc", "#cc6", "#ccc"]
for panel, df in enumerate(dfs):
    grid_row, grid_col = divmod(panel, 4)
    for trace in df.plot(color_discrete_sequence=weekday_colors).data:
        # only the traces of the first panel keep their legend entries
        if panel > 0:
            trace.showlegend = False
        fig.add_trace(trace, row=grid_row + 1, col=grid_col + 1)
fig

In [None]:

# absolute hour-to-hour change of "category", averaged per week and space
per_space_hourly = df_all.droplevel("name")["category"].unstack("id").resample("1h").mean()
abs_change = per_space_hourly.diff().abs()
# drop first and last incomplete buckets
df = abs_change.resample("1w").mean().iloc[1:-1]
# columns become "<id> <name>"
df.columns = df.columns.map(lambda space_id: f"{space_id} {id_2_name[space_id]}")
df.plot.bar(
    title="Changes of \"category\" from one hour to the next per week and station",
    labels={"value": "changes", "id": "parking space"},
    color_discrete_sequence=df.columns.map(lambda label: id_2_color[label[:6]]),
)

# percentage of each parking category per day

In [None]:
# daily mean of the "listed" flag per category, in percent, over rows with valid == 1
valid_rows = df_all[df_all["valid"] == 1].droplevel("name")
listed_by_category = (
    valid_rows.reset_index()
    .set_index(["date", "id", "category"])["listed"]
    .unstack("category")
    .fillna(0)  # a row contributes 0 to every category it does not belong to
)
df = listed_by_category.resample("1d", level="date").mean() * 100
# readable names for the four category columns
df.columns = ["<= 10", "> 10", "> 30", "> 50"]
category_colors = ["#a00", "#850", "#680", "#3a0"]
df.plot(
    title="Percentage of each \"category\" per day",
    labels={"value": "percent of category", "variable": "category"},
    color_discrete_sequence=category_colors,
)

In [None]:
# weekday index of 2020-03-16 (datetime counts Monday as 0)
datetime.datetime(year=2020, month=3, day=16).weekday()

In [None]:
# average the daily category percentages per weekday; the "%w %A" label starts with the
# weekday number (Sunday == 0), so the groups sort in calendar order.
# assign() works on a copy: `df` from the per-day percentage cell is not mutated, so that
# cell's plot still reproduces after this one has run.
df.assign(day=df.index.strftime("%w %A")).groupby("day").mean().plot(
    title="Mean percentage of each \"category\" per weekday",
    labels={"value": "percent of category", "variable": "category", "day": "weekday"},
)

# estimation of percentage occupied
Does not really work

In [None]:
# mean estimated percentage per ID: "num_free" relative to "mean_capacity", averaged per day
free_percent = df_all["num_free"] / df_all["mean_capacity"] * 100
daily_percent = free_percent.unstack("id").resample("1d", level="date").mean()
# keep only spaces whose day-to-day change averages above 0.15
df_mean = daily_percent.diff().abs().mean()
df = daily_percent.loc[:, df_mean > .15]
df.columns = df.columns.map(lambda space_id: f"{space_id} {id_2_name[space_id]}")
df.plot()

In [None]:
# number mapped from each category, relative to the space's median capacity (in percent)
MAPPING = {1: 5, 2: 15, 3: 35, 4: 55}
known_category = df_all["category"].dropna().astype(int)
mapped = known_category.map(lambda code: MAPPING[code])
daily_mapped = mapped.unstack("id").resample("1d", level="date").mean()
# one capacity per column, in column order
capacities = daily_mapped.columns.map(lambda space_id: id_2_capacity[space_id])
percent = daily_mapped.div(capacities, axis=1) * 100.
# keep only spaces whose daily percentage has a standard deviation above 5
df = percent.loc[:, percent.std() > 5].sort_index(axis=1)
df.columns = df.columns.map(lambda space_id: f"{space_id} {id_2_name[space_id]}")
df.plot()

# capacity

In [None]:
# mean capacity per ID and day
daily_capacity = df_all["capacity"].unstack("id").resample("1d", level="date").mean()
# selection: spaces whose mean absolute day-to-day change lies strictly between 0.1 and 10
df_mean = daily_capacity.diff().abs().mean()
changes_moderately = (df_mean > .1) & (df_mean < 10)
df = daily_capacity.loc[:, changes_moderately]
df.columns = df.columns.map(lambda space_id: f"{space_id} {id_2_name[space_id]}")
line_colors = df.columns.map(lambda label: id_2_color[label[:6]])
df.plot(
    title="Capacity per day and station (selection)",
    labels={"value": "capacity", "id": "parking space"},
    color_discrete_sequence=line_colors,
    height=600,
)

In [None]:
# look up the median capacity stored for space "100084"
id_2_capacity["100084"]

In [None]:
# capacity of space "100103", averaged in 30-minute buckets
is_space = df_all.index.get_level_values("id") == "100103"
df = df_all.loc[is_space, "capacity"].resample("30min", level="date").mean()
df.plot()

In [None]:
# average CHANGES: absolute hour-to-hour change of "category" per space ...
hourly_category = df_all.droplevel("name")["category"].unstack("id").resample("1h").mean()
df = hourly_category.diff().abs()
# ... drawn as weekly means
weekly_mean = df.resample("1w").mean()
weekly_mean.plot()

## plot individual

In [None]:
# daily mean of the "valid" flag for space "103129"
is_space = df_all.index.get_level_values("id") == "103129"
df = df_all[is_space]
daily_valid = df["valid"].resample("1d", level="date").mean()
daily_valid.plot()