In [None]:
# Add the parent directory to the module search path so the `src` package
# imported further down can be found (assumes the notebook sits one level
# below the directory that contains `src/` -- TODO confirm).
import sys
sys.path.append("..")

In [None]:
import plotly.express as px
from plotly.subplots import make_subplots

from src.elevators import *
from src.mapper import *

# Route DataFrame.plot()/Series.plot() through plotly instead of the default backend.
# NOTE(review): `pd` is not imported explicitly in this notebook; presumably it is
# re-exported by one of the star imports above -- confirm.
pd.options.plotting.backend = "plotly"

In [None]:
#station_mapper = ObjectMapper("stations")
#elevators_mapper = ElevatorsMapper()
#station_mapper(elevators_mapper("10001649")["stationnumber"])

In [None]:
# read sum of snapshots/listed/active per day and id
df_all = elevators_per_day(caching=True)#"write")
# Keep only rows dated 2020-01-27 or later. `.copy()` makes this an independent
# frame so the column assignments below never write into a slice of the loaded data.
df_all = df_all[df_all.index.get_level_values("date") >= "2020-01-27"].copy()

# Ratio of each per-day count to the number of snapshots taken that day.
for count_column in ("listed", "active", "inactive"):
    df_all[f"{count_column}_f"] = df_all[count_column] / df_all["snapshots"]

# Only object-dtype (string-like) columns get the "-" placeholder for missing values.
# The former check `isinstance(dt, np.object)` was true for *every* column
# (`np.object` was an alias of the builtin `object`), so numeric NaNs were turned
# into "-" as well; the alias is gone in NumPy >= 1.24 and `iteritems()` was
# removed in pandas 2.0.
STRING_FIELDS = [n for n, dt in df_all.dtypes.items() if pd.api.types.is_object_dtype(dt)]
for f in STRING_FIELDS:
    df_all[f] = df_all[f].replace(np.nan, "-")#.astype("category")

# Rows in which the machine was listed in at least one snapshot of that day.
df_all_listed = df_all[df_all["listed"] > 0]
df_all

In [None]:
def plot_defects(
    df_all: pd.DataFrame,
    marks=(0.2, 0.4, 0.6, 0.8, 1.),
    colors=("#0d0", "#3b0", "#690", "#860", "#c30"),
):
    """
    Plot, per day, how many rows reach each inactivity threshold.

    For every threshold in ``marks`` the rows with ``inactive_f >= mark`` are
    counted per value of the ``date`` index level. Days on which no row
    reaches a threshold are absent from that threshold's series (they show
    up as NaN after the series are aligned).

    :param df_all: frame with an index level named ``date`` and a column
        ``inactive_f``
    :param marks: inactivity thresholds; each becomes one line labelled
        with the threshold as a percentage
    :param colors: line colours, assigned to the thresholds in order
    :return: whatever ``DataFrame.plot`` of the active plotting backend returns
    """
    counts_per_mark = []
    for mark in marks:
        reached = df_all[df_all["inactive_f"] >= mark]
        # One pass per threshold instead of re-filtering the frame for every date.
        counts_per_mark.append(
            reached.groupby(level="date").size().rename(f"{mark*100:.0f}%")
        )
    # Align the per-threshold counts on the union of their dates.
    counts = pd.concat(counts_per_mark, axis=1)

    return counts.plot(
        title="Number of inactive elevators/escalators per day",
        labels={
            "variable": "Percentage inactive per day",
            "value": "Number of inactive machines",
        },
        color_discrete_sequence=list(colors),
    )

In [None]:
# Inactivity-threshold counts per day over the whole data set.
plot_defects(df_all)

In [None]:
# Spread the "listed" counts into one column per machine id.
listed_by_id = df_all["listed"].unstack("id")
# Cap every entry at 1 so a machine counts at most once per row, then add up across machines.
listed_per_day = listed_by_id.clip(lower=0, upper=1).sum(axis=1)
listed_per_day.plot(
    title="Number of listed machines per day",
    labels={"value": "num listed machines"},
)

In [None]:
# Daily mean of the snapshot count over the rows whose region is the "-" placeholder.
snapshots_without_region = df_all.loc[df_all["region"] == "-", "snapshots"]
snapshots_without_region.resample("1d", level="date").mean().plot()

In [None]:
SPLIT = "bearer"
# Active fraction per date, with one column per (id, SPLIT) pair, limited to [0, 1].
active_by_machine = (
    df_all_listed.reset_index().set_index(["date", "id", SPLIT])
    ["active_f"].unstack(["id", SPLIT]).clip(0, 1)
)
# Average the machine columns within each SPLIT group. The grouping is done on the
# transposed frame because `DataFrame.groupby(..., axis=1)` is deprecated since
# pandas 2.1; transposing back restores dates as rows and SPLIT groups as columns.
(active_by_machine.T.groupby(level=SPLIT).mean().T
 .resample("1d").mean().plot()#.bar(barmode="group")
)

In [None]:
# describe() summary of the `id` and `zentrale` columns within each `bearer` group.
rows_flat = df_all.reset_index()
rows_flat.groupby("bearer")[["id", "zentrale"]].describe()

In [None]:
# Active fraction of the listed rows, one column per machine id.
df = df_all_listed["active_f"].unstack("id")

# Order the machine columns by their mean active fraction over the rows
# dated July 2021 or later within 2021.
in_ranking_window = (df.index.year == 2021) & (df.index.month >= 7)
df_mean = df[in_ranking_window].mean()
df = df.sort_index(axis=1, key=lambda ids: df_mean[ids])

# Heatmap of the transposed frame: one row per machine.
px.imshow(df.T, height=1300)

In [None]:
# NOTE(review): scratch arithmetic whose result is only displayed, never assigned;
# its purpose is not evident from the notebook -- consider removing.
124.9-124*.64

In [None]:
#df = df_all[df_all["bearer"] == "Verband Region Stuttgart"]
SPLIT = "bearer"
# Number of distinct machine ids per SPLIT group; appended to the row labels below.
num_devices = df_all_listed.reset_index().groupby(SPLIT)["id"].nunique()
df = df_all_listed.reset_index().set_index(["date", "id", SPLIT])
# Active fraction in percent, one column per SPLIT group.
df = df["active_f"].unstack(SPLIT) * 100.
# Daily mean over all rows of a group.
df = df.resample("1d", level="date").mean()
# Order the groups by their mean over the rows dated June 2021 or later within 2021.
df_mean = df[(df.index.year == 2021) & (df.index.month >= 6)].mean()
df = df.sort_index(axis=1, key=lambda c: df_mean[c])
#df.columns = df.columns.astype(str)
df.columns = df.columns.map(lambda c: f"{c} ({num_devices[c]})")
px.imshow(
    df.T,
    title="Mean activity per day and \"Aufgabenträger\"",
    # px.imshow only understands the label keys "x", "y" and "color"; after the
    # transpose the groups run along the y axis.
    labels={"y": "Aufgabenträger"},
    height=700,
)

In [None]:
#df_all.reset_index().set_index(["date", "id", "region"])["active"].unstack("region").resample("1w", level="date").mean().plot()
# Sum only the two columns the ratio needs. Summing every column would also try to
# add up the string/object columns, which is slow and can raise on recent pandas.
df = df_all.groupby("operator")[["active", "snapshots"]].sum()
# Overall active share per operator, lowest first.
(df["active"] / df["snapshots"]).sort_values().plot.bar()

In [None]:
#plot_defects(df_all[df_all["operator"] == "DB Station&Service"])
# Inactivity-threshold counts restricted to the rows whose operator is "-"
# (the placeholder this notebook writes for missing string values).
plot_defects(df_all[df_all["operator"] == "-"])

# geo-plot of difference in activity

In [None]:
def geo_df(df_all: pd.DataFrame, label: str = "bearer") -> pd.DataFrame:
    """
    Aggregate a frame to one row per distinct ``geo_coords`` value.

    Rows whose ``geo_coords`` equals the placeholder ``"-"`` are dropped.
    The numeric columns are averaged per coordinate pair and three columns
    are added: ``label`` (first value of the ``label`` column seen for that
    pair), ``lat`` and ``lon``.

    :param df_all: frame with a ``geo_coords`` column of coordinate pairs and
        a column or index level named by ``label``
    :param label: name of the column used to label each location
    :return: frame indexed by ``geo_coords``
    """
    # Heuristic for pairs stored in either order: a component above this value
    # is taken to be the latitude.
    # NOTE(review): presumably tuned to central-European coordinates -- confirm.
    lat_threshold = 30

    df = df_all[df_all["geo_coords"] != "-"]

    # reset_index() so that `label` may also name an index level.
    label_mapping = df.reset_index().groupby("geo_coords")[label].unique()

    # numeric_only: the frame also carries string columns, which cannot be
    # averaged and make mean() raise on pandas >= 2.0.
    df = df.groupby("geo_coords").mean(numeric_only=True)
    df["label"] = label_mapping.map(lambda c: c[0])
    df["lat"] = df.index.map(lambda i: i[1] if i[1] > lat_threshold else i[0])
    df["lon"] = df.index.map(lambda i: i[0] if i[0] < lat_threshold else i[1])
    #df["city"] = group[""]
    return df#.reset_index().set_index(["lat", "lon"])

# Dates of all listed rows, used to cut out the two comparison periods.
date = df_all_listed.index.get_level_values("date")
in_first_period = (date >= "2021-01") & (date < "2021-6")
in_second_period = (date >= "2021-06") & (date < "2022-01")
df_1 = geo_df(df_all_listed[in_first_period])
df_2 = geo_df(df_all_listed[in_second_period])

# `df` is the very same object as `df_2`: the columns added below land on both names.
df = df_2
# Second-period minus first-period mean active fraction, scaled by 100.
df["active_change"] = (df_2["active_f"] - df_1["active_f"]) * 100
# Same marker size for every location.
df["size"] = 10
df.sort_values("active_change", ascending=False, inplace=True)

# Built after the sort so it lines up with the row order handed to plotly;
# locations without a computable change get opacity 0.
marker_opacity = df["active_change"].replace(np.nan, 0).abs() * .001

fig = px.scatter_geo(
    df,
    lat="lat",
    lon="lon",
    color="active_change",
    opacity=marker_opacity,
    size="size",
    size_max=40,
    hover_data=["label", "active_f"],
    scope="europe",
    fitbounds="locations",
    color_continuous_scale=["#c00", "#ccc", "#0c0"],
    labels={"active_change": "change of activity %"},
    height=1000,
    title="Change of mean elevator activity between first and second half of 2021",
)
fig

In [None]:
df = df_all_listed#[df_all_listed["bearer"] == "Verband Region Stuttgart"]
date = df.index.get_level_values("date")
# Per-location means over the rows dated 2021-06 up to (not including) 2021-11.
df = geo_df(df[(date >= "2021-06") & (date < "2021-11")], label="station_name")

# Same marker size for every location.
df["size"] = 10#(df["active_f"].replace(np.nan, 0) - df["active_f"].min())# / (df["active_f"].max() - df["active_f"].min())
df = df.sort_values("active_f", ascending=False)

fig = px.scatter_mapbox(
    df,
    #df[df["active_f"] < 0.6],
    lat="lat", lon="lon",
    color="active_f",
    # Less active locations are drawn more opaque: 0.5 at active_f == 0, 0.25 at active_f == 1.
    opacity=.5 * (1. - .5*df["active_f"].replace(np.nan, 0)),
    size_max=40,
    size="size",
    hover_data=["label", "active_f"],
    #scope="europe",
    #fitbounds="locations",
    mapbox_style="open-street-map",
    color_continuous_scale=["#c00", "#0c0"],
    range_color=[0, 1],
    # Title and label used to be copied from the change-of-activity map; this
    # figure shows the mean active fraction itself, not a change.
    labels={"active_f": "mean active fraction"},
    height=1000,
    title="Mean elevator activity per location, June to October 2021",
)

fig

In [None]:
# Distinct station names that contain the substring "Stuttgart".
station_names = df_all["station_name"].unique()
[name for name in station_names if "Stuttgart" in name]

In [None]:
# All rows recorded for the station named "Stuttgart Hbf".
is_stuttgart_hbf = df_all["station_name"] == "Stuttgart Hbf"
df_all.loc[is_stuttgart_hbf]