# Analytics
> Investigating voting behavior.

In [None]:
# hide
# hide
import sys

import numpy as np
import pandas as pd
import plotly.express as px

from bundestag import similarity

# Enable IPython's autoreload extension; mode 2 re-imports modules before each cell runs,
# so edits to imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Put the parent directory on the module search path.
# NOTE(review): this runs after `from bundestag import similarity` in the preceding cell;
# if that import depends on this path entry, the append has to come first - confirm.
sys.path.append("..")

In [None]:
%%time
# Load ../votes.parquet into a DataFrame; the %%time cell magic reports how long the cell takes.
df = pd.read_parquet("../votes.parquet")

In [None]:
%%time
other_cols = ["date", "Fraktion/Gruppe"]
df_squished = similarity.get_squished_dataframe(df, other_cols=other_cols)

In [None]:
# Peek at the first rows of the squished frame.
df_squished.head()

In [None]:
# Number of rows per distinct "Bezeichnung" value, most frequent first.
appearances = df_squished["Bezeichnung"].value_counts()

In [None]:
# Show both ends of the frequency table: the 20 most and the 20 least frequent entries.
display(appearances.head(20), appearances.tail(20))

In [None]:
# Summary statistics of the squished frame.
df_squished.describe()

In [None]:
# Number of rows per "Fraktion/Gruppe" value.
df_squished["Fraktion/Gruppe"].value_counts()

In [None]:
# Relative frequency of each "vote" value (normalize=True yields fractions, not counts).
df_squished["vote"].value_counts(normalize=True)

In [None]:
# Rows whose vote is "nichtabgegeben" (German: "not cast"), counted per "Bezeichnung".
df_squished[df_squished["vote"].eq("nichtabgegeben")][
    "Bezeichnung"
].value_counts()

In [None]:
def add_share(df: pd.DataFrame):
    """Add a ``share`` column: each row's ``count`` relative to its ``Bezeichnung`` total.

    ``df`` must have a ``count`` column and an index level named
    ``Bezeichnung``. The column is written into ``df`` itself (the frame is
    modified in place) and the same object is returned, which makes the
    function usable with ``DataFrame.pipe``.

    Raises ``AssertionError`` if the shares within any ``Bezeichnung`` group
    do not sum to 1.
    """
    counts = df["count"]
    # Per-Bezeichnung totals; the division below broadcasts them back onto
    # every row through the shared index level name.
    totals = counts.groupby(level="Bezeichnung").sum()
    df.loc[:, "share"] = counts / totals

    # Sanity check: within each group the shares have to add up to one.
    share_sums = df["share"].groupby(level="Bezeichnung").sum()
    assert np.allclose(share_sums, 1)
    return df


# Row count per (Bezeichnung, vote) pair, plus that pair's share of the
# Bezeichnung's total row count.
vote_stats = add_share(
    df_squished.groupby(["Bezeichnung", "vote"]).size().to_frame(name="count")
)
vote_stats.head()

In [None]:
# The 20 entries with the highest share of "nichtabgegeben" votes.
# (slice(None), key) is the plain-tuple spelling of pd.IndexSlice[:, key].
vote_stats.loc[(slice(None), "nichtabgegeben"), :].sort_values(
    by="share", ascending=False
).head(20)

In [None]:
# The 20 entries with the lowest share of "nichtabgegeben" votes.
# NOTE(review): the count filter is applied per (Bezeichnung, vote) row, so after the
# slice it is the "nichtabgegeben" count itself that must exceed 25 - confirm this is
# intended rather than a threshold on the total per Bezeichnung.
vote_stats.loc[vote_stats["count"].gt(25)].loc[
    (slice(None), "nichtabgegeben"), :
].sort_values(by="share", ascending=True).head(20)

In [None]:
# Line chart: number of distinct "Bezeichnung" values per date.
fig = px.line(
    data_frame=df_squished.groupby(["date"])["Bezeichnung"]
    .nunique()
    .reset_index(),
    x="date",
    y="Bezeichnung",
)
# Pin the y-axis to a fixed 0-720 range.
fig.update_yaxes(range=(0, 720))

In [None]:
# Scatter plot: number of distinct "Bezeichnung" values per date, one colour per vote value.
fig = px.scatter(
    data_frame=df_squished.groupby(["date", "vote"])["Bezeichnung"]
    .nunique()
    .reset_index(),
    x="date",
    y="Bezeichnung",
    color="vote",
)
# Pin the y-axis to a fixed 0-720 range.
fig.update_yaxes(range=(0, 720))

In [None]:
# Scatter plot as above, split into one facet row per "Fraktion/Gruppe":
# distinct "Bezeichnung" values per date and vote value within each group.
fig = px.scatter(
    data_frame=df_squished.groupby(["date", "vote", "Fraktion/Gruppe"])[
        "Bezeichnung"
    ]
    .nunique()
    .reset_index(),
    x="date",
    y="Bezeichnung",
    color="vote",
    facet_row="Fraktion/Gruppe",
    height=2000,
)
# Pin every facet's y-axis to the same fixed 0-720 range.
fig.update_yaxes(range=(0, 720))

party agreement vs time