#  Similarities
> Identifying the similarities between parties and members of the parliament based on their average voting behavior during roll call votes.

In [None]:
# Enable IPython's autoreload extension. Mode 2 reloads imported modules
# before each cell is executed, so edits to the project package are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Setup

In [None]:
import matplotlib.pyplot as plt
import polars as pl
from bundestag.fine_logging import setup_logging
import logging
from bundestag.paths import get_paths
from bundestag.ml.similarity import (
    get_votes_by_party,
    pivot_party_votes_df,
    prepare_votes_of_mdb,
    align_mdb_with_parties,
    compute_similarity,
    plot_overall_similarity,
    plot_similarity_over_time,
    plot,
    align_party_with_party,
    align_party_with_all_parties,
    compute_similarity,
)
from plotnine import (
    ggplot,
    aes,
    geom_point,
    scale_color_manual,
)

# Logger for this notebook, plus the project's logging configuration at
# DEBUG verbosity.
logger = logging.getLogger(__name__)
setup_logging(logging.DEBUG)

# Project path container rooted at "../data"; the trailing bare expression
# shows it as the cell output.
paths = get_paths("../data")
paths

In [None]:
# Location of the preprocessed bundestag.de votes (parquet); displayed as the
# cell output so the resolved path can be checked.
file = paths.preprocessed_bundestag / "bundestag.de_votes.parquet"
file

In [None]:
# Read the preprocessed votes with polars.
df = pl.read_parquet(file)

## Voting outcomes by party

In [None]:
# Summarise the votes per party. The result is used below with the columns
# "Fraktion/Gruppe", "date", "vote" and "fraction".
party_votes = get_votes_by_party(df)

In [None]:
# Preview the first rows of the per-party vote table.
party_votes.head()

In [None]:
party = "CDU/CSU"

# One fixed colour per vote category so that plots stay comparable across
# parties.
vote_colors = {
    "ja": "green",
    "nein": "red",
    "nichtabgegeben": "grey",
    "Enthaltung": "orange",
    "ungültig": "salmon",
}

# Restrict the per-party vote table to the selected party.
party_subset = party_votes.filter(pl.col("Fraktion/Gruppe") == party)

# Scatter of the vote fraction per date, coloured by vote category. The
# parenthesised expression is the cell's last statement and is therefore
# rendered as its output.
(
    ggplot(party_subset, aes(x="date", y="fraction", color="vote"))
    + geom_point(alpha=0.3)
    + scale_color_manual(
        breaks=list(vote_colors),
        values=list(vote_colors.values()),
    )
)

## Member-Party similarity

A member of the German federal parliament is called a *Mitglied des Bundestages*, abbreviated as `mdb` in the code below.

In [None]:
# Pivot the per-party votes; this pivoted table is the input for all
# alignment helpers further down. Preview the first rows.
party_votes_pivoted = pivot_party_votes_df(party_votes)
party_votes_pivoted.head()

In [None]:
# Pick one member of parliament by name and prepare their votes from the
# full vote table; preview the first rows.
mdb = "Peter Altmaier"
mdb_votes = prepare_votes_of_mdb(df, mdb)
mdb_votes.head()

In [None]:
# Align the member's votes with the pivoted party votes and compute the
# similarity in one go. NOTE(review): "_party" presumably identifies the
# party-side columns of the aligned frame (cf. "Fraktion/Gruppe_party" used
# by the plots) -- confirm against compute_similarity.
mdb_vs_parties = compute_similarity(
    align_mdb_with_parties(mdb_votes, party_votes_pivoted),
    suffix="_party",
)

# Preview the first three rows.
mdb_vs_parties.head(3)

In [None]:
# Show each similarity value next to the party it was computed against.
# The compared party lives in "Fraktion/Gruppe_party" -- the same column the
# similarity plots of this frame use -- so select that column rather than
# "Fraktion/Gruppe".
mdb_vs_parties[["Fraktion/Gruppe_party", "similarity"]]

In [None]:
# Overall similarity of the selected member, with the compared party
# ("Fraktion/Gruppe_party") passed as the x column.
plot_overall_similarity(
    mdb_vs_parties,
    x="Fraktion/Gruppe_party",
    title=f"Overall similarity of {mdb} with all parties",
)
plt.show()

### Similarity over time

In [None]:
# List the available columns of the aligned frame.
mdb_vs_parties.columns

In [None]:
# Similarity of the selected member over time; the second argument names the
# column holding the compared party.
plot_similarity_over_time(
    mdb_vs_parties, "Fraktion/Gruppe_party", title=f"{mdb} vs time"
)
plt.show()

In [None]:
# Combined figure via the project's `plot` helper, which takes one title for
# the overall view and one for the over-time view.
# NOTE(review): no party column is passed here, so the helper's default is
# used -- presumably "Fraktion/Gruppe_party"; confirm.
plot(
    mdb_vs_parties,
    title_overall=f"Overall similarity of {mdb} with all parties",
    title_over_time=f"{mdb} vs time",
)
# Tidy the subplot spacing before rendering.
plt.tight_layout()
plt.show()

## Party-Party similarity

In [None]:
# The two parties to compare.
party_a, party_b = "SPD", "BÜ90/GR"

# Align the pivoted votes of both parties and compute their similarity in a
# single expression. NOTE(review): "_b" presumably identifies the columns of
# the second party in the aligned frame -- confirm against
# align_party_with_party.
partyA_vs_partyB = compute_similarity(
    align_party_with_party(
        party_votes_pivoted,
        party_a=party_a,
        party_b=party_b,
    ),
    suffix="_b",
)

# Preview the first three rows.
partyA_vs_partyB.head(3)

In [None]:
# Reference party that is compared against all parties.
party = "SPD"

# Align the reference party with all parties and compute the similarity in a
# single expression; the compared party ends up in "Fraktion/Gruppe_b", the
# column used when plotting this frame.
partyA_vs_rest = compute_similarity(
    align_party_with_all_parties(party_votes_pivoted, party),
    suffix="_b",
)

# Preview the first three rows.
partyA_vs_rest.head(3)

In [None]:
# Combined figure for the reference party via the project's `plot` helper.
# The compared party is passed explicitly as "Fraktion/Gruppe_b", matching
# the "_b" suffix used when computing the similarity.
plot(
    partyA_vs_rest,
    title_overall=f"Overall similarity of {party} with all parties",
    title_over_time=f"{party} vs time",
    party_col="Fraktion/Gruppe_b",
)
# Tidy the subplot spacing before rendering.
plt.tight_layout()
plt.show()