# Turning tables

AUTHOR: Michal Mochtak (michal.mochtak@ru.nl), Peter Rupnik (peter.rupnik@ijs.si), Nikola Ljubešić

DATE: 2024-06-24

---

In this notebook we compare sentiment of politicians in opposition vs in coalition for every country.

On the first run, the data will be downloaded from the internet. 

Next, some preprocessing will be performed so that only MPs of national parliaments are examined, and new statistics will be calculated on the filtered data.

We then plot statistics as a function of latitude, and display them on a choropleth map.

In [57]:
!pip install pandas==2.2.2 plotly seaborn scipy iso3166 numpy matplotlib
import pandas as pd
from pathlib import Path
import seaborn as sns
from scipy.stats import pearsonr
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from iso3166 import countries
import plotly


if not Path("speeches.csv.zip").exists():
    from os import system

    system(
        "wget https://huggingface.co/datasets/5roop/parlasent_data/resolve/main/speeches.csv.zip"
    )
df = pd.read_csv("speeches.csv.zip")




  df = pd.read_csv("speeches.csv.zip")


In [58]:
df.country.unique()

array(['UA', 'PL', 'AT', 'HR', 'FI', 'BE', 'BG', 'EE', 'LV', 'FR', 'GB',
       'BA', 'IT', 'SI', 'ES', 'IS', 'TR', 'NL', 'SE', 'PT', 'HU', 'DK',
       'NO', 'GR', 'CZ', 'RS'], dtype=object)

In [59]:
# Filtering:
# Keep only MPs
c1 = df.Speaker_MP == "MP"

# Keep only speeches where speaker is either Opposition or Coalition:
c2 = df.Party_status.isin(["Opposition", "Coalition"])
df = df[c1 & c2].reset_index(drop=True)

# Keep only speakers that have at least 10 speeches.
grouped = (
    df.groupby(["Speaker_name", "Party_status"])
    .size()
    .reset_index(name="count")
    .pivot(index="Speaker_name", columns="Party_status", values="count")
    .fillna(0)
)
filtered_speakers = grouped[
    (grouped["Coalition"] >= 10) & (grouped["Opposition"] >= 10)
].index

ndf = df[df["Speaker_name"].isin(filtered_speakers)]
results = dict()
for country in sorted(df.country.unique().tolist()):
    subset = ndf[ndf.country == country]
    pivoted = (
        subset.groupby(["Speaker_name", "Party_status"])
        .logits_pondered.mean()
        .reset_index()
        .pivot(index="Speaker_name", columns="Party_status", values="logits_pondered")
    )
    try:
        pivoted["Difference"] = pivoted["Coalition"] - pivoted["Opposition"]
        current_mean = pivoted.Difference.mean()
        current_support = pivoted.shape[0]
        if current_support < 10:
            raise KeyError()
    except KeyError:
        current_mean = None
        current_support = 0
    results[country] = {"Difference": current_mean, "Support": current_support}
turning = pd.DataFrame(results).T.reset_index(names="country")
turning.loc[-1] = ["AD", -1.0, 0]
turning.reset_index(drop=True)
turning.loc[-1] = ["LI", 1.0, 0]
turning.reset_index(drop=True)


Unnamed: 0,country,Difference,Support
0,AT,0.709242,56.0
1,BA,-0.065664,15.0
2,BE,0.251016,70.0
3,BG,0.451272,43.0
4,CZ,0.567185,84.0
5,DK,0.468849,104.0
6,EE,,0.0
7,ES,0.49455,74.0
8,FI,0.534082,93.0
9,FR,0.20912,10.0


In [60]:
ndf.country.unique()

array(['UA', 'AT', 'HR', 'FI', 'BE', 'BG', 'LV', 'FR', 'BA', 'IT', 'SI',
       'ES', 'TR', 'NL', 'PT', 'HU', 'DK', 'GR', 'CZ', 'RS'], dtype=object)

In [61]:
turning

Unnamed: 0,country,Difference,Support
0,AT,0.709242,56.0
1,BA,-0.065664,15.0
2,BE,0.251016,70.0
3,BG,0.451272,43.0
4,CZ,0.567185,84.0
5,DK,0.468849,104.0
6,EE,,0.0
7,ES,0.49455,74.0
8,FI,0.534082,93.0
9,FR,0.20912,10.0


In [62]:
# Prepare a mapper from country codes as per ISO-3166-2 to ISO-3166-3:
from iso3166 import countries

iso2_to_iso3_mapper = {c.alpha2: c.alpha3 for c in countries}


turning["iso3"] = turning.country.apply(lambda s: iso2_to_iso3_mapper[s])


import plotly.express as px

fig = px.choropleth(
    turning,
    locations="iso3",
    locationmode="ISO-3",
    color="Difference",
    color_continuous_scale="Oranges",
    # scope="europe",
    title="Map of Coalition - Opposition scores",
    height=600,
    width=800,
    projection="mercator",
    center=dict(lat=45, lon=14),
    basemap_visible=True,
    fitbounds="locations",
)

# Display the map
fig.show()

KeyError: 'L'