# Champions of sentiment discourse

AUTHOR: Michal Mochtak (michal.mochtak@ru.nl), Peter Rupnik (peter.rupnik@ijs.si), Nikola Ljubešić

DATE: 2024-06-24

---

In this notebook we look into specific countries and their sentiment scores on speaker- and party-level.

On the first run, the data will be downloaded from the internet. In the next cell a function was prepared to filter the dataset by specific conditions (e.g. taking only the MPs that have a specific number of speeches on the record). In the next cells we will inspect two countries in a comparable time frame, Croatia and France, and then the entire corpus across full time span.

In [1]:
import pandas as pd
pd.set_option('display.max_rows', None)
from pathlib import Path
import seaborn as sns
from IPython.display import display
if not Path("speeches.csv.zip").exists():
    from os import system
    system("wget https://huggingface.co/datasets/5roop/parlasent_data/resolve/main/speeches.csv.zip")
df = pd.read_csv("speeches.csv.zip")

  df = pd.read_csv("speeches.csv.zip")


In [2]:
def calculate_sentiment(target="Speaker_name", *, country=None, term=None, filter=None, sort_by="percentage_of_negative"):
    global df
    all_countries = df.country.unique().tolist()
    if country == None:
        country = input(f"Choose country from {all_countries} \n(empty for all): ")

    # Filtering
    # Select speeches from a specific country:
    c0 = df.country == country
    if country in ["all", ""]:
        c0 = pd.Series([True for i in df.country])
    # Keep only MPs
    c1 = (df.Speaker_MP == "MP")
    # Limit searches to speeches longer than 100 characters:
    c2 = df.char_length >= 100
    # Include only speakers with at least 10 speeches:
    gb = df[c0&c1&c2].groupby("Speaker_name").logits_pondered.count().reset_index()
    speakers_to_keep = gb.Speaker_name[gb.logits_pondered >= 10]
    c3 = df.Speaker_name.isin(speakers_to_keep)
    ndf = df[c0&c1&c2&c3]
    if term == None:
        print(f"Available terms:")
        display(ndf.groupby("Term").agg({
            "Date": [min, max, "count"],
        }).sort_values(("Date", "min")), clear=True, )
        term = input(f"Choose term from {ndf.Term.unique().tolist()} (empty for all): ")
    if term:
        c0 = ndf.Term == term
        nndf = ndf[c0].reset_index(drop=True)
    else:
        nndf = ndf
    def percentage_of_negative(l: pd.Series) -> float:
        return (l < 2.0).sum()/l.shape[0]
    def percentage_of_positive(l: pd.Series) -> float:
        return (l > 4.0).sum()/l.shape[0]
    def get_country(l: pd.Series) -> str:
        return l.values[0]
    gb2 = nndf.groupby([f"{target}"]).agg({
        "country": get_country,
        "logits_pondered": [percentage_of_negative, percentage_of_positive, "count"],
    }).reset_index()
    gb2.columns = f"{target} country percentage_of_negative percentage_of_positive count".split()

    # gb2 = gb2.merge(gb1, on=f"{target}", how="left")
    gb2 = gb2.sort_values(by=sort_by, ascending=False)

    if filter:
        sigma = gb2["count"].std()
        mu = gb2["count"].median()
        q1, q3 = gb2["count"].quantile([0.25, 0.75]).values.tolist()
        if filter == "1sd":
            c = (gb2["count"] >= mu - sigma) & (gb2["count"] <= mu+sigma)
        elif filter == "2sd":
            c = (gb2["count"] >= mu - 2*sigma) & (gb2["count"] <= mu + 2*sigma)
        elif filter == "interquartile":
            c = (gb2["count"] >= q1) & (gb2["count"] <= q3)
        elif filter == "Q1+":
            c = (gb2["count"] >= q1) & (gb2["count"] <= q3)
        else:
            raise NotImplementedError("Only accepts '1sd' or '2sd' for now")
        gb2 = gb2[c]
    return gb2.reset_index(drop=True)

In [3]:
# df[df.country.isin(["HR", "FR"])].groupby("country Term Speaker_MP".split()).agg({
#     "Date": [min, max, "count"]
# }).sort_values(by=("Date", "min"))

In [4]:
calculate_sentiment("Speaker_name", country="HR", term="9. mandat").head()

Unnamed: 0,Speaker_name,country,percentage_of_negative,percentage_of_positive,count
0,"Esih, Bruna",HR,1.0,0.0,13
1,"Glasnović, Željko",HR,0.973545,0.0,567
2,"Klarin, Ivan",HR,0.942857,0.0,35
3,"Pernar, Ivan",HR,0.939297,0.003834,1565
4,"Bunjac, Branimir",HR,0.875089,0.002855,1401


In [5]:
calculate_sentiment("Speaker_name", country="FR", term="15e législature").head()

Unnamed: 0,Speaker_name,country,percentage_of_negative,percentage_of_positive,count
0,"Parigi, Jean-François",FR,0.928571,0.0,14
1,"Dupont-Aignan, Nicolas",FR,0.926335,0.01105,543
2,"Houplain, Myriane",FR,0.923077,0.0,13
3,"Cornut-Gentille, François",FR,0.857143,0.0,84
4,"Evrard, José",FR,0.846154,0.0,26


In [6]:
calculate_sentiment("Speaker_party", country="HR", term="9. mandat")


Unnamed: 0,Speaker_party,country,percentage_of_negative,percentage_of_positive,count
0,Živi zid,HR,0.894138,0.003208,3429
1,HRAST,HR,0.791436,0.015193,724
2,PH,HR,0.767142,0.019009,1473
3,HSS,HR,0.677419,0.029271,1674
4,MOST,HR,0.656041,0.01838,10446
5,SDP,HR,0.62389,0.022892,14634
6,HSLS,HR,0.615385,0.076923,13
7,SNAGA,HR,0.601911,0.036624,628
8,HSU,HR,0.5888,0.0256,625
9,IDS,HR,0.477612,0.042644,469


In [7]:
calculate_sentiment("Speaker_party", country="FR", term="15e législature")

Unnamed: 0,Speaker_party,country,percentage_of_negative,percentage_of_positive,count
0,FI,FR,0.781236,0.010553,12982
1,GDR,FR,0.694278,0.017181,6519
2,UDI_I,FR,0.659779,0.017343,2710
3,-,FR,0.64128,0.024,6250
4,NG,FR,0.630577,0.01741,1838
5,LT,FR,0.602067,0.021814,4355
6,LR,FR,0.521784,0.015534,27360
7,LC,FR,0.517361,0.045139,576
8,EDS,FR,0.513178,0.046512,645
9,UDI-A-I,FR,0.492188,0.03683,896


# Overall most negative and most positive parties



Top 50 most negative speakers:

In [8]:
calculate_sentiment("Speaker_name", country="", term="",filter="Q1+").head(50)


Unnamed: 0,Speaker_name,country,percentage_of_negative,percentage_of_positive,count
0,"Montero Soler, Alberto",ES,1.0,0.0,45
1,"Abascal Conde, Santiago",ES,0.990741,0.0,108
2,"Karakōstas, Eyaggelos",GR,0.983051,0.0,59
3,"Božić-Talijan, Elena",RS,0.981481,0.0,54
4,"Mpoyrnoys, Iōannīs",GR,0.980769,0.0,52
5,"Şık, Ahmet",TR,0.98,0.0,50
6,"Radić, Gojko",RS,0.978723,0.0,47
7,"Santamaría Ruiz, Luis",ES,0.977778,0.022222,45
8,"Komnīnaka, Maria",GR,0.976923,0.0,130
9,"Chatzīsavvas, Chrīstos",GR,0.976744,0.0,86


50 most positive speakers:

In [9]:
calculate_sentiment("Speaker_name", country="", term="",filter="Q1+", sort_by="percentage_of_positive").head(50)


Unnamed: 0,Speaker_name,country,percentage_of_negative,percentage_of_positive,count
0,"Popović, Branko",RS,0.0,0.977778,45
1,"Gültekin, Selim",TR,0.022727,0.75,88
2,"Kompirović, Stanija",RS,0.032787,0.688525,61
3,"Milojević, Milosav",RS,0.0,0.6875,48
4,"Özkan, Hacı",TR,0.039062,0.667969,256
5,"Marković, Vesna",RS,0.046512,0.627907,43
6,"Davidovac, Tijana",RS,0.018519,0.611111,54
7,"Savkić, Dragan",RS,0.0,0.592593,54
8,"Baričević, Danica",HR,0.071895,0.581699,153
9,"Lukić, Mladen",RS,0.066667,0.577778,45


50 most negative parties:

In [10]:
calculate_sentiment("Speaker_party_name", country="", term="",filter="Q1+").head(50)


Unnamed: 0,Speaker_party_name,country,percentage_of_negative,percentage_of_positive,count
0,PZDD,RS,0.929078,0.0,141
1,KV,RS,0.897059,0.0,136
2,Vox,ES,0.895965,0.01198,1586
3,Emek Partisi,TR,0.895735,0.009479,211
4,Živi zid,HR,0.894138,0.003208,3429
5,"Grandi Autonomie e Libertà (Grande Sud, Popola...",IT,0.888889,0.0,243
6,A Párbeszéd Magyarországért parlamenti frakció...,HU,0.884211,0.003509,285
7,Λαϊκός Σύνδεσμος - Χρυσή Αυγή,GR,0.876892,0.006375,2510
8,Candidatura d'Unitat Popular,ES,0.871186,0.0,295
9,A Momentum Mozgalom parlamenti frakciója a 9. ...,HU,0.867384,0.003584,279


50 most positive parties:

In [11]:
calculate_sentiment("Speaker_party_name", country="", term="",filter="Q1+", sort_by="percentage_of_positive").head(50)


Unnamed: 0,Speaker_party_name,country,percentage_of_negative,percentage_of_positive,count
0,Ujedinjena seljačka stranka,RS,0.055394,0.451895,343
1,Srpska narodna partija,RS,0.252577,0.242268,194
2,Депутатська група «Платформа за життя та мир»,UA,0.187251,0.183267,502
3,Büyük Birlik Partisi,TR,0.293333,0.18,150
4,Депутатська група «Відновлення України»,UA,0.283019,0.179245,212
5,Non-affiliated;Conservative,GB,0.175,0.170833,240
6,Savez vojvođanskih Mađara,RS,0.144338,0.141946,1254
7,Građanska demokratska stranka,BA,0.180488,0.126829,205
8,Jedinstvena Srbija,RS,0.203433,0.12618,1165
9,Srpski narodni savez,BA,0.07947,0.125828,151
