# Champions of sentiment discourse

AUTHOR: Michal Mochtak (michal.mochtak@ru.nl), Peter Rupnik (peter.rupnik@ijs.si), Nikola Ljubešić

DATE: 2024-06-24

---

In this notebook we look into specific countries and their sentiment scores at the speaker and party level.

On the first run, the data is downloaded from the internet. The next cell defines a function that filters the dataset by specific conditions (e.g. keeping only the MPs that have a minimum number of speeches on the record) and computes the share of negative speeches per speaker or party. In the following cells we inspect two countries in a comparable time frame, Croatia and the Netherlands, and then the entire corpus across its full time span.

In [1]:
import pandas as pd
pd.set_option('display.max_rows', None)
from pathlib import Path
import seaborn as sns
from IPython.display import display
# Download the dataset archive on the first run only; later runs reuse the local copy.
DATA_URL = "https://huggingface.co/datasets/5roop/parlasent_data/resolve/main/speeches.csv.zip"
DATA_PATH = Path("speeches.csv.zip")
if not DATA_PATH.exists():
    # Pure-Python download: no dependency on an external `wget` binary, and a
    # failed request raises instead of being silently ignored.
    from urllib.request import urlretrieve

    # Write to a temporary name first so an interrupted download never leaves a
    # truncated archive that would pass the exists() check on the next run.
    partial_path = DATA_PATH.with_name(DATA_PATH.name + ".part")
    urlretrieve(DATA_URL, str(partial_path))
    partial_path.replace(DATA_PATH)
df = pd.read_csv(DATA_PATH)

  df = pd.read_csv("speeches.csv.zip")


In [2]:
def calculate_sentiment(
    target="Speaker_name",
    *,
    country=None,
    term=None,
    data=None,
    min_char_length=100,
    min_speeches=10,
    negative_threshold=2.0,
):
    """Rank the values of ``target`` by their share of negative speeches.

    Parameters
    ----------
    target : str
        Column to group by (e.g. ``"Speaker_name"`` or ``"Speaker_party"``).
    country : str or None
        Value of the ``country`` column to keep. ``"all"`` or ``""`` keeps every
        country. ``None`` asks interactively via ``input``.
    term : str or None
        Value of the ``Term`` column to keep. ``""`` keeps every term. ``None``
        displays the available terms and asks interactively via ``input``.
    data : pandas.DataFrame or None
        Frame to analyse; defaults to the notebook-level ``df``.
    min_char_length : int
        Speeches with ``char_length`` below this value are dropped.
    min_speeches : int
        Speakers with fewer scored speeches than this are dropped.
    negative_threshold : float
        A speech counts as negative when ``logits_pondered`` is below this value.

    Returns
    -------
    pandas.DataFrame
        Columns ``[target, "percentage_of_negative", "count"]`` sorted by
        ``percentage_of_negative`` in descending order. Despite its name,
        ``percentage_of_negative`` is a fraction between 0 and 1.

    Notes
    -----
    The minimum-speeches filter is applied per ``Speaker_name`` *before* the
    term filter, so a speaker (and therefore a party) can appear with fewer
    than ``min_speeches`` rows within the selected term.
    """
    source = df if data is None else data

    if country is None:
        all_countries = source.country.unique().tolist()
        country = input(f"Choose country from {all_countries} \n(empty for all): ")

    # Filtering
    if country in ("all", ""):
        # Build the all-True mask on the frame's own index so that it aligns
        # with the other masks even when the index is not 0..n-1.
        in_country = pd.Series(True, index=source.index)
    else:
        in_country = source.country == country
    # Keep only MPs
    is_mp = source.Speaker_MP == "MP"
    # Drop very short speeches
    long_enough = source.char_length >= min_char_length
    base_mask = in_country & is_mp & long_enough

    # Keep only speakers with enough scored speeches
    speeches_per_speaker = (
        source[base_mask].groupby("Speaker_name").logits_pondered.count()
    )
    speakers_to_keep = speeches_per_speaker[speeches_per_speaker >= min_speeches].index
    selected = source[base_mask & source.Speaker_name.isin(speakers_to_keep)]

    if term is None:
        print("Available terms:")
        # String aggregator names avoid the pandas deprecation warning raised
        # when the builtins min/max are passed to agg().
        display(
            selected.groupby("Term")
            .agg({"Date": ["min", "max", "count"]})
            .sort_values(("Date", "min")),
            clear=True,
        )
        term = input(
            f"Choose term from {selected.Term.unique().tolist()} (empty for all): "
        )
    if term:
        selected = selected[selected.Term == term].reset_index(drop=True)

    def percentage_of_negative(scores: pd.Series) -> float:
        """Fraction of scores strictly below the negative threshold."""
        return (scores < negative_threshold).sum() / scores.shape[0]

    result = (
        selected.groupby([target])
        .agg({"logits_pondered": [percentage_of_negative, "count"]})
        .reset_index()
    )
    # Flatten the two-level column index produced by agg().
    result.columns = [target, "percentage_of_negative", "count"]
    return result.sort_values(by="percentage_of_negative", ascending=False)

Let's inspect the terms we have available, so that an approximately equal timeframe can be set:

In [3]:
# First date, last date and row count per country, term and MP status,
# ordered by the first date of each group.
# String aggregator names are used because passing the builtins min/max to
# agg() raises a deprecation warning in newer pandas.
(
    df[df.country.isin(["HR", "NL"])]
    .groupby(["country", "Term", "Speaker_MP"])
    .agg({"Date": ["min", "max", "count"]})
    .sort_values(by=("Date", "min"))
)

  df[df.country.isin(["HR", "NL"])].groupby("country Term Speaker_MP".split()).agg({
  df[df.country.isin(["HR", "NL"])].groupby("country Term Speaker_MP".split()).agg({


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Date,Date,Date
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,min,max,count
country,Term,Speaker_MP,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
HR,5. mandat,MP,2003-12-22,2007-10-12,74856
HR,5. mandat,notMP,2003-12-22,2007-10-12,4404
HR,5. mandat,-,2004-04-01,2007-10-11,2632
HR,6. mandat,MP,2008-01-11,2011-10-28,68561
HR,6. mandat,notMP,2008-01-11,2011-10-28,4200
HR,6. mandat,-,2008-02-21,2011-10-27,455
HR,7. mandat,notMP,2011-12-22,2015-09-24,3824
HR,7. mandat,MP,2011-12-22,2015-09-25,96544
HR,7. mandat,-,2012-01-27,2015-07-03,1650
NL,Meeting of the 28th Tweede Kamer,notMP,2014-04-16,2017-10-25,76021


In [4]:
# Croatian MPs in the "10. mandat" term, highest percentage_of_negative first.
calculate_sentiment(target="Speaker_name", country="HR", term="10. mandat")

Unnamed: 0,Speaker_name,percentage_of_negative,count
59,"Hasanbegović, Zlatko",0.961538,26
154,"Vidović Krišto, Karolina",0.95,300
21,"Bernardić, Davor",0.949153,59
119,"Penava, Ivan",0.928571,28
104,"Mlinarić, Stipo",0.91411,163
144,"Spajić, Daniel",0.902778,72
135,"Raspudić, Nino",0.896266,482
129,"Prkačin, Ante",0.895349,86
123,"Petrov, Božo",0.892216,167
54,"Grmoja, Nikola",0.878113,763


In [5]:
# Size (rows, columns) of the speaker-level table for the Dutch term.
calculate_sentiment(
    target="Speaker_name",
    country="NL",
    term="Meeting of the 36th Eerste Kamer",
).shape

(24, 3)

In [6]:
# Size (rows, columns) of the party-level table for the Croatian term.
calculate_sentiment(target="Speaker_party", country="HR", term="10. mandat").shape


(20, 3)

In [7]:
# Dutch parties in the selected term, highest percentage_of_negative first.
calculate_sentiment(
    target="Speaker_party",
    country="NL",
    term="Meeting of the 36th Eerste Kamer",
)

Unnamed: 0,Speaker_party,percentage_of_negative,count
4,FvD,0.740741,297
9,vanPareren,0.714286,7
6,PvdD,0.607143,84
0,-,0.564885,262
5,PvdA,0.546539,419
1,CDA,0.52514,179
2,CU,0.363208,212
3,D66,0.337349,166
7,SP,0.172414,29
8,VVD,0.137565,3482


In [8]:
# Parse the Date column so it can serve as a datetime index below.
df["Date"] = pd.to_datetime(df["Date"])

# Count scored Dutch speeches per term, party status and MP flag.
# The Date index is only needed for an optional yearly breakdown: add
# pd.Grouper(freq="1YS") as the first grouping key to enable it.
(
    df.loc[df["country"] == "NL"]
    .set_index("Date")
    .groupby(["Term", "Party_status", "Speaker_MP"])["logits_pondered"]
    .count()
)

Term                              Party_status  Speaker_MP
Meeting of the 28th Tweede Kamer  -             MP             15091
                                                notMP           9856
                                  Coalition     MP             54753
                                                notMP          33608
                                  Opposition    MP             30175
                                                notMP          32557
Meeting of the 29th Tweede Kamer  -             MP            110694
                                                notMP          31010
                                  Coalition     MP             28734
                                                notMP          74127
                                  Opposition    MP             22160
                                                notMP          54426
Meeting of the 30th Tweede Kamer  -             MP                48
                                            

# Overall most negative and most positive parties



In [9]:
# All countries and all terms: the 20 party names with the highest percentage_of_negative.
calculate_sentiment(target="Speaker_party_name", country="", term="").head(20)

Unnamed: 0,Speaker_party_name,percentage_of_negative,count
404,Párbeszéd Magyarországért,1.0,24
462,Srpska napredna stranka;Narodna seljačka stranka,1.0,4
87,Crossbench;Labour,1.0,1
57,Bündnis Zukunft Österreich,1.0,5
591,Парламентарна група: Демократична България;Пар...,1.0,1
121,Edustaja Väyrynen,1.0,3
231,Jobbik Magyarországért Mozgalom,0.969697,33
295,Magyar Szolidaritás Mozgalom,0.962963,27
409,Respect,0.9375,16
352,PZDD,0.929078,141


In [10]:
# All countries and all terms: the 20 party names with the lowest percentage_of_negative.
calculate_sentiment(target="Speaker_party_name", country="", term="").tail(20)

Unnamed: 0,Speaker_party_name,percentage_of_negative,count
562,Блок Петра Порошенка,0.058781,22218
607,"Політична партія ""Партія регіонів""",0.056925,4989
504,Ujedinjena seljačka stranka,0.055394,343
144,Forza Italia Berlusconi Presidente-UDC,0.05,20
296,Magyarországi Németek Országos Önkormányzata,0.048544,103
564,"Всеукраїнське об'єднання ""Батьківщина""",0.047456,5858
118,Državljanska lista Gregorja Viranta,0.02707,1256
609,"Політична партія ""Слуга народу""",0.021551,18468
604,"Політична партія ""Народний фронт""",0.018041,26384
614,Політична партія «УДАР (Український Демократич...,0.0,2
