In [1]:
import polars as pl
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Needs the dataset processed by sentiment.ipynb.
PARLA_PARQUET_PATH = "ParlaMind_all.parquet"
df_parla = pl.read_parquet(PARLA_PARQUET_PATH)

KeyboardInterrupt: 

In [None]:
# Keep only rows whose "speechContent" is at least this many characters long.
MIN_SPEECH_CHARS = 40
df_parla = df_parla.filter(
    pl.col("speechContent").str.len_chars() >= MIN_SPEECH_CHARS
)

In [None]:
# df_parla  # uncomment to display the frame at this point in the notebook

In [None]:
def plot_sentiment_per_speech(df_sentiment):
    """Plot, per year, how many rows carry each "sent_pred" label.

    Args:
        df_sentiment: polars DataFrame with a "date" column of "%Y-%m-%d"
            strings and a "sent_pred" column (presumably one of
            "positive" / "negative" / "neutral", matching the colour map).

    Displays a plotly line chart with one line per "sent_pred" value and
    returns None.
    """
    label_colors = {"positive": "green", "negative": "red", "neutral": "blue"}

    # Parse the date strings, then keep the calendar year next to each row.
    with_year = df_sentiment.with_columns(
        pl.col("date").str.strptime(pl.Date, "%Y-%m-%d").alias("date_parsed")
    ).with_columns(pl.col("date_parsed").dt.year().alias("year"))

    # One row per (year, label) holding the number of rows in that bucket.
    yearly_counts = (
        with_year.group_by(["year", "sent_pred"])
        .agg(pl.len().alias("sentiment_count"))
        .sort("year")
    )

    fig = px.line(
        yearly_counts,
        x="year",
        y="sentiment_count",
        color="sent_pred",
        color_discrete_map=label_colors,
    )
    fig.show()


In [None]:
def plot_sentiment_per_party_perc(df_sentiment):
    """Plot, for each party, the percentage of its rows per "sent_pred" label.

    Args:
        df_sentiment: polars DataFrame with an "abbreviation" column (party
            name) and a "sent_pred" column (predicted sentiment label;
            presumably one row per speech).

    Only rows whose "abbreviation" appears in ``party_list`` are used.
    Percentages are computed within a party, so each party's bars add up
    to 100. Displays a grouped plotly bar chart and returns None.
    """
    party_list = ['AfD', 'Grüne', 'CDU/CSU', 'FDP', 'SPD', 'DIE LINKE.', "BSW"]
    sentiment_colors = {"positive": "green", "negative": "red", "neutral": "blue"}

    # Number of rows per (party, label).
    df_party_counts = (
        df_sentiment
        .filter(pl.col("abbreviation").is_in(party_list))
        .group_by(["abbreviation", "sent_pred"])
        .agg(pl.len().alias("sentiment_count"))
    )

    df_with_pct = (
        df_party_counts
        .with_columns(
            pl.col("sentiment_count").sum().over("abbreviation").alias("total_party_count")
        )
        .with_columns(
            (pl.col("sentiment_count") / pl.col("total_party_count") * 100).alias("percentage")
        )
        # group_by gives no row-order guarantee; sort so repeated runs
        # hand plotly the same frame.
        .sort(["abbreviation", "sent_pred"])
    )

    fig = px.bar(
        data_frame=df_with_pct,
        x="abbreviation",
        y="percentage",
        color="sent_pred",
        barmode="group",
        color_discrete_map=sentiment_colors,
        # Pin the x-axis to the party_list order and the legend to the
        # colour-map order instead of first-appearance order.
        category_orders={
            "abbreviation": party_list,
            "sent_pred": list(sentiment_colors),
        },
    )

    fig.update_layout(
        title="Sentiment by party in percentage",
        yaxis_title='Percentage',
        xaxis_title='Political Party',
        showlegend=True,
        legend_title='Sentiment'
    )

    fig.show()


In [None]:
def plot_sentiment_per_sentence(df_sentiment):
    """Plot the yearly number of sentence-level sentiment labels.

    Args:
        df_sentiment: polars DataFrame with a "date" column of "%Y-%m-%d"
            strings and a "sentences_sentiment" column whose entries are
            iterables of labels ("positive" / "neutral" / "negative");
            presumably one label per sentence of the row's speech.

    For every year the labels of all rows are summed up and drawn as three
    lines (positive, negative, neutral). Displays the plotly figure and
    returns None.
    """
    # Tally the three labels for every row; other label values are ignored.
    pos_list = []
    neu_list = []
    neg_list = []
    for sentence_sentiment in df_sentiment["sentences_sentiment"]:
        # A null entry has no labels and contributes zero to every tally.
        labels = [] if sentence_sentiment is None else list(sentence_sentiment)
        pos_list.append(labels.count("positive"))
        neu_list.append(labels.count("neutral"))
        neg_list.append(labels.count("negative"))

    # Sum straight from the per-row tallies. Grouping on the tally columns
    # first would merge rows of one year that share identical tallies, and
    # the yearly sums would then count them only once.
    df_yearly_sum = (
        df_sentiment
        .with_columns(
            # Explicit dtype keeps the columns summable for an empty frame.
            pl.Series("pos_per_sentence", pos_list, dtype=pl.Int64),
            pl.Series("neg_per_sentence", neg_list, dtype=pl.Int64),
            pl.Series("neu_per_sentence", neu_list, dtype=pl.Int64),
            pl.col("date").str.strptime(pl.Date, "%Y-%m-%d").dt.year().alias("year"),
        )
        .group_by("year")
        .agg(
            pl.col("pos_per_sentence").sum(),
            pl.col("neg_per_sentence").sum(),
            pl.col("neu_per_sentence").sum(),
        )
        .sort("year")
    )

    # (column, legend name, colour) - colours match the green/red/blue
    # mapping used by the other sentiment charts in this notebook.
    traces = (
        ("pos_per_sentence", "Positive", "green"),
        ("neg_per_sentence", "Negative", "red"),
        ("neu_per_sentence", "Neutral", "blue"),
    )

    years = df_yearly_sum["year"].to_list()
    fig = go.Figure()
    for column, trace_name, color in traces:
        fig.add_trace(go.Scatter(x=years,
                                 y=df_yearly_sum[column].to_list(),
                                 name=trace_name,
                                 line=dict(color=color)))

    fig.update_layout(
        title='Sentiment Analyse pro Jahr',
        xaxis_title='Jahr',
        yaxis_title='Summe der Sentiments',
        legend_title='Sentiment Typ'
    )

    fig.show()


In [None]:
def plot_sentiment_per_party_sentence_perc(df_sentiment):
    """Plot, per party, the percentage split of sentence-level sentiment labels.

    Args:
        df_sentiment: polars DataFrame with an "abbreviation" column (party
            name) and a "sentences_sentiment" column whose entries are
            iterables of labels ("positive" / "neutral" / "negative");
            presumably one label per sentence of the row's speech.

    Only rows whose "abbreviation" appears in ``party_list`` are used. The
    labels of all rows of a party are summed and converted to percentages
    of that party's total. Displays a grouped plotly bar chart and returns
    None.
    """
    party_list = ['AfD', 'Grüne', 'CDU/CSU', 'FDP', 'SPD', 'DIE LINKE.', "BSW"]

    df_sentiment = df_sentiment.filter(pl.col("abbreviation").is_in(party_list))

    # Tally the three labels for every row; other label values are ignored.
    pos_list = []
    neu_list = []
    neg_list = []
    for sentence_sentiment in df_sentiment["sentences_sentiment"]:
        # A null entry has no labels and contributes zero to every tally.
        labels = [] if sentence_sentiment is None else list(sentence_sentiment)
        pos_list.append(labels.count("positive"))
        neu_list.append(labels.count("neutral"))
        neg_list.append(labels.count("negative"))

    # Sum straight from the per-row tallies. Grouping on the tally columns
    # first would merge rows of one party that share identical tallies, and
    # the party sums would then count them only once.
    df_party_sum = (
        df_sentiment
        .with_columns(
            # Explicit dtype keeps the columns summable for an empty frame.
            pl.Series("pos_per_sentence", pos_list, dtype=pl.Int64),
            pl.Series("neg_per_sentence", neg_list, dtype=pl.Int64),
            pl.Series("neu_per_sentence", neu_list, dtype=pl.Int64),
        )
        .group_by("abbreviation")
        .agg(
            pl.col("pos_per_sentence").sum(),
            pl.col("neg_per_sentence").sum(),
            pl.col("neu_per_sentence").sum(),
        )
    )

    df_party_perc = df_party_sum.with_columns(
        (
            pl.col("pos_per_sentence")
            + pl.col("neg_per_sentence")
            + pl.col("neu_per_sentence")
        ).alias("total_amount")
    ).with_columns(
        (pl.col("pos_per_sentence") / pl.col("total_amount") * 100).alias("pos_perc"),
        (pl.col("neg_per_sentence") / pl.col("total_amount") * 100).alias("neg_perc"),
        (pl.col("neu_per_sentence") / pl.col("total_amount") * 100).alias("neu_perc"),
    )

    # Long format: one row per (party, percentage column) for px.bar.
    plot_df = df_party_perc.unpivot(
        index=["abbreviation"],
        on=["pos_perc", "neg_perc", "neu_perc"],
        variable_name="sentiment",
        value_name="percentage"
    )

    fig = px.bar(plot_df.to_pandas(),
                 x='abbreviation',
                 y='percentage',
                 color='sentiment',
                 title='Sentiment Distribution by Political Party',
                 labels={'percentage': 'Percentage', 'abbreviation': 'Party'},
                 color_discrete_map={'pos_perc': 'green',
                                     'neg_perc': 'red',
                                     'neu_perc': 'blue'},
                 # group_by gives no row-order guarantee; pin the x-axis
                 # to the party_list order.
                 category_orders={'abbreviation': party_list},
                 barmode='group')

    fig.update_layout(
        yaxis_title='Percentage',
        xaxis_title='Political Party',
        showlegend=True,
        legend_title='Sentiment'
    )

    fig.show()

    

In [None]:
def plot_sentiment_per_sentence_percentage_(df_sentiment):
    """Plot the yearly percentage split of sentence-level sentiment labels.

    Args:
        df_sentiment: polars DataFrame with a "date" column of "%Y-%m-%d"
            strings and a "sentences_sentiment" column whose entries are
            iterables of labels ("positive" / "neutral" / "negative");
            presumably one label per sentence of the row's speech.

    Per year, the three label totals are expressed as a percentage of their
    sum and drawn as three lines. Displays the plotly figure and returns
    None.
    """
    import polars as pl
    import plotly.graph_objects as go

    # Per-row tallies, one list per tracked label.
    tracked_labels = ("positive", "negative", "neutral")
    tallies = {label: [] for label in tracked_labels}
    for row_labels in df_sentiment.select("sentences_sentiment").to_series():
        row_labels = list(row_labels)
        for label in tracked_labels:
            tallies[label].append(row_labels.count(label))

    with_tallies = df_sentiment.with_columns(
        pl.Series(name="pos_per_sentence", values=tallies["positive"]),
        pl.Series(name="neg_per_sentence", values=tallies["negative"]),
        pl.Series(name="neu_per_sentence", values=tallies["neutral"]),
    )

    with_year = with_tallies.with_columns(
        pl.col("date").str.strptime(pl.Date, "%Y-%m-%d").alias("date_parsed")
    ).with_columns(pl.col("date_parsed").dt.year().alias("year"))

    # Yearly label totals.
    yearly = (
        with_year.group_by("year")
        .agg(
            [
                pl.col("pos_per_sentence").sum().alias("total_positive"),
                pl.col("neg_per_sentence").sum().alias("total_negative"),
                pl.col("neu_per_sentence").sum().alias("total_neutral"),
            ]
        )
        .sort("year")
    )

    # Each label total as a share of the year's overall label count.
    overall = pl.col("total_positive") + pl.col("total_negative") + pl.col("total_neutral")
    yearly = yearly.with_columns(overall.alias("total_sentiments")).with_columns(
        [
            (pl.col("total_positive") / pl.col("total_sentiments") * 100).alias("positive_percentage"),
            (pl.col("total_negative") / pl.col("total_sentiments") * 100).alias("negative_percentage"),
            (pl.col("total_neutral") / pl.col("total_sentiments") * 100).alias("neutral_percentage"),
        ]
    )

    sentiment_colors = {"Positive (%)": "green", "Negative (%)": "red", "Neutral (%)": "blue"}
    # Trace name -> column with that trace's y values (insertion order is
    # the order in which the traces are added).
    trace_columns = {
        "Positive (%)": "positive_percentage",
        "Negative (%)": "negative_percentage",
        "Neutral (%)": "neutral_percentage",
    }

    fig = go.Figure()
    for trace_name, column in trace_columns.items():
        fig.add_trace(
            go.Scatter(
                x=yearly["year"].to_list(),
                y=yearly[column].to_list(),
                name=trace_name,
                line=dict(color=sentiment_colors[trace_name]),
            )
        )

    fig.update_layout(
        title='Sentiment Analysis per Year (Percentage)',
        xaxis_title='Year',
        yaxis_title='Percentage (%)',
        legend_title='Sentiment Type'
    )

    fig.show()


In [None]:
def plot_sentiment_per_speech_percentage(df_sentiment):
    """Plot, per year, the percentage of rows carrying each "sent_pred" label.

    Args:
        df_sentiment: polars DataFrame with a "date" column of "%Y-%m-%d"
            strings and a "sent_pred" column (presumably one of
            "positive" / "negative" / "neutral", one row per speech).

    Percentages are computed within a year, so one year's values add up to
    100. Displays a plotly line chart with one line per label and returns
    None.
    """
    # Same label colours as the other sentiment charts in this notebook.
    sentiment_colors = {"positive": "green", "negative": "red", "neutral": "blue"}

    df_sentiment_year = (
        df_sentiment
        .with_columns(
            pl.col("date").str.strptime(pl.Date, "%Y-%m-%d").dt.year().alias("year")
        )
        .group_by(["year", "sent_pred"])
        .agg(pl.len().alias("sentiment_count"))
        .with_columns(
            pl.col("sentiment_count").sum().over("year").alias("total_count")
        )
        .with_columns(
            (pl.col("sentiment_count") / pl.col("total_count") * 100).alias("sentiment_percentage")
        )
        # group_by gives no row-order guarantee; sorting on both keys makes
        # the frame handed to plotly identical between runs.
        .sort(["year", "sent_pred"])
    )

    fig = px.line(df_sentiment_year.to_pandas(), x="year", y="sentiment_percentage", color='sent_pred',
                  labels={"sentiment_percentage": "Sentiment Percentage (%)"},
                  color_discrete_map=sentiment_colors,
                  # Fixed legend order instead of first-appearance order.
                  category_orders={"sent_pred": list(sentiment_colors)})

    fig.update_layout(
        title='Sentiment Analysis per Year (Percentage)',
        xaxis_title='Year',
        yaxis_title='Percentage (%)',
        legend_title='Sentiment Type'
    )

    fig.show()


In [None]:
def plot_sentiment_per_speech_percentage_person(df_sentiment, lastName, firstName):
    """Plot one person's yearly percentage of rows per "sent_pred" label.

    Args:
        df_sentiment: polars DataFrame with "firstName", "lastName", a "date"
            column of "%Y-%m-%d" strings and a "sent_pred" column
            (presumably one row per speech).
        lastName: value the "lastName" column must equal.
        firstName: value the "firstName" column must equal.

    Percentages are computed within a year over the matching rows only.
    Displays a plotly line chart with one line per label and returns None;
    when no row matches, the chart has no lines.
    """
    # Same label colours as the other sentiment charts in this notebook.
    sentiment_colors = {"positive": "green", "negative": "red", "neutral": "blue"}

    df_sentiment_year = (
        df_sentiment
        # Narrow to the person first so only their dates are parsed.
        .filter((pl.col("firstName") == firstName) & (pl.col("lastName") == lastName))
        .with_columns(
            pl.col("date").str.strptime(pl.Date, "%Y-%m-%d").dt.year().alias("year")
        )
        .group_by(["year", "sent_pred"])
        .agg(pl.len().alias("sentiment_count"))
        .with_columns(
            pl.col("sentiment_count").sum().over("year").alias("total_count")
        )
        .with_columns(
            (pl.col("sentiment_count") / pl.col("total_count") * 100).alias("sentiment_percentage")
        )
        # group_by gives no row-order guarantee; sorting on both keys makes
        # the frame handed to plotly identical between runs.
        .sort(["year", "sent_pred"])
    )

    fig = px.line(df_sentiment_year.to_pandas(), x="year", y="sentiment_percentage", color='sent_pred',
                  labels={"sentiment_percentage": "Sentiment Percentage (%)"},
                  color_discrete_map=sentiment_colors,
                  # Fixed legend order instead of first-appearance order.
                  category_orders={"sent_pred": list(sentiment_colors)})

    fig.update_layout(
        title=f'Sentiment Analysis per Year (Percentage) {firstName} {lastName}',
        xaxis_title='Year',
        yaxis_title='Percentage (%)',
        legend_title='Sentiment Type'
    )

    fig.show()


In [None]:
# Line chart: number of rows per year for each "sent_pred" label.
plot_sentiment_per_speech(df_parla)

In [None]:
# Line chart: yearly percentage of rows for each "sent_pred" label.
plot_sentiment_per_speech_percentage(df_parla)

In [None]:
# Grouped bar chart: percentage of each party's rows per "sent_pred" label.
plot_sentiment_per_party_perc(df_parla)

In [None]:
# Line chart: yearly totals of the labels stored in "sentences_sentiment".
plot_sentiment_per_sentence(df_parla)

In [None]:
# Grouped bar chart: per-party percentage split of the "sentences_sentiment" labels.
plot_sentiment_per_party_sentence_perc(df_parla)

In [None]:
# Line chart: yearly percentage split of the "sentences_sentiment" labels.
plot_sentiment_per_sentence_percentage_(df_parla)

In [None]:
# (first name, last name) pairs; one per-person chart is drawn for each.
candidates = [
    ("Christian", "Lindner"),
    ("Olaf", "Scholz"),
    ("Alice", "Weidel"),
    ("Sahra", "Wagenknecht"),
    ("Robert", "Habeck"),
    ("Heidi", "Reichinnek"),
    ("Friedrich", "Merz"),
]
for first_name, last_name in candidates:
    plot_sentiment_per_speech_percentage_person(
        df_parla, lastName=last_name, firstName=first_name
    )
