# Imports

In [78]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import src.utils.preprocessing as pre
import src.utils.plotting as plot
import src.models.sentiment_analysis as sa
import src.models.topics_extraction as te
import src.models.predictions as pred
import src.models.cooperation as coop

# Data and Pre-Processing

In [14]:
# Load the dataset returned by the project's `pre.import_RFA()` helper,
# print its (rows, columns) shape, and preview the first five rows.
data = pre.import_RFA()
print(data.shape)
data.head(5)

(198275, 7)


Unnamed: 0,SRC,TGT,VOT,RES,YEA,DAT,TXT
0,Steel1943,BDD,1,1,2013,"23:13, 19 April 2013",Support as conom.
1,Cuchullain,BDD,1,1,2013,"01:04, 20 April 2013",Support as nominator.
2,INeverCry,BDD,1,1,2013,"23:43, 19 April 2013",Support per noms.
3,Cncmaster,BDD,1,1,2013,"00:11, 20 April 2013",Support per noms. BDD is a strong contributor ...
4,Miniapolis,BDD,1,1,2013,"00:56, 20 April 2013","Support, with great pleasure. I work with BDD ..."


In [79]:
# df_processed, df, unique_elections, unique_candidate_freq_table, single_runners_list, multiple_runners_list, flagged_elec_id = pre.preprossess_eda(data)

In [None]:
# df1 = df.copy()
# votes, df1 = pre.get_votes(df1)

In [2]:
# Run the project's combined preprocessing helper, which returns two objects.
# `dfsa` is indexed with `.iloc` (i.e. used as a DataFrame) in the next cell.
# NOTE(review): the helper name suggests sentiment-analysis and topic columns are
# attached here, and the meaning of `qs` is not visible — confirm in src.utils.preprocessing.
dfsa, qs = pre.complete_prepro_w_sa_topics()

In [5]:
# Reorder the 23 columns by position: columns 14 and 15 are moved to sit right
# after column 5; every other column keeps its original relative order.
# NOTE(review): `dfsa` is rebuilt from itself, so re-running this cell applies the
# permutation a second time — run it exactly once per kernel session.
column_order = [*range(0, 6), 14, 15, *range(6, 14), *range(16, 23)]
dfsa = dfsa.iloc[:, column_order]

In [50]:
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import plotly.io as pio
import pandas as pd

In [87]:
# Cross-tabulate rows of `merged` by sentiment label and election result (RES),
# one row per sentiment and one column per RES value; absent pairs count as 0.
# NOTE(review): `merged` is not created in any cell shown in this notebook — it
# must already exist in the kernel for this cell to run.
sentiment_stats = (
    merged
    .groupby(['sentiment', 'RES'])
    .size()
    .unstack(fill_value=0)
)

# Row total across result columns, then the share of rows whose RES is 1
sentiment_stats['Total'] = sentiment_stats.sum(axis='columns')
sentiment_stats['Pass Rate'] = sentiment_stats[1].div(sentiment_stats['Total'])

# Plot-ready frames: counts for the bars, pass rate for the line overlay
df_g = (
    sentiment_stats[[1, -1, 'Total']]
    .reset_index()
    .rename(columns={1: 'pos', -1: 'neg'})
)
df_g2 = sentiment_stats[['Pass Rate']].reset_index()

In [88]:
# Grouped bar chart of counts per sentiment label (losses, wins, total) with the
# pass rate drawn as a red line on a secondary 0-100 axis.
# Each tuple below is (df_g column, legend name, label position, bar colour).
fig = go.Figure(
    data=[
        go.Bar(
            x=df_g['sentiment'],
            y=df_g[column],
            name=legend_name,
            text=df_g[column],
            textposition=label_position,
            marker={"color": bar_colour},
        )
        for column, legend_name, label_position, bar_colour in (
            ('neg', "Losses", 'inside', "salmon"),
            ('pos', "Wins", 'inside', "teal"),
            ('Total', "Total", 'outside', "blue"),
        )
    ],
    layout={"bargap": 0.2, "barcornerradius": 15},
)

# Pass-rate line, bound to the secondary y-axis ("y2") and labelled in percent
fig.add_trace(
    go.Scatter(
        x=df_g2['sentiment'],
        y=df_g2['Pass Rate'].mul(100),
        mode="lines+markers+text",
        name="Pass Rate",
        text=[f"{pr * 100:.1f}%" for pr in df_g2["Pass Rate"]],
        textposition="top center",
        line={"color": "red", "width": 2},
        marker={"color": "red", "size": 8},
        yaxis="y2",
    )
)

# Titles, the two y-axes (counts on the left, percentage on the right) and legend
fig.update_layout(
    title={"text": "Success Rate vs Voter Sentiment", "x": 0.5, "xanchor": "center"},
    xaxis={"title": "Voter Sentiment"},
    yaxis={"title": "Counts", "side": "left"},
    yaxis2={
        "title": "Pass Rate (%)",
        "overlaying": "y",
        "side": "right",
        "range": [0, 100],
        "tickformat": ".0f",
    },
    barmode="group",
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1},
)

# Write a standalone HTML copy without opening a browser, then render inline
pio.write_html(fig, file="res/Plots/pass_rates_positive_negative.html", auto_open=False)
fig.show()


In [83]:
# Count POSITIVE / NEGATIVE rows of `merged` per year (YEA).
# NOTE(review): `merged` is not created in any cell shown in this notebook — it
# must already exist in the kernel for this cell to run.
# A year that has only one of the two labels would otherwise show up as NaN after
# the two Series are aligned, so missing counts are filled with 0 and kept integral.
sentiment_by_year = pd.DataFrame({
    'POSITIVE': merged[merged['sentiment'] == 'POSITIVE'].groupby('YEA')['RES'].count(),
    'NEGATIVE': merged[merged['sentiment'] == 'NEGATIVE'].groupby('YEA')['RES'].count(),
}).fillna(0).astype(int)

# Compute the yearly total once, from the two count columns only. Summing the whole
# frame row-wise after POSITIVE_PERC has been added would inflate the denominator
# used for NEGATIVE_PERC.
yearly_total = sentiment_by_year[['POSITIVE', 'NEGATIVE']].sum(axis=1)
sentiment_by_year['POSITIVE_PERC'] = sentiment_by_year['POSITIVE'] / yearly_total
sentiment_by_year['NEGATIVE_PERC'] = sentiment_by_year['NEGATIVE'] / yearly_total

# Turn the YEA index into a regular column for plotting
sentiment_by_year = sentiment_by_year.reset_index()

# Counts for the bar traces. TOTAL is restricted to the two count columns: after
# reset_index() the frame also holds YEA, and a plain row-wise sum would add the
# year number into every total.
df_g = sentiment_by_year.drop(['POSITIVE_PERC', 'NEGATIVE_PERC'], axis=1)
df_g['TOTAL'] = df_g[['POSITIVE', 'NEGATIVE']].sum(axis=1)

# Share of positive rows per year, for the line overlay
df_g2 = sentiment_by_year[['YEA', 'POSITIVE_PERC']]

In [84]:
# Grouped bar chart of yearly NEGATIVE / POSITIVE / TOTAL counts taken from `df_g`,
# with the positive-sentiment share from `df_g2` drawn as a red line on a
# secondary 0-100 axis.
fig = go.Figure(
    data=[
        go.Bar(
            x=df_g.YEA,
            y=df_g.NEGATIVE,
            name="NEGATIVE",
            text=df_g.NEGATIVE,
            textposition="inside",
            marker=dict(color="salmon")
        ),
        go.Bar(
            x=df_g.YEA,
            y=df_g.POSITIVE,
            name="POSITIVE",
            text=df_g.POSITIVE,
            textposition="inside",
            marker=dict(color="teal")
        ),
        go.Bar(
            x=df_g.YEA,
            y=df_g.TOTAL,
            name="Total",
            text=df_g.TOTAL,
            textposition="outside",
            marker=dict(color="blue")
        ),
    ],
    layout=dict(
        barcornerradius=15,
    ),
)

# Overlay line graph for Positive Sentiment Rate, bound to the secondary axis "y2"
fig.add_trace(
    go.Scatter(
        x=df_g2.YEA,
        y=df_g2.POSITIVE_PERC * 100,  # Convert fraction to percentage
        mode="lines+markers+text",
        name="Positive Sentiment Rate",
        text=[f"{pr*100:.1f}%" for pr in df_g2["POSITIVE_PERC"]],
        textposition="top center",
        line=dict(color="red", width=2),
        marker=dict(color="red", size=8),
        yaxis="y2"
    )
)

# Update layout for dual y-axis
fig.update_layout(
    title=dict(
        text="Evolution of Positive and Negative Sentiments Over Years",
        x=0.5,
        xanchor="center"
    ),
    xaxis=dict(
        title="Year",
        tickvals=df_g.YEA,  # one tick per year present in the data
    ),
    yaxis=dict(
        title="Counts",
        side="left"
    ),
    yaxis2=dict(
        title="Positive Sentiment Rate (%)",
        overlaying="y",
        side="right",
        range=[0, 100],
        # ".0f%" is not a valid d3-format specifier (it carries two type characters).
        # Format the number with ".0f" and append the percent sign via ticksuffix.
        tickformat=".0f",
        ticksuffix="%",
    ),
    barmode="group",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
)

# Save a standalone HTML copy (without opening a browser) and show the plot inline
pio.write_html(fig, file="res/Plots/Evolution_of_Sentiments.html", auto_open=False)
fig.show()


In [70]:
# Label each row from its VADER scores: 'POSITIVE' when vader_pos is strictly
# greater than vader_neg, otherwise 'NEGATIVE' (ties and NaN comparisons therefore
# fall to 'NEGATIVE'). The comparison is done column-wise instead of with a
# row-by-row apply: same labels, no Python call per row, and it also works on an
# empty frame.
merged['sentiment_vader'] = (
    merged['vader_pos']
    .gt(merged['vader_neg'])
    .map({True: 'POSITIVE', False: 'NEGATIVE'})
)

In [None]:
# Cross-tabulate rows of `merged` by VADER sentiment label and election result
# (RES), one row per label and one column per RES value; absent pairs count as 0.
sentiment_stats_vader = (
    merged
    .groupby(['sentiment_vader', 'RES'])
    .size()
    .unstack(fill_value=0)
)

# Row total across result columns, then the share of rows whose RES is 1
sentiment_stats_vader['Total'] = sentiment_stats_vader.sum(axis='columns')
sentiment_stats_vader['Pass Rate'] = sentiment_stats_vader[1].div(sentiment_stats_vader['Total'])

# Plot-ready frames: counts for the bars, pass rate for the line overlay
df_g = (
    sentiment_stats_vader[[1, -1, 'Total']]
    .reset_index()
    .rename(columns={1: 'pos', -1: 'neg'})
)
df_g2 = sentiment_stats_vader[['Pass Rate']].reset_index()

In [77]:
# Grouped bar chart of counts per VADER sentiment label (losses, wins, total) with
# the pass rate drawn as a red line on a secondary 0-100 axis.
# Each tuple below is (df_g column, legend name, label position, bar colour).
fig = go.Figure(
    data=[
        go.Bar(
            x=df_g['sentiment_vader'],
            y=df_g[column],
            name=legend_name,
            text=df_g[column],
            textposition=label_position,
            marker={"color": bar_colour},
        )
        for column, legend_name, label_position, bar_colour in (
            ('neg', "Losses", 'inside', "salmon"),
            ('pos', "Wins", 'inside', "teal"),
            ('Total', "Total", 'outside', "blue"),
        )
    ],
    layout={"bargap": 0.2, "barcornerradius": 15},
)

# Pass-rate line, bound to the secondary y-axis ("y2") and labelled in percent
fig.add_trace(
    go.Scatter(
        x=df_g2['sentiment_vader'],
        y=df_g2['Pass Rate'].mul(100),
        mode="lines+markers+text",
        name="Pass Rate",
        text=[f"{pr * 100:.1f}%" for pr in df_g2["Pass Rate"]],
        textposition="top center",
        line={"color": "red", "width": 2},
        marker={"color": "red", "size": 8},
        yaxis="y2",
    )
)

# Titles, the two y-axes (counts on the left, percentage on the right) and legend
fig.update_layout(
    title={"text": "Success Rate vs Voter Sentiment", "x": 0.5, "xanchor": "center"},
    xaxis={"title": "Voter Sentiment"},
    yaxis={"title": "Counts", "side": "left"},
    yaxis2={
        "title": "Pass Rate (%)",
        "overlaying": "y",
        "side": "right",
        "range": [0, 100],
        "tickformat": ".0f",
    },
    barmode="group",
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1},
)

# Write a standalone HTML copy without opening a browser, then render inline
pio.write_html(fig, file="res/Plots/pass_rates_positive_negative_vader.html", auto_open=False)
fig.show()

In [85]:
# Count VADER-labelled POSITIVE / NEGATIVE rows of `merged` per year (YEA).
# Years that have only one of the two labels come out as NaN after alignment, so
# they are filled with 0 and the counts are kept integral for clean bar labels.
sentiment_by_year = pd.DataFrame({
    'POSITIVE': merged[merged['sentiment_vader'] == 'POSITIVE'].groupby('YEA')['RES'].count(),
    'NEGATIVE': merged[merged['sentiment_vader'] == 'NEGATIVE'].groupby('YEA')['RES'].count(),
}).fillna(0).astype(int)

# Compute the yearly total once, from the two count columns only. Summing the whole
# frame row-wise after POSITIVE_PERC has been added would inflate the denominator
# used for NEGATIVE_PERC.
yearly_total = sentiment_by_year[['POSITIVE', 'NEGATIVE']].sum(axis=1)
sentiment_by_year['POSITIVE_PERC'] = sentiment_by_year['POSITIVE'] / yearly_total
sentiment_by_year['NEGATIVE_PERC'] = sentiment_by_year['NEGATIVE'] / yearly_total

# Turn the YEA index into a regular column for plotting
sentiment_by_year = sentiment_by_year.reset_index()

# Counts for the bar traces. TOTAL is restricted to the two count columns: after
# reset_index() the frame also holds YEA, and a plain row-wise sum would add the
# year number into every total.
df_g = sentiment_by_year.drop(['POSITIVE_PERC', 'NEGATIVE_PERC'], axis=1)
df_g['TOTAL'] = df_g[['POSITIVE', 'NEGATIVE']].sum(axis=1)

# Share of positive rows per year, for the line overlay
df_g2 = sentiment_by_year[['YEA', 'POSITIVE_PERC']]

In [86]:
# Grouped bar chart of yearly NEGATIVE / POSITIVE / TOTAL counts taken from `df_g`
# (VADER labels), with the positive-sentiment share from `df_g2` drawn as a red
# line on a secondary 0-100 axis.
fig = go.Figure(
    data=[
        go.Bar(
            x=df_g.YEA,
            y=df_g.NEGATIVE,
            name="NEGATIVE",
            text=df_g.NEGATIVE,
            textposition="inside",
            marker=dict(color="salmon")
        ),
        go.Bar(
            x=df_g.YEA,
            y=df_g.POSITIVE,
            name="POSITIVE",
            text=df_g.POSITIVE,
            textposition="inside",
            marker=dict(color="teal")
        ),
        go.Bar(
            x=df_g.YEA,
            y=df_g.TOTAL,
            name="Total",
            text=df_g.TOTAL,
            textposition="outside",
            marker=dict(color="blue")
        ),
    ],
    layout=dict(
        barcornerradius=15,
    ),
)

# Overlay line graph for Positive Sentiment Rate, bound to the secondary axis "y2"
fig.add_trace(
    go.Scatter(
        x=df_g2.YEA,
        y=df_g2.POSITIVE_PERC * 100,  # Convert fraction to percentage
        mode="lines+markers+text",
        name="Positive Sentiment Rate",
        text=[f"{pr*100:.1f}%" for pr in df_g2["POSITIVE_PERC"]],
        textposition="top center",
        line=dict(color="red", width=2),
        marker=dict(color="red", size=8),
        yaxis="y2"
    )
)

# Update layout for dual y-axis
fig.update_layout(
    title=dict(
        text="Evolution of Positive and Negative Sentiments Over Years",
        x=0.5,
        xanchor="center"
    ),
    xaxis=dict(
        title="Year",
        tickvals=df_g.YEA,  # one tick per year present in the data
    ),
    yaxis=dict(
        title="Counts",
        side="left"
    ),
    yaxis2=dict(
        title="Positive Sentiment Rate (%)",
        overlaying="y",
        side="right",
        range=[0, 100],
        # ".0f%" is not a valid d3-format specifier (it carries two type characters).
        # Format the number with ".0f" and append the percent sign via ticksuffix.
        tickformat=".0f",
        ticksuffix="%",
    ),
    barmode="group",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
)

# Save a standalone HTML copy (without opening a browser) and show the plot inline
pio.write_html(fig, file="res/Plots/Evolution_of_Sentiments_vader.html", auto_open=False)
fig.show()