In [None]:
import os

import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

### Configuration

In [None]:
# Per-topic settings: display name, input CSV and output folder for the PDFs.
# Each ALL_* tuple bundles them as (csv path, topic name, pdf folder).

# Climate
TOPIC_CLIMATE = "Climate"
PATH_CLIMATE = "climate_stance_done.csv"
PDF_FOLDER_CLIMATE = "plots/climate/"
ALL_CLIMATE = (PATH_CLIMATE, TOPIC_CLIMATE, PDF_FOLDER_CLIMATE)

# Feminism (no input CSV configured here: the path is empty)
TOPIC_FEMINISM = "Feminism"
PATH_FEMINISM = ""
PDF_FOLDER_FEMINISM = "plots/feminism/"
ALL_FEMINISM = (PATH_FEMINISM, TOPIC_FEMINISM, PDF_FOLDER_FEMINISM)

# Asyl (no input CSV configured here: the path is empty)
TOPIC_ASYL = "Asyl"
PATH_ASYL = ""
PDF_FOLDER_ASYL = "plots/asyl/"
ALL_ASYL = (PATH_ASYL, TOPIC_ASYL, PDF_FOLDER_ASYL)

# CSV used to map raw speaker names to unique speakers.
PATH_TO_SPEAKER_UNIQUE_FILE = "speaker_party_unique.csv"

Select the topic to analyse by assigning the matching `ALL_<TOPIC>` tuple (e.g. `ALL_CLIMATE`, `ALL_FEMINISM` or `ALL_ASYL`) in the cell below.

In [None]:
# Select the topic to analyse by unpacking one of the ALL_* tuples:
# (input CSV path, topic name used in titles/file names, output folder for PDFs).
PATH_CHOSEN, TOPIC_CHOSEN, PDF_FOLDER_CHOSEN = ALL_CLIMATE

# Every plotting cell saves into PDF_FOLDER_CHOSEN and savefig does not create
# missing directories, so make sure the folder exists before the first plot.
os.makedirs(PDF_FOLDER_CHOSEN, exist_ok=True)

A set of 8 colorblind-friendly colors from Bang Wong’s Nature Methods paper https://www.nature.com/articles/nmeth.1618.pdf

Taken from https://scottplot.net/cookbook/4.1/colors/#colorblind-friendly

In [None]:
# Colorblind-friendly palette (hex), in the order used for indexing below.
HEX_COLORS = [
    '#000000',  # black
    '#E69F00',  # orange
    '#56B4E9',  # sky blue
    '#009E73',  # bluish green
    '#F0E442',  # yellow
    '#0072B2',  # blue
    '#D55E00',  # vermillion
    '#CC79A7',  # reddish purple
]

# Same palette converted to RGB tuples for matplotlib.
RGB_COLORS = list(map(mcolors.hex2color, HEX_COLORS))

# Default colour of the single-series bar charts.
BAR_CHART_COLOR = RGB_COLORS[2]

Create the dataframe containing speakers and party

In [None]:
# Load the speaker table; later cells read its "speaker", "unique_speaker"
# and "party" columns.
df_speaker_party = pd.read_csv(PATH_TO_SPEAKER_UNIQUE_FILE)

In [None]:
# Lookup table: speaker name as it appears in the data -> unique speaker name.
dict_speaker_party = dict(
    zip(df_speaker_party["speaker"], df_speaker_party["unique_speaker"])
)

Many speakers appear under several spellings. This function maps each spelling to one unique name.

In [None]:
def get_unique_speaker(speaker, speaker_dict):
    """Map a speaker name to its unique (canonical) form.

    Parameters
    ----------
    speaker : hashable
        Speaker name as it appears in the data.
    speaker_dict : dict
        Mapping from known speaker spellings to their unique name.

    Returns
    -------
    The unique name stored in ``speaker_dict``. A speaker without an entry is
    returned unchanged, so a single unmapped name does not abort the whole
    column mapping with a ``KeyError``.
    """
    return speaker_dict.get(speaker, speaker)

Create the dataframe for the chosen topic

In [None]:
# Load the stance CSV of the chosen topic; later cells read its "speaker",
# "label", "party" and "file" columns.
df_stance = pd.read_csv(PATH_CHOSEN)

In [None]:
# Attach the unique speaker name to every row of the stance data.
df_stance["speaker_unique"] = df_stance["speaker"].apply(
    get_unique_speaker, args=(dict_speaker_party,)
)

In [None]:
# Keep only the rows that carry a stance label.
# The explicit .copy() makes this an independent frame: later cells add
# columns to it ("stance_numeric", "seating"), which would otherwise trigger
# pandas' SettingWithCopyWarning because the frame is a slice of df_stance.
df_stance_filtered = df_stance[df_stance["label"].notna()].copy()

Plot the number of statements relevant to the topic per party

In [None]:
# Bar chart: number of labelled rows per party, most frequent party first.
plot = df_stance_filtered["party"].value_counts().plot.bar(
    color=BAR_CHART_COLOR,
    title=f"Amount of speeches per party (Topic = {TOPIC_CHOSEN})",
)

# Tighten the layout and store the chart as PDF in the topic's folder.
fig = plot.get_figure()
fig.tight_layout()
fig.savefig(
    f"{PDF_FOLDER_CHOSEN}plot_speeches_per_party_{TOPIC_CHOSEN}.pdf",
    format='pdf',
)

Plot the total amount of each stance

In [None]:
# Bar chart: how often each stance label occurs over all parties.
plot = df_stance_filtered["label"].value_counts().plot.bar(
    color=BAR_CHART_COLOR,
    title=f"Total amount of each stance (Topic = {TOPIC_CHOSEN})",
)

# Tighten the layout and store the chart as PDF in the topic's folder.
fig = plot.get_figure()
fig.tight_layout()
fig.savefig(
    f"{PDF_FOLDER_CHOSEN}plot_total_amount_stance_{TOPIC_CHOSEN}.pdf",
    format='pdf',
)

In [None]:
# Count statements per (stance label, party): rows are labels, columns are parties.
grouped_df = df_stance_filtered.groupby(['label', 'party']).size().unstack()

# Fixed colour per party so a party looks the same across topics.
color_map = {
    'FPÖ' : HEX_COLORS[5],
    'Grüne' : HEX_COLORS[3],
    'NEOS' : HEX_COLORS[7],
    'Parteilos' : HEX_COLORS[1],
    'SPÖ' : HEX_COLORS[6],
    'ÖVP' : HEX_COLORS[2]
}

# A party that is missing from color_map would otherwise map to NaN and make
# the plot call fail; fall back to the palette's black instead.
colors = [color_map.get(party, HEX_COLORS[0]) for party in grouped_df.columns]

# Create the grouped (side-by-side) bar chart
plot = grouped_df.plot(kind='bar', stacked=False, color=colors)

# Add labels and title
plot.set_xlabel('Political Stance')
plot.set_ylabel('Count')
plot.set_title('Grouped Bar Chart of political stances per party ({topic})'.format(topic=TOPIC_CHOSEN))

# Lay out and save BEFORE showing, so the displayed chart is the same as the
# one written to disk.
fig = plot.get_figure()
fig.tight_layout()
fig.savefig(PDF_FOLDER_CHOSEN + 'grouped_stance_party_{topic}.pdf'.format(topic=TOPIC_CHOSEN), format='pdf')

# Show the chart
plt.show()

In [None]:
# Unique speakers per party: speakers with a labelled statement on the topic
# vs. all speakers listed in the speaker table.
topic_speakers_unique = df_stance_filtered.groupby("party")["speaker_unique"].nunique()
total_speakers_unique = df_speaker_party.groupby("party")["unique_speaker"].nunique()

# The two series come from different tables, so their party sets can differ.
# Put both on one shared, sorted party index (missing party -> 0) so that the
# bars are paired by party and not merely by position.
parties = total_speakers_unique.index.union(topic_speakers_unique.index)
topic_speakers_unique = topic_speakers_unique.reindex(parties, fill_value=0)
total_speakers_unique = total_speakers_unique.reindex(parties, fill_value=0)

# create a figure and axes object
fig, ax = plt.subplots()

# specify the width of each bar
bar_width = 0.4

# x-coordinates: one slot per party, second series shifted by one bar width
x_pos1 = np.arange(len(parties))
x_pos2 = x_pos1 + bar_width

ax.bar(x_pos1, topic_speakers_unique.values, width=bar_width, label='Speakers Topic', color=RGB_COLORS[1])
ax.bar(x_pos2, total_speakers_unique.values, width=bar_width, label='Speakers Total', color=BAR_CHART_COLOR)

# centre each tick between the two bars of a party
ax.set_xticks(x_pos1 + bar_width / 2)
ax.set_xticklabels(parties)

ax.set_xlabel('Party')
ax.set_ylabel('Speaker Count')

ax.set_title('Total Speakers vs. Speakers who spoke about topic ({topic})'.format(topic=TOPIC_CHOSEN))

ax.legend()

# Lay out and save BEFORE showing, so the displayed chart is the same as the
# one written to disk.
fig.tight_layout()
fig.savefig(PDF_FOLDER_CHOSEN + 'total_vs_speakers_on_topic_{topic}.pdf'.format(topic=TOPIC_CHOSEN), format='pdf')

plt.show()

Average stance per party

In [None]:
# Show the distinct stance labels present in the filtered data.
df_stance_filtered["label"].unique()

In [None]:
def map_stance_to_number(stance):
    """Translate a stance label into a score between 0.0 and 1.0.

    The scale runs from 0.0 (strongly opposing) over 0.5 (neutral) to
    1.0 (strongly approving) in steps of 0.25.

    Labels are compared case-insensitively and with surrounding whitespace
    removed, so spelling variants such as 'Strongly opposing stance' and
    'Strongly Opposing Stance' get the same score.

    Parameters
    ----------
    stance : str
        Stance label.

    Returns
    -------
    float or None
        The score, or ``None`` if ``stance`` is not a string or is not one of
        the five known labels.
    """
    stance_scores = {
        'strongly approving stance': 1.0,
        'moderately approving stance': 0.75,
        'neutral stance': 0.5,
        'moderately opposing stance': 0.25,
        'strongly opposing stance': 0.0,
    }

    # Missing values (e.g. NaN) are not strings and have no score.
    if not isinstance(stance, str):
        return None

    return stance_scores.get(stance.strip().lower())

In [None]:
# Numeric stance score for every labelled row; the mapper is applied directly,
# element by element.
df_stance_filtered["stance_numeric"] = df_stance_filtered["label"].apply(map_stance_to_number)

In [None]:
# Bar chart: mean numeric stance per party, lowest mean first.
plot = df_stance_filtered.groupby("party")["stance_numeric"].mean().sort_values().plot.bar(
    color=BAR_CHART_COLOR,
    title=f"Average stance per party ({TOPIC_CHOSEN})",
)

# Dashed reference lines with a caption at the 'moderately approving' and
# 'neutral' levels of the numeric scale.
for level, caption in ((0.75, 'Mod. Approving'), (0.5, 'Neutral')):
    plot.axhline(y=level, color=RGB_COLORS[1], linestyle='--')
    plot.text(-2, level, caption, color=RGB_COLORS[1])


# Tighten the layout and store the chart as PDF in the topic's folder.
fig = plot.get_figure()
fig.tight_layout()
fig.savefig(
    f"{PDF_FOLDER_CHOSEN}average_stance_party_{TOPIC_CHOSEN}.pdf",
    format='pdf',
)

In [None]:
# Lookup table: party -> mean numeric stance over that party's labelled rows.
dict_average_stance_per_party = dict(df_stance_filtered.groupby("party")["stance_numeric"].mean())

In [None]:
# One row per (unique speaker, party) with that speaker's mean numeric stance.
df_mean_stance_per_speaker = (
    df_stance_filtered
    .groupby(["speaker_unique", "party"])["stance_numeric"]
    .mean()
    .reset_index()
)

In [None]:
# Put the party's mean stance next to each speaker's own mean stance.
df_mean_stance_per_speaker["average_party_stance"] = [
    dict_average_stance_per_party[party]
    for party in df_mean_stance_per_speaker["party"]
]

In [None]:
# True where the speaker's mean stance is strictly above the party's mean.
df_mean_stance_per_speaker["stance_higher_average"] = (
    df_mean_stance_per_speaker["stance_numeric"]
    .gt(df_mean_stance_per_speaker["average_party_stance"])
)

In [None]:
# Display the per-speaker table built in the cells above.
df_mean_stance_per_speaker

Find the speakers whose mean stance lies within 5% of their party's average stance

In [None]:
def stance_is_close_to_average_party_stance(stance_numeric, average_party_stance, tolerance=0.05):
    """Check whether the party average lies within a relative band around a stance.

    The band spans ``stance_numeric * (1 - tolerance)`` to
    ``stance_numeric * (1 + tolerance)``; it is relative to the speaker's own
    stance, not to the party average.

    Parameters
    ----------
    stance_numeric : float
        Mean numeric stance of the speaker.
    average_party_stance : float
        Mean numeric stance of the speaker's party.
    tolerance : float, optional
        Relative half-width of the band. Defaults to 0.05 (5%).

    Returns
    -------
    bool
        True if ``average_party_stance`` lies inside the band (bounds
        included). False otherwise, including when either value is NaN.
    """
    lower = stance_numeric * (1 - tolerance)
    upper = stance_numeric * (1 + tolerance)

    # For a negative stance the scaled bounds come out in reverse order.
    if lower > upper:
        lower, upper = upper, lower

    # Positive range check: every comparison with NaN is False, so a missing
    # value is never reported as "close".
    return bool(lower <= average_party_stance <= upper)

In [None]:
# Flag each speaker whose party average falls inside the 5% band around the
# speaker's own mean stance.
df_mean_stance_per_speaker["stance_5_percent_range"] = [
    stance_is_close_to_average_party_stance(speaker_mean, party_mean)
    for speaker_mean, party_mean in zip(
        df_mean_stance_per_speaker["stance_numeric"],
        df_mean_stance_per_speaker["average_party_stance"],
    )
]

In [None]:
# Number of speakers per party that are flagged as within the 5% range.
df_mean_stance_per_speaker[df_mean_stance_per_speaker["stance_5_percent_range"]]["party"].value_counts().sort_index()

In [None]:
# Number of speakers per party in the per-speaker table (for comparison).
df_mean_stance_per_speaker["party"].value_counts().sort_index()

In [None]:
# Speakers per party: all speakers vs. those flagged as within the 5% range.
total_speakers_per_party = df_mean_stance_per_speaker["party"].value_counts().sort_index()

# A party without any speaker in range does not appear in the filtered counts.
# Reindex to the totals' party index (missing party -> 0) so both series have
# the same length and order and the bars are paired by party.
amount_of_speakers_in_range = (
    df_mean_stance_per_speaker.loc[df_mean_stance_per_speaker["stance_5_percent_range"], "party"]
    .value_counts()
    .reindex(total_speakers_per_party.index, fill_value=0)
)

# create a figure and axes object
fig, ax = plt.subplots()

# specify the width of each bar
bar_width = 0.4

# x-coordinates: one slot per party, second series shifted by one bar width
x_pos1 = np.arange(len(total_speakers_per_party.index))
x_pos2 = x_pos1 + bar_width

ax.bar(x_pos1, amount_of_speakers_in_range.values, width=bar_width, label='Speakers in range', color=RGB_COLORS[1])
ax.bar(x_pos2, total_speakers_per_party.values, width=bar_width, label='Speakers Total', color=BAR_CHART_COLOR)

# centre each tick between the two bars of a party
ax.set_xticks(x_pos1 + bar_width / 2)
ax.set_xticklabels(total_speakers_per_party.index)

ax.set_xlabel('Party')
ax.set_ylabel('Speaker Count')

ax.set_title('Total Speakers vs. Speakers in 5% range of average stance ({topic})'.format(topic=TOPIC_CHOSEN))

ax.legend()

# Lay out and save BEFORE showing, so the displayed chart is the same as the
# one written to disk.
fig.tight_layout()
fig.savefig(PDF_FOLDER_CHOSEN + 'total_vs_speakers_in_range_{topic}.pdf'.format(topic=TOPIC_CHOSEN), format='pdf')

plt.show()

Top 10 sittings (council meetings) with the most statements about the topic

In [None]:
# Seating identifier: the part of the file name in front of the first "_PARSED".
df_stance_filtered["seating"] = df_stance_filtered["file"].apply(
    lambda filename: filename.partition("_PARSED")[0]
)

In [None]:
# Bar chart: the ten seatings with the most labelled rows, largest first.
plot = (
    df_stance_filtered["seating"]
    .value_counts()
    .sort_values(ascending=False)
    .head(10)
    .plot.bar(
        color=BAR_CHART_COLOR,
        title=f"National council meetings with the most speeches about topic ({TOPIC_CHOSEN})",
    )
)

# Tighten the layout and store the chart as PDF in the topic's folder.
fig = plot.get_figure()
fig.tight_layout()
fig.savefig(
    f"{PDF_FOLDER_CHOSEN}meetings_top_10_{TOPIC_CHOSEN}.pdf",
    format='pdf',
)

In [None]:
# Show the three seatings with the most labelled rows.
df_stance_filtered["seating"].value_counts().sort_values(ascending=False).head(3)