In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import matplotlib.dates as mdates


In [None]:
# Meeting and agenda tables.
dmeeting = pd.read_parquet('./data/data_meeting.parquet')
dagenda = pd.read_parquet('./data/data_agenda.parquet')

# The speech data is split over three parquet files; each part keeps its own
# name and the parts are read in order.
data_speech1, data_speech2, data_speech3 = (
    pd.read_parquet(part_path)
    for part_path in (
        './data/data_speech1.parquet',
        './data/data_speech2.parquet',
        './data/data_speech3.parquet',
    )
)

parMem = pd.read_parquet('./data/parliament_members.parquet')

# Stack the three speech parts row-wise into one frame.
dspeech = pd.concat([data_speech1, data_speech2, data_speech3], axis=0)

In [None]:

# Keep only the columns needed to count speech items per agenda item per
# meeting and to split those counts by label (NC vs C).
plot_df = dspeech.loc[:, ["meeting_id", "agenda_item_id", "speech_item_id", "label"]]

# Custom aggregation: percentage of labelled items ('C' or 'NC') that are 'C'.
def c_nc_ratio(group):
    """Return the share of 'C' labels among 'C' and 'NC' labels, in percent.

    Parameters
    ----------
    group : pandas.Series or numpy.ndarray
        Labels of one group. Values other than 'C' and 'NC' are ignored.

    Returns
    -------
    number
        ``100 * C / (C + NC)``, or ``0`` when the group contains neither label.
    """
    c_count = (group == 'C').sum()
    nc_count = (group == 'NC').sum()
    labelled = c_count + nc_count
    # Guard on the denominator itself: a group with only 'C' labels is 100 %,
    # not 0 %. Only a group without any labelled item would divide by zero.
    return (c_count / labelled) * 100 if labelled != 0 else 0

# Per (meeting, agenda item): number of speech items and the value returned by
# c_nc_ratio for that item's labels.
plot_df = (
    plot_df
    .groupby(["meeting_id", "agenda_item_id"])
    .agg(
        num_speech_items=pd.NamedAgg(column="speech_item_id", aggfunc="count"),
        C_NC_ratio=pd.NamedAgg(column="label", aggfunc=c_nc_ratio),
    )
    .reset_index()
)

In [None]:
# Inner-join against the agenda table's keys, attach each meeting's date, then
# parse 'date' and make it the index so the frame can be resampled over time.
plot_df = (
    plot_df
    .merge(dagenda[["meeting_id", "agenda_item_id"]], on=["meeting_id", "agenda_item_id"])
    .merge(dmeeting[["meeting_id", "date"]], on="meeting_id")
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
)

In [None]:

# Weekly mean of C_NC_ratio; weeks that have no rows come out as NaN from the
# resample and are then filled by interpolation (pandas default method).
weekly_df = plot_df['C_NC_ratio'].resample('W').mean().interpolate()

In [None]:
# Draw the 12-week and 52-week centred rolling means on one shared axes.
ax = (
    weekly_df
    .rolling(window=12, center=True)
    .mean()
    .plot(color='lightblue', linewidth=1, fontsize=20, label='12-Week Rolling Average')
)
(
    weekly_df
    .rolling(window=52, center=True)
    .mean()
    .plot(ax=ax, figsize=(20, 10), linewidth=5, fontsize=20, color='green', label='52-Week Rolling Average')
)

# Election dates, each marked with a dash-dotted vertical line.
fv_elections = ['2022-11-01', '2019-06-05', '2015-06-18', '2011-09-15']

for i, fv_election in enumerate(fv_elections):
    date = pd.to_datetime(fv_election)
    # Only the first line gets a legend label, so the legend shows one entry.
    election_label = 'General Election' if i == 0 else ""
    ax.axvline(date, color='red', linestyle='dashdot', linewidth=1, alpha=0.9, label=election_label)

    # Write the date string at the current top of the y-axis, left of the line.
    ax.text(date, ax.get_ylim()[1], fv_election, verticalalignment='bottom', horizontalalignment='right', color='red', alpha=0.9)


# One x-tick per year start, from 2008 through 2023.
years = pd.date_range(start='2008-01-01', end='2023-01-01', freq='YS')
ax.set_xticks(years)
ax.set_xticklabels([year.strftime('%Y') for year in years], rotation=45)

# Legend and axis labels.
ax.legend(fontsize=13)
ax.set_ylabel("%", fontsize=20, rotation=0, labelpad=20)
ax.set_xlabel("", fontsize=20)
plt.tight_layout()
# Save the figure as SVG.
plt.savefig('./figures/general_climate_trend.svg', format='svg')


## Andreas

## Eisuke

## Anders

## Relative Party

In [None]:
# Attach each speech's meeting date. A 'date' column left over from an earlier
# run of this cell is dropped first: merging a second time would otherwise
# produce 'date_x'/'date_y' and the 'date' lookups below would raise KeyError.
dspeech = pd.merge(
    dspeech.drop(columns=['date'], errors='ignore'),
    dmeeting[['meeting_id', 'date']],
    on='meeting_id',
)
# Parse the dates explicitly (a no-op for datetime64 data) so that the `.dt`
# accessor does not depend on how the parquet file stored the column.
dspeech['date'] = pd.to_datetime(dspeech['date'])
dspeech['year'] = dspeech['date'].dt.to_period('Y')
# Keep only speeches whose speaker_party is one of the listed party codes.
dspeech = dspeech[dspeech['speaker_party'].isin(['ALT', 'DD', 'DF', 'EL', 'FG', 'KD', 'KF', 'LA', 'M', 'NB', 'RV', 'S', 'SF', 'V'])]

In [None]:
# Share of 'C' speech items per party and calendar month, smoothed with a
# centred rolling mean.
# NOTE: the `weekly*` and `MA52` names are kept because other cells refer to
# them, but the grouping is monthly and the rolling window spans 48 rows (one
# row per month in which the party has speech items).
party_frames = []
for party in dspeech['speaker_party'].unique().tolist():
    party_speeches = dspeech[dspeech['speaker_party'] == party]
    weekly = party_speeches.groupby(party_speeches['date'].dt.to_period('M')).agg(
        count_C=('label', lambda x: (x == 'C').sum()),
        count_NC=('label', lambda x: (x == 'NC').sum())
    )
    # NaN when a month has neither 'C' nor 'NC' items for this party.
    weekly['percent_C'] = (weekly['count_C'] / (weekly['count_C'] + weekly['count_NC'])) * 100
    weekly = weekly.reset_index()
    weekly['date'] = weekly['date'].dt.to_timestamp()
    weekly['MA52'] = weekly['percent_C'].rolling(window=48, center=True, min_periods=1).mean()
    weekly['speaker_party'] = party
    party_frames.append(weekly)

# Concatenate once instead of growing the frame inside the loop: this avoids
# repeated copying, keeps the per-party dtypes (no empty object-dtype seed
# frame) and gives the result a unique index.
if party_frames:
    weekly_ag = pd.concat(party_frames, ignore_index=True)[
        ['date', 'speaker_party', 'percent_C', 'MA52', 'count_C', 'count_NC']
    ]
else:
    weekly_ag = pd.DataFrame(columns=['date', 'speaker_party', 'percent_C', 'MA52'])

# Relative change of the smoothed share per party, in percent: against the
# party's first non-null smoothed value and against its own overall mean.
first_MA52 = weekly_ag.groupby('speaker_party')['MA52'].transform('first')
mean_MA52 = weekly_ag.groupby('speaker_party')['MA52'].transform('mean')
weekly_ag['percent_C_relative'] = (weekly_ag['MA52'] / first_MA52 - 1) * 100
weekly_ag['percent_C_relative_all'] = (weekly_ag['MA52'] / mean_MA52 - 1) * 100


# def mean_of_first_three(series):
#     return series.head(12).mean()

# mean_first_three_MA52 = weekly_ag.groupby('speaker_party')['MA52'].transform(mean_of_first_three)
# weekly_ag['mean_first_three_MA52'] = mean_first_three_MA52
# weekly_ag['percent_C_relative_first_threeM'] = (weekly_ag['MA52'] / weekly_ag['mean_first_three_MA52'] -1) * 100

In [None]:
# Colors from here:
# https://gist.github.com/josiahayres-ibm/9ddc2413ee61b56f915ba602f2b9305d
carbon_categorical_light = [
    "#6929c4", "#1192e8", "#005d5d", "#9f1853", "#fa4d56", "#570408", "#198038",
    "#002d9c", "#ee538b", "#b28600", "#009d9a", "#012749", "#8a3800", "#a56eff",
]
# Make the list seaborn's default palette and render it as a swatch strip.
sns.set_palette(carbon_categorical_light)
sns.palplot(sns.color_palette())

In [None]:
# Leave these party codes out of the relative-change plot.
weekly_ag = weekly_ag[~weekly_ag['speaker_party'].isin(['M', 'KD', 'DD', 'FG'])]

from matplotlib.dates import YearLocator, DateFormatter
from matplotlib.ticker import MultipleLocator, FuncFormatter


def percentage_formatter(x, pos):
    """Format tick value ``x`` as a whole-number percentage string; ``pos`` is unused."""
    return f"{x:.0f}%"


plt.figure(figsize=(12, 12))
# One line per party; seaborn draws on the figure just created and returns its axes.
ax = sns.lineplot(x='date', y='percent_C_relative', hue='speaker_party', marker=None, data=weekly_ag, legend=False, palette=carbon_categorical_light)
# Dashed zero line as visual reference.
ax.axhline(y=0, color='black', linestyle='--', linewidth=1)
ax.set_ylabel('% Climate related speech items relative to baseline', fontsize=16)
ax.set_xlabel(None)

# Yearly x-ticks shown as four-digit years.
ax.xaxis.set_major_locator(YearLocator())
ax.xaxis.set_major_formatter(DateFormatter('%Y'))
plt.xticks(rotation=45, ha='center')

# y-ticks every 20 units, rendered with a trailing percent sign.
ax.yaxis.set_major_locator(MultipleLocator(20))
ax.yaxis.set_major_formatter(FuncFormatter(percentage_formatter))
ax.tick_params(axis='both', which='major', labelsize=14)
plt.tight_layout()

# Label every line with its party code, offset from the party's last row.
for party_code, party_rows in weekly_ag.groupby('speaker_party', sort=False):
    last_data_point = party_rows.iloc[-1]
    ax.annotate(party_code, (last_data_point['date'], last_data_point['percent_C_relative']),
                textcoords="offset points", xytext=(25,0), ha='right', fontsize=14, color='black')
#plt.savefig('./figures/parties_change_final.svg', format='svg')

## Overall party

In [None]:
# Party code (as found in 'speaker_party') -> display name used on figure axes.
party_name_mapping = {
    'FG':'Frie Grønne',
    'ALT':'Alternativet',
    'M':'Moderaterne',
    'SF':'Socialistisk Folkeparti',
    'EL':'Enhedslisten',
    'RV':'Radikale Venstre',
    'V':'Venstre',
    'S':'Socialdemokratiet',
    # Spelling corrected (was 'Danmarks Demokratterne'); this text is drawn on the plot.
    'DD':'Danmarksdemokraterne',
    'KF':'Konservative Folkeparti',
    'LA':'Liberal Alliance',
    'NB':'Nye Borgerlige',
    'DF':'Dansk Folkeparti',
    'KD':'Kristendemokraterne',
}

# Per party: counts of 'C' and 'NC' items, the 'C' share in percent, that share
# minus the unweighted mean over parties, the display name, and a descending
# rank of the centred share.
party = (
    dspeech
    .groupby(['speaker_party'])
    .agg(
        count_C=('label', lambda labels: (labels == 'C').sum()),
        count_NC=('label', lambda labels: (labels == 'NC').sum()),
    )
    .assign(percent_C=lambda frame: (frame['count_C'] / (frame['count_C'] + frame['count_NC'])) * 100)
    .assign(percent_C_mean_centered=lambda frame: frame['percent_C'] - frame['percent_C'].mean())
    .reset_index()
    .assign(speaker_party_new=lambda frame: frame['speaker_party'].replace(party_name_mapping))
    .assign(rank=lambda frame: frame['percent_C_mean_centered'].rank(ascending=False))
)

# Rank 1 (largest centred share) first.
party_sorted = party.sort_values('rank')
# Bar colour by sign of percent_C_mean_centered: green for >= 0, red otherwise.
colors = [sns.xkcd_rgb['medium green'] if val >= 0 else sns.xkcd_rgb['light red'] for val in party_sorted['percent_C_mean_centered']]
plt.figure(figsize=(13, 7))
sns.barplot(x='percent_C_mean_centered', y='speaker_party_new', palette=colors, data=party_sorted, orient='h')
# NOTE(review): the plotted column is named percent_C_mean_centered, while the
# axis label reads as an absolute average — confirm the intended wording.
plt.xlabel('Average % climate related speech items', fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.ylabel('')

# Write each bar's value at the bar end. The loop variable is deliberately not
# called `party`, so that it does not overwrite the `party` DataFrame.
for i, (val, party_label) in enumerate(zip(party_sorted['percent_C_mean_centered'], party_sorted['speaker_party_new'])):
    if party_label in ['Enhedslisten', 'Radikale Venstre', 'Venstre']:
        # These three labels are always right-aligned, whatever the bar's sign.
        ha_value = 'right'
    else:
        # Same rule as the colours: right-aligned for >= 0, left-aligned otherwise.
        ha_value = 'right' if val >= 0 else 'left'
    plt.text(val, i, f'{val:.2f}%', va='center', ha=ha_value, fontsize=14, color='black')

plt.tight_layout()
#plt.savefig('./figures/overall_party.svg', format='svg')