In [1]:
import pandas as pd 
import numpy as np 
import math 
import scipy.stats as stats
import matplotlib.pyplot as plt

In [2]:
# National election dates used as reference points for the analysis.
# Layout: {country / chamber label: {election type: [dates as 'YYYY-MM-DD' strings]}}.
# NOTE(review): the keys 'USA-Represantatives', 'Represantatives' and 'USA-Senat'
# are kept exactly as spelled, because they are lookup keys other code may use.
election_times = {
    'UK': {
        'General': [
            '2010-05-06',
            '2015-05-07',
            '2017-06-08',
            '2019-12-12',
            '2024-07-04',
        ],
    },
    'USA-Represantatives': {
        'Represantatives': [
            '2012-11-06',
            '2014-11-04',
            '2016-11-08',
            '2018-11-06',
            '2020-11-03',
        ],
    },
    'USA-Senat': {
        'Senators': [
            '2012-11-06',
            '2014-11-04',
            '2016-11-08',
            '2018-11-06',
            '2020-11-03',
        ],
    },
    'Germany': {
        'Bundestag': [
            '2013-09-22',
            '2017-09-24',
            '2021-09-26',
        ],
    },
    'Austria': {
        'Nationalrat': [
            '2013-09-29',
            '2017-10-15',
            '2019-09-29',
            '2024-09-29',
        ],
    },
}

In [3]:
# Load the German politician dataset and convert its 'Date' column to
# datetime values so rows can be compared against election dates.
german = (
    pd.read_csv('../data/Final Datasets/Final Datasets/Germany_politician_data.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)


In [6]:

def group_by_date(date, data, offset=3):
    """
    Split ``data`` into the rows inside and outside a window around ``date``.

    The window reaches ``offset`` calendar months before and after ``date``;
    both end points belong to the window.

    Args:
        date (str or pd.Timestamp): Reference date, converted with ``pd.to_datetime``.
        data (pd.DataFrame): DataFrame with a 'Date' column.
        offset (int): Half-width of the window in months. Defaults to 3.

    Returns:
        tuple: Two DataFrames - the rows whose 'Date' lies within the window,
        and the rows whose 'Date' lies strictly before or after it. Rows whose
        'Date' is missing (NaT) match neither condition and are in neither frame.
    """
    centre = pd.to_datetime(date)
    half_width = pd.DateOffset(months=offset)
    window_start = centre - half_width
    window_end = centre + half_width

    row_dates = data['Date']
    # `between` includes both bounds by default, i.e. start <= Date <= end.
    is_inside = row_dates.between(window_start, window_end)
    is_outside = (row_dates < window_start) | (row_dates > window_end)

    return data[is_inside], data[is_outside]

def filter_election_periods(
    data: pd.DataFrame, dates: list[str], offset: int = 3
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Split ``data`` into rows close to any of ``dates`` and all remaining rows.

    For each date, ``group_by_date`` extracts the rows within ``offset`` months
    of that date from the rows that are still unassigned.

    Args:
        data: DataFrame with a 'Date' column (as required by ``group_by_date``).
        dates: Election dates, e.g. 'YYYY-MM-DD' strings.
        offset: Half-width of each election window in months. Defaults to 3.

    Returns:
        ``(election_data, not_election_data)``: the rows inside at least one
        window (re-indexed from 0) and the rows inside none. With an empty
        ``dates`` list, ``election_data`` is an empty frame with the columns
        of ``data`` and ``not_election_data`` is ``data`` itself.
    """
    # Must be seeded before the loop: the name is both read and reassigned
    # there, and reading it unassigned raises UnboundLocalError.
    not_election_data = data
    election_frames = []

    for date in dates:
        # Only rows not claimed by an earlier window are searched, so
        # overlapping windows never place the same row in the result twice.
        single_election_data, not_election_data = group_by_date(
            date, not_election_data, offset
        )
        election_frames.append(single_election_data)

    if not election_frames:
        # pd.concat rejects an empty list; return an empty slice that keeps
        # the column layout so downstream column access still works.
        return data.iloc[0:0], not_election_data

    # Concatenate once instead of growing a DataFrame inside the loop.
    election_data = pd.concat(election_frames, ignore_index=True)
    return election_data, not_election_data



In [7]:
# Split the German data into rows within the default 3-month window around a
# Bundestag election (election_data) and all remaining rows (not_election_data).
not_election_data = german  # start with the full dataset; shrinks per election
election_frames = []

# Dates come from election_times rather than a second hard-coded copy.
for date in election_times['Germany']['Bundestag']:
    # Search only the rows that no earlier election window has claimed.
    single_election_data, not_election_data = group_by_date(date, not_election_data)
    election_frames.append(single_election_data)

# Concatenate once instead of growing a DataFrame inside the loop.
election_data = pd.concat(election_frames, ignore_index=True)


In [8]:
# Welch's t-test (equal_var=False): compare `changes` inside vs. outside the
# election windows without assuming equal variances.
stats.ttest_ind(
    election_data['changes'],
    not_election_data['changes'],
    equal_var=False,
)

TtestResult(statistic=2.7616597920110517, pvalue=0.005751418605617925, df=180295.13714212595)