In [55]:
import pandas as pd



# Locations of the two pre-processed CSV inputs used throughout this notebook.
# NOTE(review): both are absolute paths under /workspaces/, so the notebook only
# runs where the repository is checked out at that exact location.
economic_data_path = '/workspaces/Electoral-Economic-Analysis/electoral_analysis/processed_bea_economic_growth.csv'
electoral_data_path = '/workspaces/Electoral-Economic-Analysis/electoral_analysis/processed_electoral_data.csv'


In [56]:
def filtering_economic_indicator(data, indicator, year):
    """Select the rows of ``data`` for one indicator and one cycle.

    Args:
        data: DataFrame with 'Indicator' and 'Cycle Duration' columns.
        indicator: Value to match against the 'Indicator' column.
        year: Value to match against the 'Cycle Duration' column. Despite
            the parameter name, it is compared with the cycle label
            (the commented example below passes '2009-2013').

    Returns:
        The subset of ``data`` where both columns match; original index
        labels and column order are preserved.
    """
    indicator_matches = data['Indicator'] == indicator
    cycle_matches = data['Cycle Duration'] == year
    return data.loc[indicator_matches & cycle_matches]

def filtering_electoral_data(data, year):
    """Select the rows of ``data`` whose 'Year' column equals ``year``.

    Args:
        data: DataFrame with a 'Year' column.
        year: Value to match against the 'Year' column.

    Returns:
        The matching subset of ``data``; original index labels and column
        order are preserved.
    """
    year_matches = data['Year'].eq(year)
    return data.loc[year_matches]

# realGDP_data_2009_2013 = filtering_economic_indicator(economic_data, 'Real GDP', '2009-2013')
# election_2008_data =  filtering_electoral_data(electoral_data, 2008)

# print(realGDP_data_2009_2013.head())
# print(election_2008_data.head())

In [58]:
def normalize_dataset(economic_data, electoral_data, indicator, cycle_duration, election_year):
    """Align the economic and electoral frames and cut both down to one election.

    Column headers are harmonised ('State' -> 'state', 'Cycle Duration' ->
    'cycle_duration', 'Year' -> 'year'), state names are upper-cased, and the
    electoral 'year' column is cast to int. The economic rows are then
    restricted to ``indicator`` / ``cycle_duration`` and given an 'End Year'
    column equal to the second year of the cycle label minus one.

    Args:
        economic_data: DataFrame with 'State', 'Indicator' and
            'Cycle Duration' columns. Not modified.
        electoral_data: DataFrame with 'State' and 'Year' columns.
            Not modified.
        indicator: Value to match in the 'Indicator' column.
        cycle_duration: Cycle label to match, e.g. '2009-2013'.
        election_year: Election year (int) that both results must match.

    Returns:
        Tuple ``(economic_df, election_df)``: the economic rows whose
        'End Year' equals ``election_year`` and the electoral rows whose
        'year' equals ``election_year``. ``economic_df`` is empty when the
        cycle label does not end the year after ``election_year``.
    """
    # rename() hands back new frames, so the callers' inputs keep their headers.
    econ = economic_data.rename(columns={"State": "state", "Cycle Duration": "cycle_duration"})
    votes = electoral_data.rename(columns={"State": "state", "Year": "year"})

    # Same casing for state names on both sides; integer years on the electoral side.
    econ = econ.assign(state=econ['state'].str.upper())
    votes = votes.assign(
        state=votes['state'].str.upper(),
        year=votes['year'].astype(int),
    )

    # Keep only the requested indicator for the requested cycle.
    wanted_indicator = econ['Indicator'] == indicator
    wanted_cycle = econ['cycle_duration'] == cycle_duration
    selected = econ[wanted_indicator & wanted_cycle].copy()

    # 'YYYY-YYYY' -> second year, minus one, so it can be compared with the election year.
    closing_year = selected['cycle_duration'].str.split('-').str[1].astype(int)
    selected['End Year'] = closing_year - 1

    economic_df = selected[selected['End Year'] == election_year]
    election_df = votes[votes['year'] == election_year]
    return economic_df, election_df

# Load the raw frames in this cell: no other cell shown assigns `economic_data`
# or `electoral_data`, so without these reads the call below depends on leftover
# kernel state and fails with NameError after Restart & Run All.
economic_data = pd.read_csv(economic_data_path)
electoral_data = pd.read_csv(electoral_data_path)

# NOTE: `election_2008_data` is a legacy name; the call requests the 2012
# election, so the frame holds 2012 rows. The name is kept so any other cell
# referring to it keeps working.
realGDP_data_2009_2013, election_2008_data = normalize_dataset(economic_data, electoral_data, 'Real GDP', '2009-2013', 2012)
print(realGDP_data_2009_2013.head())
print(election_2008_data.head())


           state Indicator     Term     President cycle_duration  \
0  UNITED STATES  Real GDP  Obama 1  Barack Obama      2009-2013   
1        ALABAMA  Real GDP  Obama 1  Barack Obama      2009-2013   
2         ALASKA  Real GDP  Obama 1  Barack Obama      2009-2013   
3        ARIZONA  Real GDP  Obama 1  Barack Obama      2009-2013   
4       ARKANSAS  Real GDP  Obama 1  Barack Obama      2009-2013   

   Overall Growth (%)  Average Growth (%)  End Year  
0                8.95                2.17      2012  
1                6.58                1.61      2012  
2               -0.59               -0.08      2012  
3                7.73                1.88      2012  
4                9.11                2.21      2012  
    year       state        Office Winning Candidate Winning Party  \
51  2012     ALABAMA  US PRESIDENT      ROMNEY, MITT    REPUBLICAN   
52  2012      ALASKA  US PRESIDENT      ROMNEY, MITT    REPUBLICAN   
53  2012     ARIZONA  US PRESIDENT      ROMNEY, MITT    R

In [61]:
def load_and_merge_economic_electoral_data(economic_data_path, electoral_data_path, indicator, cycle_duration, election_year):
    """Read both CSVs and join one indicator/cycle to one election, per state.

    Args:
        economic_data_path: Path of the economic-growth CSV.
        electoral_data_path: Path of the electoral-results CSV.
        indicator: Economic indicator to keep, e.g. 'Real GDP'.
        cycle_duration: Cycle label to keep, e.g. '2009-2013'.
        election_year: Election year (int) to join against.

    Returns:
        DataFrame with one row per state present in both inputs for the
        requested indicator, cycle and election year. The helper
        'End Year' column used for the join is removed.
    """
    economic_data = pd.read_csv(economic_data_path)
    electoral_data = pd.read_csv(electoral_data_path)

    # Economic rows whose 'State' is not in this list are discarded before merging.
    states = [
        "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado",
        "Connecticut", "Delaware", "Florida", "Georgia", "Hawaii", "Idaho",
        "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana",
        "Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota",
        "Mississippi", "Missouri", "Montana", "Nebraska", "Nevada",
        "New Hampshire", "New Jersey", "New Mexico", "New York",
        "North Carolina", "North Dakota", "Ohio", "Oklahoma", "Oregon",
        "Pennsylvania", "Rhode Island", "South Carolina", "South Dakota",
        "Tennessee", "Texas", "Utah", "Vermont", "Virginia", "Washington",
        "West Virginia", "Wisconsin", "Wyoming", "District of Columbia",
    ]
    in_state_list = economic_data['State'].isin(states)
    state_level_economy = economic_data.loc[in_state_list]

    # Harmonise headers/casing and cut both frames down to the requested election.
    economic_df, election_df = normalize_dataset(
        state_level_economy, electoral_data, indicator, cycle_duration, election_year
    )

    # Inner join: a state appears only if it exists on both sides for that year.
    # 'End Year' duplicates 'year' after the join, so it is dropped.
    return economic_df.merge(
        election_df,
        left_on=['state', 'End Year'],
        right_on=['state', 'year'],
        how='inner',
    ).drop(columns=["End Year"])

# Example usage: join Real GDP growth for the 2009-2013 cycle to the 2012
# election results and preview the first rows of the merged frame.
merged_data = load_and_merge_economic_electoral_data(economic_data_path, electoral_data_path, 'Real GDP', '2009-2013', 2012)
print(merged_data.head())

        state Indicator     Term     President cycle_duration  \
0     ALABAMA  Real GDP  Obama 1  Barack Obama      2009-2013   
1      ALASKA  Real GDP  Obama 1  Barack Obama      2009-2013   
2     ARIZONA  Real GDP  Obama 1  Barack Obama      2009-2013   
3    ARKANSAS  Real GDP  Obama 1  Barack Obama      2009-2013   
4  CALIFORNIA  Real GDP  Obama 1  Barack Obama      2009-2013   

   Overall Growth (%)  Average Growth (%)  year        Office  \
0                6.58                1.61  2012  US PRESIDENT   
1               -0.59               -0.08  2012  US PRESIDENT   
2                7.73                1.88  2012  US PRESIDENT   
3                9.11                2.21  2012  US PRESIDENT   
4               10.88                2.62  2012  US PRESIDENT   

  Winning Candidate Winning Party  Winning Percentage  Losing Candidate  \
0      ROMNEY, MITT    REPUBLICAN               60.55  OBAMA, BARACK H.   
1      ROMNEY, MITT    REPUBLICAN               54.80  OBAMA, BARACK

In [65]:
# Merging economic indicators with electoral data.
# Three economic cycles, one per election year; the two lists below appear to
# pair up by position (cycle_durations[i] with election_years[i]).
# NOTE(review): none of the code shown in this cell reads these three lists;
# the calls further down pass their arguments as literals.
indicators = ['Real GDP', 'Real Per Capita Personal Income', 'Real Personal Income', 'Real Per Capita PCE', 'Real PCE', 'Total Employment']
cycle_durations = ['2009-2013', '2013-2017', '2017-2021']
election_years = [2012, 2016, 2020]

def merging_economic_electoral_data(economic_data_path, electoral_data_path, indicator, cycle_duration, election_year):
    """Merge one indicator/cycle with one election and save the result as CSV.

    The merged frame is written, without its index, to
    ``merged_<indicator>_<election_year>.csv`` in the project's
    ``electoral_analysis`` directory, where ``<indicator>`` is the indicator
    name lower-cased with spaces replaced by underscores.

    Args:
        economic_data_path: Path of the economic-growth CSV.
        electoral_data_path: Path of the electoral-results CSV.
        indicator: Economic indicator to merge, e.g. 'Real GDP'.
        cycle_duration: Cycle label to merge, e.g. '2009-2013'.
        election_year: Election year (int) to join against.

    Returns:
        The merged DataFrame that was written to disk.
    """
    merged_data = load_and_merge_economic_electoral_data(
        economic_data_path,
        electoral_data_path,
        indicator,
        cycle_duration,
        election_year,
    )

    # File-name-friendly form of the indicator, e.g. 'Real GDP' -> 'real_gdp'.
    indicator_slug = indicator.replace(" ", "_").lower()
    output_file = f'/workspaces/Electoral-Economic-Analysis/electoral_analysis/merged_{indicator_slug}_{election_year}.csv'
    merged_data.to_csv(output_file, index=False)
    return merged_data



# Merge Real GDP data for 2009-2013 with 2012 election results.
# Each call also writes the merged frame to a CSV file as a side effect.
merged_real_gdp_2012 = merging_economic_electoral_data(economic_data_path, electoral_data_path, 'Real GDP', '2009-2013', 2012)
print(merged_real_gdp_2012.head())

# Merge Real Personal Income data for 2009-2013 with 2012 election results.
merged_real_personal_income_2012 = merging_economic_electoral_data(economic_data_path, electoral_data_path, 'Real Personal Income', '2009-2013', 2012)
print(merged_real_personal_income_2012.head())


        state Indicator     Term     President cycle_duration  \
0     ALABAMA  Real GDP  Obama 1  Barack Obama      2009-2013   
1      ALASKA  Real GDP  Obama 1  Barack Obama      2009-2013   
2     ARIZONA  Real GDP  Obama 1  Barack Obama      2009-2013   
3    ARKANSAS  Real GDP  Obama 1  Barack Obama      2009-2013   
4  CALIFORNIA  Real GDP  Obama 1  Barack Obama      2009-2013   

   Overall Growth (%)  Average Growth (%)  year        Office  \
0                6.58                1.61  2012  US PRESIDENT   
1               -0.59               -0.08  2012  US PRESIDENT   
2                7.73                1.88  2012  US PRESIDENT   
3                9.11                2.21  2012  US PRESIDENT   
4               10.88                2.62  2012  US PRESIDENT   

  Winning Candidate Winning Party  Winning Percentage  Losing Candidate  \
0      ROMNEY, MITT    REPUBLICAN               60.55  OBAMA, BARACK H.   
1      ROMNEY, MITT    REPUBLICAN               54.80  OBAMA, BARACK