In [20]:
import pandas as pd

economic_data = pd.read_csv('/workspaces/Electoral-Economic-Analysis/electoral_analysis/processed_bea_economic_growth.csv')
electoral_data = pd.read_csv('/workspaces/Electoral-Economic-Analysis/electoral_analysis/processed_electoral_data.csv')

states = ["Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado", "Connecticut", "Delaware", "Florida", "Georgia", 
          "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana", "Maine", "Maryland", "Massachusetts", 
          "Michigan", "Minnesota", "Mississippi", "Missouri", "Montana", "Nebraska", "Nevada", "New Hampshire", "New Jersey", 
          "New Mexico", "New York", "North Carolina", "North Dakota", "Ohio", "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island", 
          "South Carolina", "South Dakota", "Tennessee", "Texas", "Utah", "Vermont", "Virginia", "Washington", "West Virginia", 
          "Wisconsin", "Wyoming", "District of Columbia"]

economic_data = economic_data[economic_data['State'].isin(states)]


print(economic_data.head())

print(economic_data)

        State Indicator     Term     President Cycle Duration  \
1     Alabama  Real GDP  Obama 1  Barack Obama      2009-2013   
2      Alaska  Real GDP  Obama 1  Barack Obama      2009-2013   
3     Arizona  Real GDP  Obama 1  Barack Obama      2009-2013   
4    Arkansas  Real GDP  Obama 1  Barack Obama      2009-2013   
5  California  Real GDP  Obama 1  Barack Obama      2009-2013   

   Overall Growth (%)  Average Growth (%)  Start Year  
1                6.58                1.61        2008  
2               -0.59               -0.08        2008  
3                7.73                1.88        2008  
4                9.11                2.21        2008  
5               10.88                2.62        2008  
              State         Indicator     Term     President Cycle Duration  \
1           Alabama          Real GDP  Obama 1  Barack Obama      2009-2013   
2            Alaska          Real GDP  Obama 1  Barack Obama      2009-2013   
3           Arizona          Real GD

In [27]:
# Normalize the column names for merging
economic_data.rename(columns={"State": "state", "Cycle Duration": "cycle_duration"}, inplace=True)
electoral_data.rename(columns={"State": "state", "Year": "year"}, inplace=True)

# Step 1: Extract the start year from the Cycle Duration
economic_data['cycle_duration'] = economic_data['cycle_duration'].astype(str)
economic_data['Start Year'] = economic_data['cycle_duration'].str.split('-').str[0].astype(int) - 1

# Ensure 'state' columns are in the same format (e.g., uppercase)
economic_data['state'] = economic_data['state'].str.upper()
electoral_data['state'] = electoral_data['state'].str.upper()

# Ensure 'year' columns are integers
economic_data['Start Year'] = economic_data['Start Year'].astype(int)
electoral_data['year'] = electoral_data['year'].astype(int)

# Merging two relative dataframes
merged_data = pd.merge(economic_data, electoral_data, left_on=['state', 'Start Year'], right_on=['state', 'year'], how='inner')

# Drop unnecessary columns and clean up the data
merged_data = merged_data.drop(columns=["Start Year"])

# Display the merged dataset
print(merged_data.head())

# Save the merged dataset to a CSV file
merged_data.to_csv('/workspaces/Electoral-Economic-Analysis/electoral_analysis/merged_economic_electoral_data.csv', index=False)

        state Indicator     Term     President cycle_duration  \
0     ALABAMA  Real GDP  Obama 1  Barack Obama      2009-2013   
1      ALASKA  Real GDP  Obama 1  Barack Obama      2009-2013   
2     ARIZONA  Real GDP  Obama 1  Barack Obama      2009-2013   
3    ARKANSAS  Real GDP  Obama 1  Barack Obama      2009-2013   
4  CALIFORNIA  Real GDP  Obama 1  Barack Obama      2009-2013   

   Overall Growth (%)  Average Growth (%)  year        Office  \
0                6.58                1.61  2008  US PRESIDENT   
1               -0.59               -0.08  2008  US PRESIDENT   
2                7.73                1.88  2008  US PRESIDENT   
3                9.11                2.21  2008  US PRESIDENT   
4               10.88                2.62  2008  US PRESIDENT   

  Winning Candidate Winning Party  Winning Percentage  Losing Candidate  \
0      MCCAIN, JOHN    REPUBLICAN               60.32  OBAMA, BARACK H.   
1      MCCAIN, JOHN    REPUBLICAN               59.42  OBAMA, BARACK

In [21]:
# Electoral data filtering

election_2008_data = electoral_data[electoral_data['Year'] == 2008]
election_2012_data = electoral_data[electoral_data['Year'] == 2012]
election_2016_data = electoral_data[electoral_data['Year'] == 2016]
election_2020_data = electoral_data[electoral_data['Year'] == 2020]

# Print the number of unique electoral result in each filtered dataset
print("Unique electoral results in 2008:", election_2008_data['State'].nunique())
print("Unique electoral results in 2012:", election_2012_data['State'].nunique())
print("Unique electoral results in 2016:", election_2016_data['State'].nunique())
print("Unique electoral results in 2020:", election_2020_data['State'].nunique())

Unique electoral results in 2008: 51
Unique electoral results in 2012: 51
Unique electoral results in 2016: 51
Unique electoral results in 2020: 51


In [22]:
# Filter data for Real GDP
realGDP_data = economic_data[economic_data['Indicator'] == 'Real GDP']

# Filter data for Real per capita personal income
real_per_capita_personal_income_data = economic_data[economic_data['Indicator'] == 'Real Per Capita Personal Income']

# Filter data for Real personal income
real_personal_income_data = economic_data[economic_data['Indicator'] == 'Real Personal Income']

# Filter data for Real per capita PCE
real_per_capita_PCE_data = economic_data[economic_data['Indicator'] == 'Real Per Capita PCE']

# Filter data for Real PCE
real_PCE_data = economic_data[economic_data['Indicator'] == 'Real PCE']

# Filter data for Total employment
total_employment_data = economic_data[economic_data['Indicator'] == 'Total Employment']

# Print the number of unique states in each filtered dataset
print(len(realGDP_data['State'].unique()))
print(len(real_per_capita_personal_income_data['State'].unique()))
print(len(real_personal_income_data['State'].unique()))
print(len(real_per_capita_PCE_data['State'].unique()))
print(len(real_PCE_data['State'].unique()))
print(len(total_employment_data['State'].unique()))

# Print the number of unique states in the electoral data
print(len(electoral_data['State'].unique()))


print(realGDP_data.head())


51
51
51
51
51
51
51
        State Indicator     Term     President Cycle Duration  \
1     Alabama  Real GDP  Obama 1  Barack Obama      2009-2013   
2      Alaska  Real GDP  Obama 1  Barack Obama      2009-2013   
3     Arizona  Real GDP  Obama 1  Barack Obama      2009-2013   
4    Arkansas  Real GDP  Obama 1  Barack Obama      2009-2013   
5  California  Real GDP  Obama 1  Barack Obama      2009-2013   

   Overall Growth (%)  Average Growth (%)  Start Year  
1                6.58                1.61        2008  
2               -0.59               -0.08        2008  
3                7.73                1.88        2008  
4                9.11                2.21        2008  
5               10.88                2.62        2008  
