In [2]:
import pandas as pd

# Load the csv file
bea_data = pd.read_csv('/workspaces/Electoral-Economic-Analysis/electoral_analysis/bea_data.csv', skiprows=3)

# Drop the 'GeoFips' and 'LineCode' columns
bea_data = bea_data.drop(columns=['GeoFips', 'LineCode'])

# Dictionary to map original descriptions to concise descriptions
description_mapping = {
    'Real per capita personal income 4': 'Real Per Capita Personal Income',
    'Real per capita PCE 5': 'Real Per Capita PCE',
    'Real GDP (millions of chained 2017 dollars) 1': 'Real GDP',
    'Real personal income (millions of constant (2017) dollars) 2': 'Real Personal Income',
    'Real PCE (millions of constant (2017) dollars) 3': 'Real PCE',
    'Total employment (number of jobs)': 'Total Employment'
}

# Strip any leading or trailing whitespace from the 'Description' column
bea_data['Description'] = bea_data['Description'].str.strip()

# Filter the DataFrame to only include rows with descriptions that match the keys in the description_mapping dictionary
bea_data = bea_data[bea_data['Description'].isin(description_mapping.keys())]

# Replace the descriptions with the concise descriptions
bea_data['Description'] = bea_data['Description'].replace(description_mapping)


economic_data = bea_data

economic_data.head()

Unnamed: 0,GeoName,Description,1998,1999,2000,2001,2002,2003,2004,2005,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
1,United States,Real GDP,12924876.0,13543774.0,14096033.0,14230726.0,14472712.0,14877312.0,15449757.0,15987957.0,...,18261714.0,18799622.0,19141672.0,19612102.0,20193896.0,20715671.0,20267585.0,21494798.0,22034828.0,22671096.0
2,United States,Real Personal Income,(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),...,15216230.2,15896447.7,16162029.4,16658962.0,17163074.0,17727562.0,18731605.3,19641720.3,19013960.0,19404131.7
3,United States,Real PCE,(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),...,12226445.7,12638789.0,12949012.2,13290625.5,13654925.4,13948133.0,13594721.8,14787232.3,15236191.5,15621697.3
10,United States,Real Per Capita Personal Income,(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),...,47659.0,49394.0,49826.0,51004.0,52240.0,53682.0,56501.0,59153.0,57052.0,57937.0
11,United States,Real Per Capita PCE,(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),...,38295.0,39272.0,39921.0,40691.0,41562.0,42237.0,41006.0,44533.0,45717.0,46644.0


In [7]:
import pandas as pd

# # Function to calculate year-over-year growth rates
# def calculate_annual_growth(values):
#     growth_rates = []
    
#     # Calculate year-over-year growth
#     for i in range(1, len(values)):
#         start_value = values[i - 1]
#         end_value = values[i]
        
#         # Skip calculation if start_value or end_value is NaN
#         if pd.isna(start_value) or pd.isna(end_value):
#             continue
        
#         # Growth rate formula
#         growth_rate = round(((end_value - start_value) / start_value) * 100, 2)
#         growth_rates.append(growth_rate)
    
#     return growth_rates

# # Function to calculate overall growth rate for a given term
# def calculate_overall_growth(start_value, end_value):
#     if pd.isna(start_value) or pd.isna(end_value):
#         return None
    
#     # Overall growth rate formula
#     overall_growth_rate = round(((end_value - start_value) / start_value) * 100, 2)
#     return overall_growth_rate


# # Convert year columns to numeric
# year_columns = [col for col in economic_data.columns if col.isdigit()]
# economic_data[year_columns] = economic_data[year_columns].apply(pd.to_numeric, errors='coerce')

# # Define the presidential administrations
# administrations = {
#     'Clinton': (1993, 2000),
#     'Bush': (2001, 2008),
#     'Obama': (2009, 2016),
#     'Trump': (2017, 2020),
#     'Biden': (2021, 2023)  # Assuming data is available up to 2023
# }

# # Add new columns for each administration's average and overall growth rates
# for admin, (start_year, end_year) in administrations.items():
#     economic_data[f'{admin} Avg Growth Rate'] = None
#     economic_data[f'{admin} Overall Growth Rate'] = None
    
#     for index, row in economic_data.iterrows():
#         # Extract the values for the specified years
#         years = [str(year) for year in range(start_year, end_year + 1) if str(year) in economic_data.columns]
        
#         if len(years) < 2:
#             continue
        
#         values = row[years].values
        
#         # Calculate the annual growth rates for the term
#         annual_growth_rates = calculate_annual_growth(values)
        
#         # Calculate the average growth rate for the term
#         if annual_growth_rates:
#             avg_growth_rate = round(sum(annual_growth_rates) / len(annual_growth_rates), 2)
#             economic_data.at[index, f'{admin} Avg Growth Rate'] = avg_growth_rate
        
#         # Calculate the overall growth rate for the term
#         overall_growth_rate = calculate_overall_growth(values[0], values[-1])
#         economic_data.at[index, f'{admin} Overall Growth Rate'] = overall_growth_rate

# Rename the columns to make them more clear and concise
economic_data = economic_data.rename(columns={
    'GeoName': 'State',
    'Description': 'Indicator',
    'Clinton Avg Growth Rate': 'Clinton Avg GR (%)',
    'Clinton Overall Growth Rate': 'Clinton Overall GR (%)',
    'Bush Avg Growth Rate': 'Bush Avg GR (%)',
    'Bush Overall Growth Rate': 'Bush Overall GR (%)',
    'Obama Avg Growth Rate': 'Obama Avg GR (%)',
    'Obama Overall Growth Rate': 'Obama Overall GR (%)',
    'Trump Avg Growth Rate': 'Trump Avg GR (%)',
    'Trump Overall Growth Rate': 'Trump Overall GR (%)',
    'Biden Avg Growth Rate': 'Biden Avg GR (%)',
    'Biden Overall Growth Rate': 'Biden Overall GR (%)'
})
# Print the first few rows to verify the changes
print(economic_data.head())

# Save the DataFrame to a CSV file
economic_data.to_csv('/workspaces/Electoral-Economic-Analysis/electoral_analysis/State_Economic_Grwoth_Rate_data.csv', index=False)

            State                        Indicator        1998        1999  \
1   United States                         Real GDP  12924876.0  13543774.0   
2   United States             Real Personal Income         NaN         NaN   
3   United States                         Real PCE         NaN         NaN   
10  United States  Real Per Capita Personal Income         NaN         NaN   
11  United States              Real Per Capita PCE         NaN         NaN   

          2000        2001        2002        2003        2004        2005  \
1   14096033.0  14230726.0  14472712.0  14877312.0  15449757.0  15987957.0   
2          NaN         NaN         NaN         NaN         NaN         NaN   
3          NaN         NaN         NaN         NaN         NaN         NaN   
10         NaN         NaN         NaN         NaN         NaN         NaN   
11         NaN         NaN         NaN         NaN         NaN         NaN   

    ...  Clinton Avg GR (%)  Clinton Overall GR (%)  Bush Avg 

In [12]:
import pandas as pd

# Load the dataset
file_path = "/workspaces/Electoral-Economic-Analysis/electoral_analysis/State_Economic_Grwoth_Rate_data.csv"
data = pd.read_csv(file_path)

# Define the presidential terms and their corresponding cycles
terms = {
    'Clinton 2': (1998, 2001, 'Bill Clinton'),
    'Bush 1': (2001, 2005, 'George W. Bush'),
    'Bush 2': (2005, 2009, 'George W. Bush'),
    'Obama 1': (2009, 2013, 'Barack Obama'),
    'Obama 2': (2013, 2017, 'Barack Obama'),
    'Trump': (2017, 2021, 'Donald Trump'),
    'Biden': (2021, 2023, 'Joe Biden')  # Assuming data is available up to 2023
}

# Initialize result storage
results = []

# Iterate over each term
for term, (start_year, end_year, president) in terms.items():
    for _, row in data.iterrows():
        state = row["State"]
        indicator = row["Indicator"]
        
        # Get start and end values
        start_value = row.get(str(start_year))
        end_value = row.get(str(end_year))
        
        # Skip if data is missing
        if pd.isna(start_value) or pd.isna(end_value):
            continue
        
        # Calculate overall growth rate
        overall_growth = ((end_value - start_value) / start_value) * 100
         
        # Calculate average growth rate
        annual_growth_rates = []
        for year in range(start_year, end_year):
            next_year = year + 1
            year_start_value = row.get(str(year))
            year_end_value = row.get(str(next_year))
            if year_start_value is not None and year_end_value is not None:
                if not pd.isna(year_start_value) and not pd.isna(year_end_value):
                    annual_growth = ((year_end_value - year_start_value) / year_start_value) * 100
                    annual_growth_rates.append(annual_growth)
        
        if annual_growth_rates:
            avg_growth = sum(annual_growth_rates) / len(annual_growth_rates)
        else:
            avg_growth = None
        
        # Store result
        results.append({
            "State": state,
            "Indicator": indicator,
            "Term": term,
            "President": president,
            "Cycle Duration": f"{start_year}-{end_year}",
            "Overall Growth (%)": round(overall_growth, 2),
            "Average Growth (%)": round(avg_growth, 2) if avg_growth is not None else None,
        })

# Convert results to a DataFrame
results_df = pd.DataFrame(results)

# Save the results to a CSV file
output_path = "/workspaces/Electoral-Economic-Analysis/electoral_analysis/processed_state_economic_growth.csv"
results_df.to_csv(output_path, index=False)

print(f"Processed data saved to {output_path}")
print("Unique indicators processed:", results_df['Indicator'].unique())
print(results_df.head())

Processed data saved to /workspaces/Electoral-Economic-Analysis/electoral_analysis/processed_state_economic_growth.csv
Unique indicators processed: ['Real GDP' 'Total Employment' 'Real Personal Income' 'Real PCE'
 'Real Per Capita Personal Income' 'Real Per Capita PCE']
           State         Indicator       Term     President Cycle Duration  \
0  United States          Real GDP  Clinton 2  Bill Clinton      1998-2001   
1  United States  Total Employment  Clinton 2  Bill Clinton      1998-2001   
2        Alabama          Real GDP  Clinton 2  Bill Clinton      1998-2001   
3        Alabama  Total Employment  Clinton 2  Bill Clinton      1998-2001   
4         Alaska          Real GDP  Clinton 2  Bill Clinton      1998-2001   

   Overall Growth (%)  Average Growth (%)  
0               10.10                3.27  
1                4.44                1.46  
2                5.02                1.66  
3                0.60                0.20  
4               -0.62               -0.1

In [7]:
# Load the dataset
file_path = "/workspaces/My_Vault/electoral_analysis/State_Economic_Grwoth_Rate_data.csv"
data = pd.read_csv(file_path)

data['1998'] + data['1999'] + data['2000'] + data 