In [None]:
import pandas as pd

# Read the raw BEA export. The first three lines of the file are skipped so
# that the fourth line is used as the header row.
raw_bea_path = '/workspaces/Electoral-Economic-Analysis/electoral_analysis/bea_data.csv'
cleaned_bea_path = '/workspaces/Electoral-Economic-Analysis/electoral_analysis/cleaned_bea_data.csv'

# Load and discard the two identifier columns in a single chained expression.
bea_data = (
    pd.read_csv(raw_bea_path, skiprows=3)
    .drop(columns=['GeoFips', 'LineCode'])
)

# `economic_data` is a second name for the same frame (not a copy); a later
# cell refers to it under this name.
economic_data = bea_data

# Persist the cleaned table without the positional index, then preview it.
economic_data.to_csv(cleaned_bea_path, index=False)
economic_data.head()

In [None]:
import pandas as pd

# Load the cleaned BEA table written by the earlier cleaning cell.
bea_data = pd.read_csv('/workspaces/Electoral-Economic-Analysis/electoral_analysis/cleaned_bea_data.csv')

# Map the verbose values found in the 'Description' column to concise labels.
# Only rows whose description is one of these keys survive the filter below.
# NOTE(review): 'GeoName' is a column name rather than a description value, so
# this entry presumably never matches a row - confirm before removing it.
description_mapping = {
    'GeoName': 'State',
    'Real per capita personal income 4': 'Real Per Capita Personal Income',
    'Real per capita PCE 5': 'Real Per Capita PCE',
    'Real GDP (millions of chained 2017 dollars) 1': 'Real GDP',
    'Real personal income (millions of constant (2017) dollars) 2': 'Real Personal Income',
    'Real PCE (millions of constant (2017) dollars) 3': 'Real PCE',
    'Total employment (number of jobs)': 'Total Employment'
}

# Strip any leading or trailing whitespace from the 'Description' column so the
# exact-match lookup against the mapping keys works.
bea_data['Description'] = bea_data['Description'].str.strip()

# Keep only the rows whose description is a key of description_mapping.
# The explicit .copy() makes the result an independent frame, so the column
# assignment below does not trigger pandas' SettingWithCopyWarning.
bea_data = bea_data[bea_data['Description'].isin(description_mapping.keys())].copy()

# Replace the descriptions with the concise descriptions
bea_data['Description'] = bea_data['Description'].replace(description_mapping)

print(bea_data.columns)

# Keep only the state name, the indicator description and the yearly columns
# 2008 through 2023 (range end is exclusive); every other column is dropped.
columns_to_keep = ['GeoName', 'Description'] + [str(year) for year in range(2008, 2024)]
bea_data = bea_data[columns_to_keep]

# Save the filtered data to a new CSV file
output_path = '/workspaces/Electoral-Economic-Analysis/electoral_analysis/filtered_bea_data.csv'
bea_data.to_csv(output_path, index=False)

print(f"Cleaned data saved to {output_path}")

In [None]:
# Give the two key columns shorter, clearer names.
# `economic_data` is not defined in this cell; it must already exist in the
# kernel from an earlier cell.
column_renames = {
    'GeoName': 'State',
    'Description': 'Indicator',
}
economic_data = economic_data.rename(columns=column_renames)

# Show the first rows as a quick check that the rename took effect.
print(economic_data.head())

# Write the renamed table to disk without the positional index.
# NOTE(review): 'Grwoth' in the file name looks like a typo for 'Growth'; it is
# kept as-is because other code may read this exact path - confirm before renaming.
renamed_csv_path = '/workspaces/Electoral-Economic-Analysis/electoral_analysis/State_Economic_Grwoth_Rate_data.csv'
economic_data.to_csv(renamed_csv_path, index=False)

In [None]:
import pandas as pd

# Load the filtered dataset. The code below reads one column per year (named by
# the year as a string) and uses the first row found for each
# (Indicator, State) pair.
file_path = "/workspaces/Electoral-Economic-Analysis/electoral_analysis/filtered_bea_data.csv"
data = pd.read_csv(file_path)
# Rename the columns to make them more clear and concise
data = data.rename(columns={
    'GeoName': 'State',
    'Description': 'Indicator',
})

# Presidential terms: label -> (first year, last year, president).
# Growth is measured from the first year's value to the last year's value.
terms = {
    # 'Clinton 2': (1998, 2001, 'Bill Clinton'),
    # 'Bush 1': (2001, 2005, 'George W. Bush'),
    # 'Bush 2': (2005, 2009, 'George W. Bush'),
    'Obama 1': (2009, 2013, 'Barack Obama'),
    'Obama 2': (2013, 2017, 'Barack Obama'),
    'Trump': (2017, 2021, 'Donald Trump'),
    'Biden': (2021, 2023, 'Joe Biden')  # Assuming data is available up to 2023
}

# Collect every year any term touches so the columns can be validated and
# converted once, instead of being looked up inside the innermost loop.
years_needed = set()
for start_year, end_year, _ in terms.values():
    years_needed.update((start_year, end_year), range(start_year, end_year + 1))
required_years = [str(year) for year in sorted(years_needed)]

# Fail early with a clear message if a year column is absent (previously this
# surfaced as an AttributeError on None deep inside the loop).
missing_years = [year for year in required_years if year not in data.columns]
if missing_years:
    raise KeyError(f"Year columns missing from {file_path}: {missing_years}")

# Coerce the year columns to floats; anything non-numeric becomes NaN and is
# then handled by the missing-data checks below instead of breaking the
# arithmetic. `data` itself is left untouched.
year_position = {year: pos for pos, year in enumerate(required_years)}
year_values = (
    data[required_years]
    .apply(pd.to_numeric, errors="coerce")
    .to_numpy(dtype=float)
)

# One lookup table built once: (Indicator, State) -> yearly values of the first
# matching row. setdefault keeps the first occurrence when a pair is duplicated.
rows_by_key = {}
for key, row in zip(zip(data['Indicator'], data['State']), year_values):
    rows_by_key.setdefault(key, row)

# Unique labels in order of first appearance; NaN labels cannot be matched to a
# row, so they are left out.
indicators = data['Indicator'].dropna().unique()
states = data['State'].dropna().unique()

# Initialize result storage
results = []

# Iterate over each term, then each indicator, then each state
for term, (start_year, end_year, president) in terms.items():
    start_pos = year_position[str(start_year)]
    end_pos = year_position[str(end_year)]
    span_positions = [year_position[str(year)] for year in range(start_year, end_year + 1)]

    for indicator in indicators:
        for state in states:
            row = rows_by_key.get((indicator, state))
            # Skip combinations that have no row at all (this used to raise
            # IndexError on `.values[0]`).
            if row is None:
                continue

            start_value = row[start_pos]
            end_value = row[end_pos]

            # Skip if data is missing at either end of the term
            if pd.isna(start_value) or pd.isna(end_value):
                continue

            # Overall growth across the whole term, in percent
            overall_growth = ((end_value - start_value) / start_value) * 100

            # Year-over-year growth for each consecutive pair of years in the
            # term; pairs with a missing value on either side are left out.
            yearly = row[span_positions]
            annual_growth_rates = [
                ((following - current) / current) * 100
                for current, following in zip(yearly[:-1], yearly[1:])
                if not (pd.isna(current) or pd.isna(following))
            ]

            # Simple mean of the yearly rates, or None when no pair was usable
            if annual_growth_rates:
                avg_growth = sum(annual_growth_rates) / len(annual_growth_rates)
            else:
                avg_growth = None

            # Store result
            results.append({
                "State": state,
                "Indicator": indicator,
                "Term": term,
                "President": president,
                "Cycle Duration": f"{start_year}-{end_year}",
                "Overall Growth (%)": round(overall_growth, 2),
                "Average Growth (%)": round(avg_growth, 2) if avg_growth is not None else None,
            })

# Convert results to a DataFrame
results_df = pd.DataFrame(results)

# Save the results to a CSV file
output_path = "/workspaces/Electoral-Economic-Analysis/electoral_analysis/processed_bea_economic_growth.csv"
results_df.to_csv(output_path, index=False)

print(f"Processed data saved to {output_path}")
print("Unique indicators processed:", results_df['Indicator'].unique())
print(results_df.head())