In [12]:
# import packages
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns  
import statsmodels.api as sm

In [13]:
# Project directory layout, rooted at the current user's home directory
home_dir = Path.home()
work_dir = home_dir.joinpath('Desktop', 'GitHub', 'election_inflation_analysis')

# Data folders: `raw` and `clean` both live under `data`
data = work_dir.joinpath('data')
raw_data, clean_data = data.joinpath('raw'), data.joinpath('clean')

# Directory the notebook is running from, and the folder for generated outputs
code = Path.cwd()
output = work_dir.joinpath('output')

### Inflation Data

In [14]:
# MSA-level BEA regional price parity (RPP) data, used to measure inflation.
# The first five rows of the sheet are skipped (presumably a title/preamble
# above the header row -- confirm against the workbook).
raw_bea_msa = pd.read_excel(raw_data / 'bea_msa_rpp.xlsx', skiprows=5)


def _lower_if_str(value):
    """Return `value` lower-cased when it is a string; otherwise return it unchanged."""
    return value.lower() if isinstance(value, str) else value


# Lower-case every string cell (column names are left untouched).
# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map,
# so use `map` when this pandas provides it and fall back to `applymap` otherwise.
# Either call returns a new frame, so `raw_bea_msa` itself is never modified.
_elementwise = raw_bea_msa.map if hasattr(pd.DataFrame, 'map') else raw_bea_msa.applymap
msa_rpp_df = _elementwise(_lower_if_str)

# Split GeoName on the comma: the text before it is the MSA name; the first
# whitespace-delimited token after it is taken as the state.
geo_parts = msa_rpp_df['GeoName'].str.split(',')
msa_name = geo_parts.str[0]
state_name = geo_parts.str[1].str.strip().str.split().str[0]

# Shorten hyphenated names to the text before the first hyphen
msa_short = msa_name.str.split('-').str[0]
state_short = state_name.str.split('-').str[0]

# Map the (already lower-cased) Description labels to short category names.
# The leading spaces in the keys are part of the labels being matched.
recode_categories_dict = {
    'rpps: all items': 'all items',
    '  rpps: goods': 'goods',
    '  rpps: services: housing': 'housing',
    '  rpps: services: utilities': 'utilities',
    '  rpps: services: other': 'other services'
}

msa_rpp_df = (
    msa_rpp_df
    .assign(
        # 2020-2022 cumulative inflation: percent change in RPP
        rpp_change_20_22=((msa_rpp_df['2022'] - msa_rpp_df['2020']) / msa_rpp_df['2020']) * 100,
        # Merge key in "<short msa>, <short state>" form
        msa=msa_short + ', ' + state_short,
        state=state_name,
        msa_short=msa_short,
        state_short=state_short,
        Description=msa_rpp_df['Description'].replace(recode_categories_dict),
    )
    .drop(columns=['LineCode'])
    .rename(columns={'Description': 'category', 'GeoName': 'msa_full'})
)

# Columns carried forward to the merge; `.copy()` makes the result independent
# of `msa_rpp_df` so later edits to it cannot trigger chained-assignment warnings.
keep = ['msa', 'category', 'rpp_change_20_22', 'msa_full']
msa_inflation_bea = msa_rpp_df[keep].copy()

  msa_rpp_df = msa_rpp_df.applymap(lambda x: x.lower() if isinstance(x, str) else x)


### Vote Swing and Housing Index

In [15]:
# Vote swing: load, drop the short MSA column, and use 'msa' as the key name
file = output / "vote_swing.csv"
df_election = (
    pd.read_csv(file)
    .drop(columns='msa_short')
    .rename(columns={'regionname': 'msa'})
)

# Zillow median rent: lower-case the headers and the region names, then keep
# only the region name and the 10/31/2024 column
zillow_rent = raw_data / 'zillow_median_rent_raw.csv'
keep = ['regionname', '10/31/2024']
df_zillow = pd.read_csv(zillow_rent)
df_zillow.columns = df_zillow.columns.str.lower()
df_zillow = (
    df_zillow
    .assign(regionname=df_zillow['regionname'].str.lower())
    .loc[:, keep]
    .rename(columns={'regionname': 'msa'})
)

# Merge Zillow and election data on MSA, keeping unmatched rows from both sides
zillow_election = (
    pd.merge(df_zillow, df_election, on='msa', how='outer')
    .rename(columns={'10/31/2024': 'zillow_index_2024'})
)
print(zillow_election.columns)

Index(['msa', 'zillow_index_2024', 'vote_swing'], dtype='object')


## Creating Master Dataset

In [None]:
# Combine the Zillow/vote-swing frame with the BEA inflation measures by MSA
merged = pd.merge(zillow_election, msa_inflation_bea, on='msa', how='outer')

# NOTE(review): this step was originally commented "Keep only all items rows",
# but the filter selects the 'goods' category. The filter is kept exactly as it
# was -- confirm which RPP category the regression is meant to use.
regression_data = (
    merged
    .loc[merged['category'] == 'goods']
    .drop(columns=['msa_full', 'category'])
    # Drop rows with any missing value (this also removes unmatched MSAs
    # left over from the outer merges)
    .dropna()
    # Explicit copy so later column assignments act on an independent frame
    # rather than on a filtered slice of `merged`
    .copy()
)

In [18]:
# Cross-sectional OLS: vote swing on RPP change, the Zillow index, and their product
predictors = ['rpp_change_20_22', 'zillow_index_2024', 'interaction']

# Interaction term is stored on the frame alongside the other regressors
regression_data['interaction'] = (
    regression_data['zillow_index_2024'] * regression_data['rpp_change_20_22']
)

# Outcome, and design matrix with an intercept column added
y = regression_data['vote_swing']
X = sm.add_constant(regression_data[predictors])

# Fit the model and print the regression summary table
model = sm.OLS(y, X).fit()
print(model.summary())


                            OLS Regression Results                            
Dep. Variable:             vote_swing   R-squared:                       0.019
Model:                            OLS   Adj. R-squared:                  0.010
Method:                 Least Squares   F-statistic:                     1.983
Date:                Mon, 09 Dec 2024   Prob (F-statistic):              0.117
Time:                        17:10:59   Log-Likelihood:                -636.47
No. Observations:                 308   AIC:                             1281.
Df Residuals:                     304   BIC:                             1296.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                 1.1898      0.44