In [30]:
import pandas as pd
import statsmodels.formula.api as smf
from sklearn.metrics import root_mean_squared_error


# Location of the input CSV, given relative to the notebook's working directory.
file_path = 'new_hope.csv'
# Read the CSV into `df`, the frame the cells below rename, extend and model.
df = pd.read_csv(file_path)


In [31]:

# Raw CSV header -> snake_case name. The snake_case names are the ones the
# model formulas in the following cells refer to.
column_renames = {
    'State.Minimum.Wage.2020.Dollars': 'state_minimum_wage_2020_dollars',
    'Federal.Minimum.Wage.2020.Dollars': 'federal_minimum_wage_2020_dollars',
    'Effective.Minimum.Wage.2020.Dollars': 'effective_minimum_wage_2020_dollars',
    'CPI.Average': 'cpi_average',
    'Total Civilian Non-Institutional Population in State/Area': 'total_civilian_non_institutional_population',
    'Total Civilian Labor Force in State/Area': 'total_civilian_labor_force',
    'Percent (%) of Labor Force Unemployed in State/Area': 'unemployment_rate',
    'Population': 'population',
    'Murder': 'murder',
    'rape': 'rape',
    'Burglary': 'burglary',
    'Aggravated assault': 'aggravated_assault',
    'property total': 'property_total',
    'Larceny theft': 'larceny_theft',
    'Robbery': 'robbery',
    'Effective Federal Funds Rate': 'effective_federal_funds_rate',
    'violent total': 'violent_total',
}

# rename() returns a new frame; rebinding `df` keeps the rest of the notebook
# working against the renamed columns.
df = df.rename(columns=column_renames)

# Print the resulting header so the applied renames can be checked by eye.
print("Column names after renaming:", df.columns)

# Treatment indicator: 1 where the effective minimum wage is above the federal
# minimum wage, 0 otherwise. Computed as a vectorized column comparison rather
# than a row-wise apply; a missing value on either side compares False and so
# still maps to 0.
above_federal = (
    df['effective_minimum_wage_2020_dollars'] > df['federal_minimum_wage_2020_dollars']
)
df['treatment_min_wage'] = above_federal.astype(int)

# Period indicator: 1 for years 2009 and later, 0 before.
df['time'] = (df['Year'] >= 2009).astype(int)

# Interaction term: 1 only for treated rows in the 2009+ period.
df['did'] = df['treatment_min_wage'] * df['time']

outcome_var = 'unemployment_rate'

# Regressors for the OLS model; `did` is the interaction built above.
# NOTE(review): only the interaction enters the regression -- `treatment_min_wage`
# and `time` themselves are not regressors. A conventional
# difference-in-differences specification includes both main effects; confirm
# this omission is intended.
# NOTE(review): `violent_total` is used together with murder, rape, robbery and
# aggravated_assault; if it is the sum of those columns the design is
# collinear -- TODO confirm.
significant_features = [
    'effective_minimum_wage_2020_dollars',
    'effective_federal_funds_rate',
    'cpi_average',
    'rape',
    'murder',
    'robbery',
    'violent_total',
    'aggravated_assault',
    'state_minimum_wage_2020_dollars',
    'did',
]

# "<outcome> ~ x1 + x2 + ..." formula string built from the names above.
formula = f"{outcome_var} ~ {' + '.join(significant_features)}"

model = smf.ols(formula, data=df).fit()

print(model.summary())

Column names after renaming: Index(['Year', 'State', 'state_minimum_wage_2020_dollars',
       'federal_minimum_wage_2020_dollars',
       'effective_minimum_wage_2020_dollars', 'cpi_average',
       'total_civilian_non_institutional_population',
       'total_civilian_labor_force', 'unemployment_rate', 'total_import',
       'GDP', 'Real GDP*', 'effective_federal_funds_rate', 'Inflation Rate',
       'population', 'violent_total', 'murder', 'rape', 'robbery',
       'aggravated_assault', 'property_total', 'burglary', 'larceny_theft',
       'vehicle theft'],
      dtype='object')
                            OLS Regression Results                            
Dep. Variable:      unemployment_rate   R-squared:                       0.492
Model:                            OLS   Adj. R-squared:                  0.488
Method:                 Least Squares   F-statistic:                     112.6
Date:                Tue, 19 Nov 2024   Prob (F-statistic):          3.45e-163
Time:            

In [32]:

# Two-column table of the fitted OLS parameters: the term name goes in
# 'Feature', its estimate in 'Coefficient', with a fresh 0..n-1 row index.
coefficients = (
    model.params
    .rename_axis('Feature')
    .reset_index(name='Coefficient')
)

print(coefficients)

                                Feature  Coefficient
0                             Intercept     5.131623
1   effective_minimum_wage_2020_dollars     0.421167
2          effective_federal_funds_rate    -0.528291
3                           cpi_average    -0.007512
4                                  rape     0.075339
5                                murder     0.073600
6                               robbery     0.080457
7                         violent_total    -0.077059
8                    aggravated_assault     0.078128
9       state_minimum_wage_2020_dollars    -0.076982
10                                  did     1.387561


In [33]:
# In-sample predictions: with no arguments, predict() evaluates the model on
# the design matrix it was fitted with.
pred = model.predict()

# Score against the response vector the model was actually fitted on rather
# than the raw df column: if the formula interface dropped rows with missing
# values, the raw column would be longer than `pred` and the metric call would
# fail. With no dropped rows both hold the same values.
observed = model.model.endog

rmse = root_mean_squared_error(observed, pred)
print("Root Mean Squared Error (RMSE):", rmse)

Root Mean Squared Error (RMSE): 1.370462679250257


# Instrumental Variables (2SLS)

In [34]:
from linearmodels.iv import IV2SLS

# Rebuild the indicator columns with vectorized comparisons (same 0/1 values as
# the previous row-wise apply; a missing value compares False and maps to 0).
# NOTE(review): this cell uses 2000 as the cut-off year for `time`, while the
# OLS cell earlier in the notebook uses 2009, so running this cell overwrites
# df['time'] and df['did'] with a different definition. None of these three
# columns appears in the IV formula below -- confirm which cut-off is intended,
# or whether these lines are needed here at all.
above_federal = (
    df['effective_minimum_wage_2020_dollars'] > df['federal_minimum_wage_2020_dollars']
)
df['treatment_min_wage'] = above_federal.astype(int)

df['time'] = (df['Year'] >= 2000).astype(int)

df['did'] = df['treatment_min_wage'] * df['time']

# Roles of the variables in the two-stage least squares model.
outcome_var = 'unemployment_rate'
endogenous_var = 'effective_minimum_wage_2020_dollars'
instrument_var = 'state_minimum_wage_2020_dollars'

# Controls entered directly (treated as exogenous).
exogenous_vars = [
    'effective_federal_funds_rate',
    'cpi_average',
    'rape',
    'murder',
    'robbery',
    'violent_total',
    'aggravated_assault',
]

# Formula layout: "y ~ 1 + controls + [endogenous ~ instrument]"; the bracketed
# part names the endogenous regressor and the instrument used for it.
iv_formula = f"{outcome_var} ~ 1 + {' + '.join(exogenous_vars)} + [{endogenous_var} ~ {instrument_var}]"

iv_model = IV2SLS.from_formula(iv_formula, data=df).fit()

# `summary` is accessed as an attribute here (no call), unlike the statsmodels
# `summary()` method used for the OLS model.
print(iv_model.summary)

                          IV-2SLS Estimation Summary                          
Dep. Variable:      unemployment_rate   R-squared:                      0.4326
Estimator:                    IV-2SLS   Adj. R-squared:                 0.4287
No. Observations:                1173   F-statistic:                    831.73
Date:                Tue, Nov 19 2024   P-value (F-stat)                0.0000
Time:                        21:15:21   Distribution:                  chi2(8)
Cov. Estimator:                robust                                         
                                                                              
                                          Parameter Estimates                                          
                                     Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
-------------------------------------------------------------------------------------------------------
Intercept                               6.8901     1.073

In [35]:

# Two-column table of the fitted IV parameters: the term name goes in
# 'Feature', its estimate in 'Coefficient', with a fresh 0..n-1 row index.
coefficients = (
    iv_model.params
    .rename_axis('Feature')
    .reset_index(name='Coefficient')
)

print(coefficients)

                               Feature  Coefficient
0                            Intercept     6.890094
1         effective_federal_funds_rate    -0.599103
2                          cpi_average    -0.003719
3                                 rape     0.061835
4                               murder     0.049187
5                              robbery     0.066566
6                        violent_total    -0.062659
7                   aggravated_assault     0.064133
8  effective_minimum_wage_2020_dollars     0.066160


In [36]:
# Take the `fitted_values` of the IV model's predict() result as `pred`.
# NOTE(review): the following cell repeats this exact assignment before using
# `pred`, so this cell looks redundant.
pred = iv_model.predict().fitted_values

In [37]:
# Fitted values of the IV model for the observations used in estimation.
pred = iv_model.predict().fitted_values

# Select the observed outcome by the prediction index so both inputs cover the
# same rows. If the model dropped rows with missing values, the full df column
# would be longer than `pred` and the metric call would fail; with no dropped
# rows this selects the whole column, as before.
observed = df.loc[pred.index, 'unemployment_rate']

rmse = root_mean_squared_error(observed, pred)
print("Root Mean Squared Error (RMSE):", rmse)

Root Mean Squared Error (RMSE): 1.4484229644433066
