In [1]:
import pandas as pd

In [2]:
#Read socioeconomic status per zipcode

In [3]:
# Load the socioeconomic table (presumably ACS 2019, per the file name); it is
# joined to the FEMA data on its 'zip' column further down.
socioeconomic_df = pd.read_csv(filepath_or_buffer="acs_socioeconomic_2019.csv")

In [4]:
#Read FEMA assistance data

In [5]:
# Load the FEMA Individuals and Households Program registrations.
# The damaged ZIP code is read as text on purpose: if pandas infers a numeric
# dtype and the column has blanks, it becomes float, and astype(str) would then
# yield keys such as "77002.0" or "00nan" that can never match a 5-digit ZIP.
fema_data = pd.read_csv(
    "IndividualsAndHouseholdsProgramValidRegistrations.csv",
    low_memory=False,
    dtype={'damagedZipCode': str},
)
# Restore leading zeros. Missing ZIP codes stay NaN (instead of the string
# "00nan"), so a groupby on 'zip' skips them by default.
fema_data['zip'] = fema_data['damagedZipCode'].str.zfill(5)

In [6]:
#Filter FEMA data from 2015 - 2019

In [7]:
# Parse declaration dates; values that cannot be parsed become NaT and fail
# both comparisons below, so they are excluded from the filtered frame.
fema_data['declarationDate'] = pd.to_datetime(fema_data['declarationDate'], errors='coerce')
# Keep declarations from 2015 through 2019. The upper bound is exclusive at
# 2020-01-01 so timestamps with a time of day on 2019-12-31 are retained;
# "<= '2019-12-31'" compares against midnight and silently drops them.
declared = fema_data['declarationDate']
fema_filtered = fema_data[(declared >= '2015-01-01') & (declared < '2020-01-01')]

In [8]:
#Get assistance per applicant by zip

In [9]:
# Per ZIP: total IHP amount, number of non-null registration ids, and the
# ratio of the two (average assistance per applicant).
fema_zip = (
    fema_filtered
    .groupby('zip')
    .agg(ihpAmount=('ihpAmount', 'sum'), num_applicants=('id', 'count'))
    .reset_index()
    .assign(
        assistance_per_applicant=lambda per_zip: per_zip['ihpAmount'] / per_zip['num_applicants']
    )
)

In [10]:
#Read zip data & select zip codes in the states of interest

In [11]:
# Load the ZIP code reference workbook; the 'zip' and 'state_name' columns are
# used by the cells that follow.
zipdata = pd.read_excel(io="uszips.xlsx")

In [12]:
# Turn the ZIP column into text and left-pad with zeros to 5 characters so it
# can serve as a join key.
zip_as_text = zipdata['zip'].astype(str)
zipdata['zip'] = zip_as_text.str.zfill(5)

In [13]:
# States included in the analysis.
states = [
    'Florida',
    'Texas',
    'South Carolina',
    'North Carolina',
    'Louisiana',
    'Georgia',
    'Alabama',
]
# Keep only the ZIP codes whose state_name is one of the states above.
in_selected_states = zipdata['state_name'].isin(states)
states_zipdata = zipdata.loc[in_selected_states]

In [14]:
#Merge zipcode with FEMA data

In [15]:
# Left join: every selected ZIP is kept; ZIPs without a FEMA aggregate get NaN
# in the FEMA-derived columns.
states_fema = states_zipdata.merge(fema_zip, how="left", on="zip")

In [16]:
#Fill 0 for zip codes with no FEMA assistance records

In [17]:
# Replace missing per-applicant assistance with 0.
# NOTE(review): after the left join, a missing value means the ZIP had no FEMA
# aggregate at all, so these rows enter the regression as true zeros rather
# than being excluded — confirm this is the intended treatment.
per_applicant = states_fema['assistance_per_applicant']
states_fema['assistance_per_applicant'] = per_applicant.fillna(0)

In [18]:
#Merge socioeconomic data

In [19]:
# Put both join keys in the same format: text, left-padded with zeros to
# 5 characters.
for frame in (states_fema, socioeconomic_df):
    frame['zip'] = frame['zip'].astype(str).str.zfill(5)

In [20]:
# Inner join: only ZIPs present in both the FEMA/ZIP table and the
# socioeconomic table are kept.
states_fema_socioeconomic = states_fema.merge(socioeconomic_df, how="inner", on="zip")

In [21]:
# Drop rows that are missing any of the socioeconomic predictors used in the
# regressions.
required_predictors = ['pct_poverty', 'pct_black', 'pct_college_plus', 'pct_mortgage']
states_fema_socioeconomic = states_fema_socioeconomic.dropna(subset=required_predictors)

In [22]:
#OLS regression assistance per applicant ~ socioeconomic variables

In [23]:
import statsmodels.api as sm

# Design matrix: the four socioeconomic predictors plus an intercept column.
socioeconomic_predictors = ['pct_poverty', 'pct_black', 'pct_college_plus', 'pct_mortgage']
X = sm.add_constant(states_fema_socioeconomic[socioeconomic_predictors])
# Response: average assistance per applicant in each ZIP.
y = states_fema_socioeconomic['assistance_per_applicant']

# Ordinary least squares with statsmodels' default (nonrobust) covariance.
model = sm.OLS(endog=y, exog=X).fit()

print(model.summary())

                               OLS Regression Results                               
Dep. Variable:     assistance_per_applicant   R-squared:                       0.001
Model:                                  OLS   Adj. R-squared:                  0.000
Method:                       Least Squares   F-statistic:                     1.253
Date:                      Thu, 08 May 2025   Prob (F-statistic):              0.286
Time:                              20:58:09   Log-Likelihood:                -50089.
No. Observations:                      5847   AIC:                         1.002e+05
Df Residuals:                          5842   BIC:                         1.002e+05
Df Model:                                 4                                         
Covariance Type:                  nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------

In [24]:
#Get damage amount by zip

In [25]:
# Mean flood damage amount per ZIP over the filtered registrations.
fema_flood_damage = (
    fema_filtered
    .groupby('zip')
    .agg(floodDamageAmount=('floodDamageAmount', 'mean'))
    .reset_index()
)

In [26]:
#Merge with previous data

In [27]:
# Left join the per-ZIP flood damage onto the regression table; ZIPs without a
# damage record get NaN in floodDamageAmount.
states_merged = states_fema_socioeconomic.merge(fema_flood_damage, how='left', on='zip')

In [28]:
#Fill 0 for zip codes with no recorded flood damage

In [29]:
# Replace missing flood damage (ZIPs with no match in the damage table) with 0.
flood_damage = states_merged['floodDamageAmount']
states_merged['floodDamageAmount'] = flood_damage.fillna(0)

In [30]:
#OLS regression assistance per applicant ~ socioeconomic variables + damage amount

In [31]:
import statsmodels.api as sm

# Design matrix: the socioeconomic predictors plus mean flood damage, with an
# intercept column.
damage_model_predictors = [
    'pct_poverty',
    'pct_black',
    'pct_college_plus',
    'pct_mortgage',
    'floodDamageAmount',
]
X = sm.add_constant(states_merged[damage_model_predictors])
# Response: average assistance per applicant in each ZIP.
y = states_merged['assistance_per_applicant']

# Ordinary least squares with statsmodels' default (nonrobust) covariance.
model = sm.OLS(endog=y, exog=X).fit()
print(model.summary())

                               OLS Regression Results                               
Dep. Variable:     assistance_per_applicant   R-squared:                       0.736
Model:                                  OLS   Adj. R-squared:                  0.736
Method:                       Least Squares   F-statistic:                     3253.
Date:                      Thu, 08 May 2025   Prob (F-statistic):               0.00
Time:                              20:58:10   Log-Likelihood:                -46201.
No. Observations:                      5847   AIC:                         9.241e+04
Df Residuals:                          5841   BIC:                         9.245e+04
Df Model:                                 5                                         
Covariance Type:                  nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------