# Compute summary socioeconomic SVI index

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

Load the standard-scaled data

In [2]:
# Switch that selects which SVI file to load; each choice has its own file
# name and its own list of socioeconomic indicator columns.
svi2020 = True
filename = "SVI_EP_2020_Standard_Scaled" if svi2020 else "SVI_EP_Standard_Scaled"
indicators = (
    ["EP_POV150", "EP_UNEMP", "EP_HBURD", "EP_NOHSDP"]
    if svi2020
    else ["EP_POV", "EP_UNEMP", "EP_PCI", "EP_NOHSDP"]
)
# Read the selected CSV from the processed-data directory.
df_standard_scaled = pd.read_csv(f"../processed_data/{filename}.csv")

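Sum all socioeconomic SVI indicators (negating `EP_PCI` first for the non-2020 data, so that lower values always correspond to less vulnerable) and standard-scale the result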

In [12]:
# Take an explicit copy of the indicator columns so the sign flip below
# modifies only this working frame and cannot trigger a
# SettingWithCopyWarning against df_standard_scaled.
socioecon_indicators = df_standard_scaled.loc[:, indicators].copy()
if not svi2020:
    # Negate PCI so low numbers correspond to less vulnerable
    socioecon_indicators['EP_PCI'] = -socioecon_indicators['EP_PCI']
# Row-wise sum of the indicator columns is the raw (unscaled) summary index.
# NOTE(review): DataFrame.sum skips NaN by default, so a row with a missing
# indicator would get a partial sum -- TODO confirm the input has no NaNs.
summary_index = socioecon_indicators.sum(axis=1)
# StandardScaler takes a 2-D (n_samples, 1) array and returns the same shape;
# flatten the result to 1-D before storing it as a single column.
df_standard_scaled['svi_socioecon_summary'] = (
    StandardScaler()
    .fit_transform(summary_index.to_numpy().reshape(-1, 1))
    .ravel()
)
# NOTE(review): the disabled line below marks rows with a summary value < 0 as
# 1. Given the sign convention above (low = less vulnerable), that threshold
# direction looks inverted -- confirm before re-enabling.
# df_standard_scaled['disadvantaged'] = df_standard_scaled['svi_socioecon_summary'].apply(lambda t: 1 if (t < 0) else 0)

In [4]:
# Write the frame to a "_Summary_Index" CSV in the processed-data directory,
# leaving the row index out of the file.
summary_csv_path = f"../processed_data/{filename}_Summary_Index.csv"
df_standard_scaled.to_csv(summary_csv_path, index=False)

Visualize on map

In [16]:
import geopandas

# Read the shapefile and keep only the rows whose COUNTYFP is "510".
path_to_data = "../shape_files/baltimore.shp"
gdf = geopandas.read_file(path_to_data, SHAPE_RESTORE_SHX="YES")
in_baltimore = gdf["COUNTYFP"] == "510"  # Restrict to baltimore
gdf = gdf[in_baltimore]

# Expose the FIPS column as a string column named GEOID, the key used for the
# merge below (presumably matching the dtype of the shapefile's GEOID -- verify).
df_standard_scaled = (
    df_standard_scaled
    .rename(columns={'FIPS': 'GEOID'})
    .astype({'GEOID': str})
)
gdf = pd.merge(gdf, df_standard_scaled, on="GEOID")

# Keep only the three listed GEOIDs, then draw the interactive map coloured by
# the summary index.
selected_geoids = ["24510160801", "24510160802", "24510280401"]
mask = gdf["GEOID"].isin(selected_geoids)
gdf = gdf.loc[mask, :]
gdf.explore("svi_socioecon_summary")