In [10]:
import os
import warnings

import geopandas as gpd
import pandas as pd
import numpy as np

In [12]:
# Input locations. Each path can be overridden with an environment variable so
# the notebook runs on machines other than the original author's; the defaults
# are the original absolute paths, so behavior is unchanged when nothing is set.
svi_path = os.environ.get(
    'SVI_GDB_PATH',
    '/home/h6x/git_projects/ornl-svi-data-processing/raw_data/svi/2018/SVI2018_US_tract.gdb',
)
overdose_path = os.environ.get(
    'OVERDOSE_XLSX_PATH',
    '/home/h6x/Projects/overdose_modeling/data/HepVu_County_Opioid_Indicators_05DEC22.xlsx',
)

In [3]:
def preprocess_overdose_data(overdose_df):
    """Return a copy of the overdose table with 'GEO ID' as a zero-padded string.

    The 'GEO ID' column is converted to ``str`` and left-padded with zeros to
    a width of five characters (e.g. ``1001`` -> ``'01001'``). Values that are
    already five or more characters long are left unchanged.

    Parameters
    ----------
    overdose_df : pandas.DataFrame
        Table containing a 'GEO ID' column.

    Returns
    -------
    pandas.DataFrame
        A new DataFrame; the input frame is not modified, so re-running the
        calling cell always starts from the same raw data.

    Raises
    ------
    KeyError
        If ``overdose_df`` has no 'GEO ID' column.
    """
    # Work on a copy so the caller's (possibly already displayed) frame stays intact.
    cleaned = overdose_df.copy()
    # Vectorized string padding instead of a per-element Python lambda.
    cleaned['GEO ID'] = cleaned['GEO ID'].astype(str).str.zfill(5)
    return cleaned

In [4]:
# Load the opioid-indicator workbook at `overdose_path` into a DataFrame.
overdose_df = pd.read_excel(overdose_path)

In [6]:
# Convert 'GEO ID' to a string zero-padded to five characters.
overdose_df = preprocess_overdose_data(overdose_df)

In [8]:
# Preview the first rows of the overdose table as a sanity check.
overdose_df.head()

Unnamed: 0,GEO ID,State Abbreviation,County Name,Opioid Prescription Rate 2020,Narcotic Overdose Mortality Rate 2014,Narcotic Overdose Mortality Rate 2015,Narcotic Overdose Mortality Rate 2016,Narcotic Overdose Mortality Rate 2017,Narcotic Overdose Mortality Rate 2018,Narcotic Overdose Mortality Rate 2019,Narcotic Overdose Mortality Rate 2020
0,1001,AL,Autauga County,98.3,8.2,8.8,10.7,9.8,10.9,9.2,11.6
1,1003,AL,Baldwin County,65.0,18.0,20.0,16.6,15.1,14.9,14.5,27.4
2,1005,AL,Barbour County,22.8,4.4,4.5,5.7,5.8,5.2,5.7,7.6
3,1007,AL,Bibb County,24.8,17.2,16.6,22.6,21.7,23.1,19.4,27.3
4,1009,AL,Blount County,22.8,18.6,18.9,22.7,27.0,19.9,20.3,24.2


In [9]:
def preprocess_svi_data(us_svi, raw_variables, invalid_value=-999.0):
    """Drop rows with invalid SVI values and min-max normalize the variables.

    Rows in which any of ``raw_variables`` equals ``invalid_value`` are removed
    first. This matters for the scaling step: if the sentinel stayed in the
    data it would become the column minimum and compress every real value into
    a narrow band near 1. Each listed column is then rescaled to ``[0, 1]``
    with ``(x - min) / (max - min)``, using the min/max of the remaining rows.

    Parameters
    ----------
    us_svi : pandas.DataFrame or geopandas.GeoDataFrame
        Table containing every column named in ``raw_variables``.
    raw_variables : sequence of str
        Names of the numeric columns to clean and normalize.
    invalid_value : float, default -999.0
        Sentinel marking an invalid entry in the listed columns.

    Returns
    -------
    Same type as ``us_svi``
        A new frame holding only the valid rows (original index labels are
        kept) with the listed columns normalized. The input is not modified.

    Notes
    -----
    A column whose remaining values are all identical has a zero range; it is
    set to ``0.0`` rather than dividing by zero.
    """
    raw_variables = list(raw_variables)

    # Keep only rows where every selected variable holds a real value.
    valid_rows = (us_svi[raw_variables] != invalid_value).all(axis=1)
    # Copy so the caller's frame (and any earlier display of it) stays intact.
    svi = us_svi.loc[valid_rows].copy()

    for var in raw_variables:
        min_val = svi[var].min()
        value_range = svi[var].max() - min_val
        if value_range == 0:
            # Constant column: no spread to scale, avoid 0/0 -> NaN.
            svi[var] = 0.0
        else:
            svi[var] = (svi[var] - min_val) / value_range

    return svi

In [13]:
# Read the SVI dataset at `svi_path` with geopandas.
us_svi = gpd.read_file(svi_path)

In [15]:
# Preview the raw SVI rows; note the -999 entries visible in some columns.
us_svi.head()

Unnamed: 0,ST,STATE,ST_ABBR,STCNTY,COUNTY,FIPS,LOCATION,AREA_SQMI,E_TOTPOP,M_TOTPOP,...,F_THEME4,F_TOTAL,E_UNINSUR,M_UNINSUR,EP_UNINSUR,MP_UNINSUR,E_DAYPOP,Shape_Length,Shape_Area,geometry
0,1,ALABAMA,AL,1015,Calhoun,1015981901,"Census Tract 9819.01, Calhoun County, Alabama",24.243865,0,12,...,-999,-999,0,12,-999.0,-999.0,656,0.3394,0.006105,"MULTIPOLYGON (((-86.01323 33.68426, -86.01224 ..."
1,1,ALABAMA,AL,1015,Calhoun,1015981902,"Census Tract 9819.02, Calhoun County, Alabama",34.646714,0,12,...,-999,-999,0,12,-999.0,-999.0,146,0.420459,0.008752,"MULTIPOLYGON (((-86.01730 33.69662, -86.01704 ..."
2,1,ALABAMA,AL,1015,Calhoun,1015981903,"Census Tract 9819.03, Calhoun County, Alabama",13.926276,0,12,...,-999,-999,0,12,-999.0,-999.0,0,0.330164,0.003508,"MULTIPOLYGON (((-85.78501 33.66554, -85.78318 ..."
3,1,ALABAMA,AL,1097,Mobile,1097003605,"Census Tract 36.05, Mobile County, Alabama",0.7323,899,316,...,-999,-999,33,36,3.7,4.1,5135,0.060725,0.00018,"MULTIPOLYGON (((-88.19230 30.69524, -88.19097 ..."
4,1,ALABAMA,AL,1097,Mobile,1097990000,"Census Tract 9900, Mobile County, Alabama",0.0,0,12,...,-999,-999,0,12,-999.0,-999.0,0,0.10538,0.000536,"MULTIPOLYGON (((-88.05337 30.50699, -88.05109 ..."


In [20]:
# SVI columns passed to preprocess_svi_data for normalization.
# NOTE(review): presumably the EP_* fields are percentage estimates -- confirm
# against the SVI 2018 data dictionary.
raw_variables = [
    'EP_POV',
    'EP_UNEMP',
    'EP_NOHSDP',
    'EP_UNINSUR',
    'EP_AGE65',
    'EP_AGE17',
    'EP_DISABL',
    'EP_SNGPNT',
    'EP_LIMENG',
    'EP_MINRTY',
    'EP_MUNIT',
    'EP_MOBILE',
    'EP_CROWD',
    'EP_NOVEH',
    'EP_GROUPQ',
]

In [21]:
# Min-max normalize the columns listed in `raw_variables`.
svi_df = preprocess_svi_data(us_svi, raw_variables)

In [22]:
# Preview the processed SVI table.
svi_df.head()

Unnamed: 0,ST,STATE,ST_ABBR,STCNTY,COUNTY,FIPS,LOCATION,AREA_SQMI,E_TOTPOP,M_TOTPOP,...,F_THEME4,F_TOTAL,E_UNINSUR,M_UNINSUR,EP_UNINSUR,MP_UNINSUR,E_DAYPOP,Shape_Length,Shape_Area,geometry
609,1,ALABAMA,AL,1001,Autauga,1001020100,"Census Tract 201, Autauga County, Alabama",3.790677,1923,253,...,0,0,178,79,0.093,4.1,1018,0.150082,0.000948,"MULTIPOLYGON (((-86.50916 32.47344, -86.50620 ..."
610,1,ALABAMA,AL,1001,Autauga,1001020300,"Census Tract 203, Autauga County, Alabama",2.065365,3476,433,...,0,1,119,81,0.034,2.2,1836,0.100175,0.000516,"MULTIPOLYGON (((-86.47087 32.47573, -86.46964 ..."
611,1,ALABAMA,AL,1001,Autauga,1001020400,"Census Tract 204, Autauga County, Alabama",2.464982,3831,337,...,0,0,108,100,0.028,2.6,1951,0.114106,0.000609,"MULTIPOLYGON (((-86.45394 32.49318, -86.44742 ..."
612,1,ALABAMA,AL,1001,Autauga,1001020500,"Census Tract 205, Autauga County, Alabama",4.404987,9883,726,...,0,0,398,223,0.042,2.4,8342,0.159359,0.001099,"MULTIPOLYGON (((-86.43816 32.45069, -86.43773 ..."
613,1,ALABAMA,AL,1001,Autauga,1001020801,"Census Tract 208.01, Autauga County, Alabama",47.981925,2826,324,...,0,0,144,84,0.051,2.9,1331,0.587644,0.012689,"MULTIPOLYGON (((-86.59535 32.38272, -86.59454 ..."
