Add geographical information to overdose data

In [31]:
# Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gdp

In [32]:
# Location of the HepVu county-level opioid indicators workbook (.xlsx).
# NOTE(review): absolute, machine-specific path — the notebook will not run on
# another machine without editing it; consider a configurable data directory.
overdose_path = "/home/h6x/git_projects/ornl-svi-data-processing/raw_data/HepVu_County_Opioid_Indicators_05DEC22.xlsx"

In [33]:
# Read the Excel workbook at `overdose_path` into a pandas DataFrame.
overdose_df = pd.read_excel(overdose_path)

In [35]:
# Show the column labels of the loaded table.
overdose_df.columns

Index(['GEO ID', 'State Abbreviation', 'County Name',
       'Opioid Prescription Rate 2020',
       'Narcotic Overdose Mortality Rate 2014',
       'Narcotic Overdose Mortality Rate 2015',
       'Narcotic Overdose Mortality Rate 2016',
       'Narcotic Overdose Mortality Rate 2017',
       'Narcotic Overdose Mortality Rate 2018',
       'Narcotic Overdose Mortality Rate 2019',
       'Narcotic Overdose Mortality Rate 2020'],
      dtype='object')

In [34]:
# Preview the first rows of the loaded table.
overdose_df.head()

Unnamed: 0,GEO ID,State Abbreviation,County Name,Opioid Prescription Rate 2020,Narcotic Overdose Mortality Rate 2014,Narcotic Overdose Mortality Rate 2015,Narcotic Overdose Mortality Rate 2016,Narcotic Overdose Mortality Rate 2017,Narcotic Overdose Mortality Rate 2018,Narcotic Overdose Mortality Rate 2019,Narcotic Overdose Mortality Rate 2020
0,1001,AL,Autauga County,98.3,8.2,8.8,10.7,9.8,10.9,9.2,11.6
1,1003,AL,Baldwin County,65.0,18.0,20.0,16.6,15.1,14.9,14.5,27.4
2,1005,AL,Barbour County,22.8,4.4,4.5,5.7,5.8,5.2,5.7,7.6
3,1007,AL,Bibb County,24.8,17.2,16.6,22.6,21.7,23.1,19.4,27.3
4,1009,AL,Blount County,22.8,18.6,18.9,22.7,27.0,19.9,20.3,24.2


In [36]:
# renama column 'Opioid Prescription Rate 2020' to pres_r_2020
overdose_df.rename(columns={'Opioid Prescription Rate 2020':'pres_r_2020'}, inplace=True)


overdose_df.rename(columns={'Narcotic Overdose Mortality Rate 2014':'NOD_2014'}, inplace=True)
overdose_df.rename(columns={'Narcotic Overdose Mortality Rate 2015':'NOD_2015'}, inplace=True)
overdose_df.rename(columns={'Narcotic Overdose Mortality Rate 2016':'NOD_2016'}, inplace=True)
overdose_df.rename(columns={'Narcotic Overdose Mortality Rate 2017':'NOD_2017'}, inplace=True)
overdose_df.rename(columns={'Narcotic Overdose Mortality Rate 2018':'NOD_2018'}, inplace=True)
overdose_df.rename(columns={'Narcotic Overdose Mortality Rate 2019':'NOD_2019'}, inplace=True)
overdose_df.rename(columns={'Narcotic Overdose Mortality Rate 2020':'NOD_2020'}, inplace=True)



In [37]:
def preprocess_overdose_data(overdose_df):
    """Return a copy of the overdose table with 'GEO ID' as 5-character strings.

    The 'GEO ID' column is converted to ``str`` and left-padded with zeros to a
    width of five (e.g. ``1001`` -> ``'01001'``), so it can be compared against
    zero-padded string county codes. Values already five or more characters
    long are left unchanged.

    Parameters
    ----------
    overdose_df : pandas.DataFrame
        Table containing a 'GEO ID' column.

    Returns
    -------
    pandas.DataFrame
        A new DataFrame; the frame passed in is not modified, so re-running
        the calling cell or inspecting the raw frame stays safe.

    Raises
    ------
    KeyError
        If `overdose_df` has no 'GEO ID' column.
    """
    # Work on a copy so the caller's frame keeps its original 'GEO ID' dtype.
    processed = overdose_df.copy()
    # Vectorised string padding instead of a per-element Python lambda.
    processed['GEO ID'] = processed['GEO ID'].astype(str).str.zfill(5)
    return processed

In [38]:
# Rebind `overdose_df` to the result of preprocess_overdose_data, which
# zero-pads 'GEO ID' to 5-character strings.
overdose_df = preprocess_overdose_data(overdose_df)

In [39]:
# Read the 2018 county-level SVI geodatabase (.gdb) with geopandas, which this
# notebook imports under the alias `gdp`.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
svi_df = gdp.read_file("/home/h6x/git_projects/ornl-svi-data-processing/raw_data/svi/2018/SVI2018_US_county.gdb")

In [40]:
# Left-join the overdose columns onto the SVI table: every row of `svi_df` is
# kept and matched where svi_df['FIPS'] equals overdose_df['GEO ID']; SVI rows
# without a match get missing values in the overdose columns.
# NOTE(review): FIPS appears to be a zero-padded string (e.g. '01001' in the
# displayed output), so 'GEO ID' must already be padded the same way — confirm.
overdose_df_ = svi_df.merge(overdose_df, how='left', left_on='FIPS', right_on='GEO ID')

In [41]:
# Display the merged table to inspect the join result.
overdose_df_

Unnamed: 0,ST,STATE,ST_ABBR,COUNTY,FIPS,LOCATION,AREA_SQMI,E_TOTPOP,M_TOTPOP,E_HU,...,State Abbreviation,County Name,pres_r_2020,NOD_2014,NOD_2015,NOD_2016,NOD_2017,NOD_2018,NOD_2019,NOD_2020
0,35,NEW MEXICO,NM,Rio Arriba,35039,"Rio Arriba County, New Mexico",5860.869195,39307,0,20044,...,NM,Rio Arriba Coun,33.4,87.9,70.4,79.0,79.3,80.0,70.8,101.3
1,01,ALABAMA,AL,Autauga,01001,"Autauga County, Alabama",594.443459,55200,0,23315,...,AL,Autauga County,98.3,8.2,8.8,10.7,9.8,10.9,9.2,11.6
2,01,ALABAMA,AL,Blount,01009,"Blount County, Alabama",644.830460,57645,0,24222,...,AL,Blount County,22.8,18.6,18.9,22.7,27.0,19.9,20.3,24.2
3,01,ALABAMA,AL,Butler,01013,"Butler County, Alabama",776.838201,20025,0,10026,...,AL,Butler County,35.1,7.4,8.6,10.8,9.8,8.8,10.2,14.3
4,01,ALABAMA,AL,Calhoun,01015,"Calhoun County, Alabama",605.867251,115098,0,53682,...,AL,Calhoun County,97.9,14.8,16.0,14.6,23.4,15.8,17.5,24.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3137,48,TEXAS,TX,Duval,48131,"Duval County, Texas",1793.476183,11355,0,5592,...,TX,Duval County,3.1,9.3,9.5,12.2,11.6,12.5,12.9,16.3
3138,48,TEXAS,TX,Zapata,48505,"Zapata County, Texas",998.411980,14369,0,6388,...,TX,Zapata County,3.8,6.5,7.0,9.5,9.1,8.2,8.9,11.3
3139,48,TEXAS,TX,Zavala,48507,"Zavala County, Texas",1297.406535,12131,0,4344,...,TX,Zavala County,2.6,6.1,7.0,8.0,8.1,8.2,8.4,10.8
3140,48,TEXAS,TX,Dimmit,48127,"Dimmit County, Texas",1328.884075,10663,0,4408,...,TX,Dimmit County,6.0,8.4,6.7,8.1,8.2,8.9,8.0,11.0


In [42]:
# (rows, columns) of the merged table.
overdose_df_.shape

(3142, 137)

In [45]:
# Write the merged table to disk at a shapefile (.shp) path via `to_file`.
# NOTE(review): the cell output echoes this call, which looks like a warning
# was emitted. ESRI Shapefile field names are limited to 10 characters, so
# longer column names (e.g. 'State Abbreviation', 'pres_r_2020') are presumably
# truncated in the written file — confirm, or use a format such as GeoPackage.

overdose_df_.to_file("/home/h6x/git_projects/ornl-adjacency-method/overdose_graphs/processed_data/overdose_with_svi/overdose_df.shp")

  overdose_df_.to_file("/home/h6x/git_projects/ornl-adjacency-method/overdose_graphs/processed_data/overdose_with_svi/overdose_df.shp")
