In [1]:
# Dependencies
import os 
import pandas as pd 
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import linregress 

#Reading in Files
cdc_df = pd.read_csv("Resource/VSRR_Provisional_Drug_Overdose_Death_Counts_2015_2021.csv")
care_df = pd.read_csv("Resource/Medicare_Opioid_Prescribing_Data_2013_2019.csv", low_memory = False)
caid_df = pd.read_csv("Resource/Medicaid_Opioid_Prescribing_data_2013_2019.csv", low_memory = False)

In [8]:
# Filtering Medcaid dropping year 2013 & 2014 and filtering for all plan types
caid_year = caid_df.loc[(caid_df["Year"] != 2013) & (caid_df["Year"] != 2014)]

caid_clean = caid_year.loc[caid_year["Plan_Type"]=="All"]

In [10]:
# Changing column name to state for merging later
caid_clean = caid_clean.rename(columns={"Geo_Desc":"State"})

caid_clean

Unnamed: 0,Year,Geo_Lvl,Geo_Cd,State,Plan_Type,Tot_Opioid_Clms,Tot_Clms,Opioid_Prscrbng_Rate,Opioid_Prscrbng_Rate_5Y_Chg,Opioid_Prscrbng_Rate_1Y_Chg,LA_Tot_Opioid_Clms,LA_Opioid_Prscrbng_Rate,LA_Opioid_Prscrbng_Rate_5Y_Chg,LA_Opioid_Prscrbng_Rate_1Y_Chg
0,2019,National,,National,All,21978286.0,677248025.0,3.25,-3.22,-0.44,3108845.0,14.15,6.67,4.62
3,2019,State,1.0,Alabama,All,224310.0,7230251.0,3.10,-3.23,-1.08,10308.0,4.60,0.16,0.48
6,2019,State,2.0,Alaska,All,65394.0,1434139.0,4.56,-3.33,-1.20,8268.0,12.64,0.88,-0.71
9,2019,State,4.0,Arizona,All,595206.0,14923326.0,3.99,-5.14,-0.75,44311.0,7.44,-2.79,-0.52
12,2019,State,5.0,Arkansas,All,216081.0,4999626.0,4.32,-3.12,-0.88,8129.0,3.76,-0.34,-1.05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
765,2015,State,51.0,Virginia,All,583116.0,10186332.0,5.72,,-0.62,42311.0,7.26,,-0.26
768,2015,State,53.0,Washington,All,1136474.0,14016747.0,8.11,,-0.89,106143.0,9.34,,0.18
771,2015,State,54.0,West Virginia,All,520370.0,9293729.0,5.60,,-1.30,25575.0,4.91,,0.66
774,2015,State,55.0,Wisconsin,All,962286.0,11306189.0,8.51,,-0.35,146248.0,15.20,,1.04


In [7]:
# Filtering Medicaid Data by Breakout Type and Perscription Geographic Levels for National or State levels
care_year = care_df.loc[(care_df["Year"] != 2013) & (care_df["Year"] != 2014)]

care_clean = care_year.loc[(care_year["Breakout_Type"]=="Totals") & (care_year["Prscrbr_Geo_Lvl"]=="National") | (care_year["Prscrbr_Geo_Lvl"]=="State")]

In [8]:
# Changing column name to state for merging late
care_clean = care_clean.rename(columns={"Prscrbr_Geo_Desc":"State"})

care_clean

Unnamed: 0,Year,Prscrbr_Geo_Lvl,Prscrbr_Geo_Cd,State,RUCA_Cd,Breakout_Type,Breakout,Tot_Prscrbrs,Tot_Opioid_Prscrbrs,Tot_Opioid_Clms,Tot_Clms,Opioid_Prscrbng_Rate,Opioid_Prscrbng_Rate_5Y_Chg,Opioid_Prscrbng_Rate_1Y_Chg,LA_Tot_Opioid_Clms,LA_Opioid_Prscrbng_Rate,LA_Opioid_Prscrbng_Rate_5Y_Chg,LA_Opioid_Prscrbng_Rate_1Y_Chg
0,2019,National,,National,,Totals,Overall,1239380.0,803549.0,66138200.0,1.501140e+09,4.41,-1.32,-0.27,7290510.0,11.02,-1.73,-0.77
3,2019,State,1.0,Alabama,,Totals,Overall,15575.0,9739.0,1814583.0,2.800821e+07,6.48,-1.40,-0.42,138456.0,7.63,-2.13,-0.73
4,2019,State,2.0,Alaska,,Totals,Overall,2847.0,1935.0,80311.0,1.639104e+06,4.90,-2.01,-0.90,13005.0,16.19,-3.73,-2.42
5,2019,State,4.0,Arizona,,Totals,Overall,25876.0,16578.0,1359955.0,2.569376e+07,5.29,-1.79,-0.33,184250.0,13.55,-2.58,-1.25
6,2019,State,5.0,Arkansas,,Totals,Overall,9700.0,7258.0,998319.0,1.789430e+07,5.58,-1.08,-0.32,76135.0,7.63,-1.69,-0.93
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120121,2015,State,60.0,American Samoa,,Rural/Urban,Urban,0.0,0.0,,0.000000e+00,,,,,,,
120122,2015,State,66.0,Guam,,Rural/Urban,Urban,0.0,0.0,0.0,0.000000e+00,,,,0.0,,,
120123,2015,State,69.0,Northern Mariana Islands,,Rural/Urban,Urban,0.0,0.0,0.0,0.000000e+00,,,,,,,
120124,2015,State,72.0,Puerto Rico,,Rural/Urban,Urban,10416.0,6946.0,438797.0,2.743746e+07,1.60,,-0.67,13865.0,3.16,,0.62


In [9]:
cdc_df

Unnamed: 0,State,Year,Month,Period,Indicator,Data Value,Percent Complete,Percent Pending Investigation,State Name,Footnote,Footnote Symbol,Predicted Value
0,AK,2015,April,12 month-ending,"Natural & semi-synthetic opioids, incl. methad...",,100,0.000000,Alaska,Numbers may differ from published reports usin...,**,
1,AK,2015,April,12 month-ending,Cocaine (T40.5),,100,0.000000,Alaska,Numbers may differ from published reports usin...,**,
2,AK,2015,April,12 month-ending,Natural & semi-synthetic opioids (T40.2),,100,0.000000,Alaska,Numbers may differ from published reports usin...,**,
3,AK,2015,April,12 month-ending,Number of Drug Overdose Deaths,126,100,0.000000,Alaska,Numbers may differ from published reports usin...,**,126
4,AK,2015,April,12 month-ending,Psychostimulants with abuse potential (T43.6),,100,0.000000,Alaska,Numbers may differ from published reports usin...,**,
...,...,...,...,...,...,...,...,...,...,...,...,...
44689,YC,2021,May,12 month-ending,"Synthetic opioids, excl. methadone (T40.4)",1792,100,0.298805,New York City,Underreported due to incomplete data.,*,1864
44690,YC,2021,May,12 month-ending,Percent with drugs specified,99.2307692307692,100,0.298805,New York City,Underreported due to incomplete data.,*,
44691,YC,2021,May,12 month-ending,Number of Deaths,62248,100,0.298805,New York City,Underreported due to incomplete data.,*,
44692,YC,2021,May,12 month-ending,Cocaine (T40.5),962,100,0.298805,New York City,Underreported due to incomplete data.,*,1005


In [10]:
# Dropping years 2020 & 2021 and filtering indicator column 
cdc_year = cdc_df.loc[(cdc_df["Year"] != 2020) & (cdc_df["Year"] != 2021)]

cdc_indicator = cdc_year.loc[(cdc_year["Indicator"]=="Number of Drug Overdose Deaths") | (cdc_year["Indicator"]=="Number of Deaths") | (cdc_year["Indicator"]=="Natural & semi-synthetic opioids (T40.2)") | (cdc_year["Indicator"]=="Synthetic opioids, excl. methadone (T40.4)")]

In [11]:
# Changing column names for merging later
cdc_indicator = cdc_indicator.rename(columns={"State" : "State ID", "State Name" : "State"})

cdc_indicator

Unnamed: 0,State ID,Year,Month,Period,Indicator,Data Value,Percent Complete,Percent Pending Investigation,State,Footnote,Footnote Symbol,Predicted Value
2,AK,2015,April,12 month-ending,Natural & semi-synthetic opioids (T40.2),,100,0.000000,Alaska,Numbers may differ from published reports usin...,**,
3,AK,2015,April,12 month-ending,Number of Drug Overdose Deaths,126,100,0.000000,Alaska,Numbers may differ from published reports usin...,**,126
6,AK,2015,April,12 month-ending,Number of Deaths,4133,100,0.000000,Alaska,Numbers may differ from published reports usin...,**,
9,AK,2015,April,12 month-ending,"Synthetic opioids, excl. methadone (T40.4)",,100,0.000000,Alaska,Numbers may differ from published reports usin...,**,
15,AK,2015,August,12 month-ending,"Synthetic opioids, excl. methadone (T40.4)",,100,0.000000,Alaska,Numbers may differ from published reports usin...,**,
...,...,...,...,...,...,...,...,...,...,...,...,...
44465,YC,2019,October,12 month-ending,Number of Deaths,54386,100,0.058839,New York City,Numbers may differ from published reports usin...,**,
44466,YC,2019,September,12 month-ending,Natural & semi-synthetic opioids (T40.2),251,100,0.055099,New York City,Numbers may differ from published reports usin...,**,253
44472,YC,2019,September,12 month-ending,"Synthetic opioids, excl. methadone (T40.4)",893,100,0.055099,New York City,Numbers may differ from published reports usin...,**,898
44473,YC,2019,September,12 month-ending,Number of Drug Overdose Deaths,1457,100,0.055099,New York City,Numbers may differ from published reports usin...,**,1460


In [12]:
# Merging Medicaid & Medicare data with an Outer Join
insurance = pd.merge(care_clean, caid_clean, how = "outer")
insurance

Unnamed: 0,Year,Prscrbr_Geo_Lvl,Prscrbr_Geo_Cd,State,RUCA_Cd,Breakout_Type,Breakout,Tot_Prscrbrs,Tot_Opioid_Prscrbrs,Tot_Opioid_Clms,...,Opioid_Prscrbng_Rate,Opioid_Prscrbng_Rate_5Y_Chg,Opioid_Prscrbng_Rate_1Y_Chg,LA_Tot_Opioid_Clms,LA_Opioid_Prscrbng_Rate,LA_Opioid_Prscrbng_Rate_5Y_Chg,LA_Opioid_Prscrbng_Rate_1Y_Chg,Geo_Lvl,Geo_Cd,Plan_Type
0,2019,National,,National,,Totals,Overall,1239380.0,803549.0,66138200.0,...,4.41,-1.32,-0.27,7290510.0,11.02,-1.73,-0.77,,,
1,2019,State,1.0,Alabama,,Totals,Overall,15575.0,9739.0,1814583.0,...,6.48,-1.40,-0.42,138456.0,7.63,-2.13,-0.73,,,
2,2019,State,2.0,Alaska,,Totals,Overall,2847.0,1935.0,80311.0,...,4.90,-2.01,-0.90,13005.0,16.19,-3.73,-2.42,,,
3,2019,State,4.0,Arizona,,Totals,Overall,25876.0,16578.0,1359955.0,...,5.29,-1.79,-0.33,184250.0,13.55,-2.58,-1.25,,,
4,2019,State,5.0,Arkansas,,Totals,Overall,9700.0,7258.0,998319.0,...,5.58,-1.08,-0.32,76135.0,7.63,-1.69,-0.93,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1100,2015,,,Virginia,,,,,,583116.0,...,5.72,,-0.62,42311.0,7.26,,-0.26,State,51.0,All
1101,2015,,,Washington,,,,,,1136474.0,...,8.11,,-0.89,106143.0,9.34,,0.18,State,53.0,All
1102,2015,,,West Virginia,,,,,,520370.0,...,5.60,,-1.30,25575.0,4.91,,0.66,State,54.0,All
1103,2015,,,Wisconsin,,,,,,962286.0,...,8.51,,-0.35,146248.0,15.20,,1.04,State,55.0,All


In [15]:
# Merging CDC data & Insurace Data w/ an outer join and on Year and State
data = pd.merge(cdc_indicator, insurance, how = "outer", on = ["Year", "State"])

data

Unnamed: 0,State ID,Year,Month,Period,Indicator,Data Value,Percent Complete,Percent Pending Investigation,State,Footnote,...,Opioid_Prscrbng_Rate,Opioid_Prscrbng_Rate_5Y_Chg,Opioid_Prscrbng_Rate_1Y_Chg,LA_Tot_Opioid_Clms,LA_Opioid_Prscrbng_Rate,LA_Opioid_Prscrbng_Rate_5Y_Chg,LA_Opioid_Prscrbng_Rate_1Y_Chg,Geo_Lvl,Geo_Cd,Plan_Type
0,AK,2015,April,12 month-ending,Natural & semi-synthetic opioids (T40.2),,100,0.0,Alaska,Numbers may differ from published reports usin...,...,6.81,,-0.10,18217.0,19.96,,0.04,,,
1,AK,2015,April,12 month-ending,Natural & semi-synthetic opioids (T40.2),,100,0.0,Alaska,Numbers may differ from published reports usin...,...,7.99,,0.24,8974.5,23.11,,1.72,,,
2,AK,2015,April,12 month-ending,Natural & semi-synthetic opioids (T40.2),,100,0.0,Alaska,Numbers may differ from published reports usin...,...,6.13,,-0.27,10617.0,20.26,,-1.04,,,
3,AK,2015,April,12 month-ending,Natural & semi-synthetic opioids (T40.2),,100,0.0,Alaska,Numbers may differ from published reports usin...,...,7.61,,-0.28,9048.0,12.32,,0.56,State,2.0,All
4,AK,2015,April,12 month-ending,Number of Drug Overdose Deaths,126,100,0.0,Alaska,Numbers may differ from published reports usin...,...,6.81,,-0.10,18217.0,19.96,,0.04,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46160,,2015,,,,,,,Puerto Rico,,...,2.10,,-0.60,415.0,3.20,,1.29,,,
46161,,2015,,,,,,,Puerto Rico,,...,1.60,,-0.67,13865.0,3.16,,0.62,,,
46162,,2015,,,,,,,Virgin Islands,,...,1.67,,-0.15,204.0,5.75,,1.22,,,
46163,,2015,,,,,,,Virgin Islands,,...,,,,0.0,,,,,,


In [5]:
cdc_indicator.groupby(['State ID', 'Year']).agg({'Data Value' : ['sum']}).reset_index()

NameError: name 'cdc_indicator' is not defined