In [137]:
# import dependencies
%matplotlib notebook
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt



In [138]:
# Read the raw overdose-death CSVs (2013 and 2014 come from one shared file)
# and the regions table.
deaths_2013_2014 = pd.read_csv("raw_data/OD_2013_2014.csv")
deaths_2015 = pd.read_csv("raw_data/OD_2015.csv")
deaths_2016 = pd.read_csv("raw_data/OD_2016.csv")
deaths_2017 = pd.read_csv("raw_data/OD_2017.csv")

regions = pd.read_csv("raw_data/regions.csv")

# ---- Overdose-deaths-by-state data frames, one per year ----
# Each frame keeps only the state column and that year's count, and the count
# column gets a year-qualified name ("<year>Deaths").
# rename(index=str, ...) additionally passes every row label through str().
# Note the source header differs by file: '2013Number'/'2014Number', 'Number'
# (2015) and lowercase 'number' (2016, 2017).
deaths_2013_df = deaths_2013_2014[['State', '2013Number']].rename(
    index=str, columns={"2013Number": "2013Deaths"}
)
deaths_2014_df = deaths_2013_2014[['State', '2014Number']].rename(
    index=str, columns={"2014Number": "2014Deaths"}
)
deaths_2015_df = deaths_2015[['State', 'Number']].rename(
    index=str, columns={"Number": "2015Deaths"}
)
deaths_2016_df = deaths_2016[['State', 'number']].rename(
    index=str, columns={"number": "2016Deaths"}
)
deaths_2017_df = deaths_2017[['State', 'number']].rename(
    index=str, columns={"number": "2017Deaths"}
)

# Preview one of the reduced frames.
deaths_2015_df.head()



Unnamed: 0,State,2015Deaths
0,AL,736
1,AK,122
2,AZ,1274
3,AR,392
4,CA,4659


In [139]:
# Read the raw prescription CSVs, one file per year.
rx_2013 = pd.read_csv("raw_data/RX_2013.csv")
rx_2014 = pd.read_csv("raw_data/RX_2014.csv")
rx_2015 = pd.read_csv("raw_data/RX_2015.csv")
rx_2016 = pd.read_csv("raw_data/RX_2016.csv")
rx_2017 = pd.read_csv("raw_data/RX_2017.csv")

# ---- Prescription-by-state data frames, one per year ----
# (rx_2013_df, rx_2014_df, rx_2015_df, rx_2016_df, rx_2017_df)
# Each frame keeps only the state abbreviation and that year's prescribing
# rate, renamed to "State" and "<year>_RX_rate".
# rename(index=str, ...) additionally passes every row label through str().
rx_2013_df = rx_2013[['State ABBR', '2013 Prescribing Rate']].rename(
    index=str,
    columns={"State ABBR": "State", "2013 Prescribing Rate": "2013_RX_rate"},
)
rx_2014_df = rx_2014[['State ABBR', '2014 Prescribing Rate']].rename(
    index=str,
    columns={"State ABBR": "State", "2014 Prescribing Rate": "2014_RX_rate"},
)
rx_2015_df = rx_2015[['State ABBR', '2015 Prescribing Rate']].rename(
    index=str,
    columns={"State ABBR": "State", "2015 Prescribing Rate": "2015_RX_rate"},
)
rx_2016_df = rx_2016[['State ABBR', '2016 Prescribing Rate']].rename(
    index=str,
    columns={"State ABBR": "State", "2016 Prescribing Rate": "2016_RX_rate"},
)

# The 2017 file uses different headers ('Abbreviation', 'Year 2017').
# The row labelled 51 is removed before renaming; per the original author's
# note it is a duplicate WY entry (not visible here -- verify against the raw
# file). The row labels are then renumbered from 0.
rx_2017_df = (
    rx_2017[['Abbreviation', 'Year 2017']]
    .drop(index=[51])
    .reset_index(drop=True)
    .rename(index=str, columns={"Abbreviation": "State", "Year 2017": "2017_RX_rate"})
)

# Preview one of the reduced frames.
rx_2017_df.head()

Unnamed: 0,State,2017_RX_rate
0,AL,107.2
1,AK,52.0
2,AZ,61.2
3,AR,105.4
4,CA,39.5


In [146]:
# Load the state population table. The variable names say "2010_2017", but the
# file read here is Pop_2013-2017.csv and only the 2013-2017 columns are renamed.
population2010_2017 = pd.read_csv("raw_data/Pop_2013-2017.csv")

# Discard the rows labelled 0-4 and 56-62 and renumber what is left from 0.
# NOTE(review): these are presumably the summary rows above and the trailing
# non-state rows below the per-state records -- confirm against the raw file.
rows_to_discard = list(range(0, 5)) + list(range(56, 63))
population2010_2017_a = (
    population2010_2017
    .drop(index=rows_to_discard)
    .reset_index(drop=True)
    # Name the unnamed first column "State" and year-qualify the population
    # columns; index=str also passes every row label through str().
    .rename(index=str, columns={"Unnamed: 0": "State",
                                "2013": "2013_Pop",
                                "2014": "2014_Pop",
                                "2015": "2015_Pop",
                                "2016": "2016_Pop",
                                "2017": "2017_Pop"})
)

# State names as they appear in the raw file (with a leading '.') mapped to
# the two-letter abbreviations used by the other tables.
state_abbreviations = {'.Alabama': 'AL',
                       '.Alaska': 'AK',
                       '.Arizona': 'AZ',
                       '.Arkansas': 'AR',
                       '.California': 'CA',
                       '.Colorado': 'CO',
                       '.Connecticut': 'CT',
                       '.Delaware': 'DE',
                       '.District of Columbia': 'DC',
                       '.Florida': 'FL',
                       '.Georgia': 'GA',
                       '.Hawaii': 'HI',
                       '.Idaho': 'ID',
                       '.Illinois': 'IL',
                       '.Indiana': 'IN',
                       '.Iowa': 'IA',
                       '.Kansas': 'KS',
                       '.Kentucky': 'KY',
                       '.Louisiana': 'LA',
                       '.Maine': 'ME',
                       '.Maryland': 'MD',
                       '.Massachusetts': 'MA',
                       '.Michigan': 'MI',
                       '.Minnesota': 'MN',
                       '.Mississippi': 'MS',
                       '.Missouri': 'MO',
                       '.Montana': 'MT',
                       '.Nebraska': 'NE',
                       '.Nevada': 'NV',
                       '.New Hampshire': 'NH',
                       '.New Jersey': 'NJ',
                       '.New Mexico': 'NM',
                       '.New York': 'NY',
                       '.North Carolina': 'NC',
                       '.North Dakota': 'ND',
                       '.Ohio': 'OH',
                       '.Oklahoma': 'OK',
                       '.Oregon': 'OR',
                       '.Pennsylvania': 'PA',
                       '.Rhode Island': 'RI',
                       '.South Carolina': 'SC',
                       '.South Dakota': 'SD',
                       '.Tennessee': 'TN',
                       '.Texas': 'TX',
                       '.Utah': 'UT',
                       '.Vermont': 'VT',
                       '.Virginia': 'VA',
                       '.Washington': 'WA',
                       '.West Virginia': 'WV',
                       '.Wisconsin': 'WI',
                       '.Wyoming': 'WY'}

# ---- Population data frame ----
# Apply the name -> abbreviation mapping to the State column only, so the
# replacement can never touch a value in one of the population columns.
# Values without an entry in the mapping are left unchanged.
pop_2010_2017_df = population2010_2017_a.assign(
    State=population2010_2017_a["State"].replace(state_abbreviations)
)

# Attach the regions table. The join key is spelled out because a bare
# .merge() joins on every column name the two frames share. This is an inner
# join (the pandas default), so a state missing from `regions` is dropped.
pop_2010_2017_df = pop_2010_2017_df.merge(regions, on="State", how="inner")

pop_2010_2017_df.head()

Unnamed: 0,State,2013_Pop,2014_Pop,2015_Pop,2016_Pop,2017_Pop,Region
0,AL,4827660,4840037,4850858,4860545,4874747,Southeast
1,AK,736760,736759,737979,741522,739795,Far West
2,AZ,6616124,6706435,6802262,6908642,7016270,Southwest
3,AR,2956780,2964800,2975626,2988231,3004279,Southeast
4,CA,38347383,38701278,39032444,39296476,39536653,Far West


In [143]:
# Combine the population, prescription and deaths frames into one table,
# adding one year at a time (prd = population, rx, deaths).
#
# Every join names its key explicitly. A bare .merge() joins on *all* column
# names the two frames have in common, so a column accidentally shared by two
# inputs would silently become part of the key. how="inner" is the pandas
# default and is written out as a reminder that a state missing from any one
# input is dropped from the result.

# 2013: population (+ region) with 2013 prescribing rate and deaths
prd_13_df = (
    pop_2010_2017_df
    .merge(rx_2013_df, on="State", how="inner")
    .merge(deaths_2013_df, on="State", how="inner")
)
# 2013-2014
prd_14_df = (
    prd_13_df
    .merge(rx_2014_df, on="State", how="inner")
    .merge(deaths_2014_df, on="State", how="inner")
)
# 2013-2015
prd_15_df = (
    prd_14_df
    .merge(rx_2015_df, on="State", how="inner")
    .merge(deaths_2015_df, on="State", how="inner")
)
# 2013-2016
prd_16_df = (
    prd_15_df
    .merge(rx_2016_df, on="State", how="inner")
    .merge(deaths_2016_df, on="State", how="inner")
)
# 2013-2017
prd_17_df = (
    prd_16_df
    .merge(rx_2017_df, on="State", how="inner")
    .merge(deaths_2017_df, on="State", how="inner")
)

prd_17_df.head()


Unnamed: 0,State,2013_Pop,2014_Pop,2015_Pop,2016_Pop,2017_Pop,Region,2013_RX_rate,2013Deaths,2014_RX_rate,2014Deaths,2015_RX_rate,2015Deaths,2016_RX_rate,2016Deaths,2017_RX_rate,2017Deaths
0,AL,4827660,4840037,4850858,4860545,4874747,Southeast,142.4,598,135.2,723,125.0,736,121.0,756,107.2,835
1,AK,736760,736759,737979,741522,739795,Far West,63.7,105,62.7,124,60.8,122,58.9,128,52.0,147
2,AZ,6616124,6706435,6802262,6908642,7016270,Southwest,80.4,1222,79.7,1211,75.5,1274,70.2,1382,61.2,1532
3,AR,2956780,2964800,2975626,2988231,3004279,Southeast,120.9,319,123.2,356,117.2,392,114.6,401,105.4,446
4,CA,38347383,38701278,39032444,39296476,39536653,Far West,54.4,4452,52.7,4521,47.7,4659,44.8,4654,39.5,4868


In [145]:
# Build the final consolidated frame, "master_df": State and Region first,
# then for each year 2013-2017 that year's population, prescribing rate and
# deaths columns side by side.
per_year_columns = [
    str(year) + suffix
    for year in range(2013, 2018)
    for suffix in ('_Pop', '_RX_rate', 'Deaths')
]
master_columns = ['State', 'Region'] + per_year_columns

master_df = pd.DataFrame(prd_17_df[master_columns])

master_df.head()

Unnamed: 0,State,Region,2013_Pop,2013_RX_rate,2013Deaths,2014_Pop,2014_RX_rate,2014Deaths,2015_Pop,2015_RX_rate,2015Deaths,2016_Pop,2016_RX_rate,2016Deaths,2017_Pop,2017_RX_rate,2017Deaths
0,AL,Southeast,4827660,142.4,598,4840037,135.2,723,4850858,125.0,736,4860545,121.0,756,4874747,107.2,835
1,AK,Far West,736760,63.7,105,736759,62.7,124,737979,60.8,122,741522,58.9,128,739795,52.0,147
2,AZ,Southwest,6616124,80.4,1222,6706435,79.7,1211,6802262,75.5,1274,6908642,70.2,1382,7016270,61.2,1532
3,AR,Southeast,2956780,120.9,319,2964800,123.2,356,2975626,117.2,392,2988231,114.6,401,3004279,105.4,446
4,CA,Far West,38347383,54.4,4452,38701278,52.7,4521,39032444,47.7,4659,39296476,44.8,4654,39536653,39.5,4868


In [147]:
# Show the first ten rows of the consolidated frame.
master_df.head(n=10)


Unnamed: 0,State,Region,2013_Pop,2013_RX_rate,2013Deaths,2014_Pop,2014_RX_rate,2014Deaths,2015_Pop,2015_RX_rate,2015Deaths,2016_Pop,2016_RX_rate,2016Deaths,2017_Pop,2017_RX_rate,2017Deaths
0,AL,Southeast,4827660,142.4,598,4840037,135.2,723,4850858,125.0,736,4860545,121.0,756,4874747,107.2,835
1,AK,Far West,736760,63.7,105,736759,62.7,124,737979,60.8,122,741522,58.9,128,739795,52.0,147
2,AZ,Southwest,6616124,80.4,1222,6706435,79.7,1211,6802262,75.5,1274,6908642,70.2,1382,7016270,61.2,1532
3,AR,Southeast,2956780,120.9,319,2964800,123.2,356,2975626,117.2,392,2988231,114.6,401,3004279,105.4,446
4,CA,Far West,38347383,54.4,4452,38701278,52.7,4521,39032444,47.7,4659,39296476,44.8,4654,39536653,39.5,4868
5,CO,Rocky Mountain,5262556,71.2,846,5342311,69.6,899,5440445,65.1,869,5530105,59.8,942,5607154,52.9,1015
6,CT,New England,3602470,67.4,582,3600188,66.0,623,3593862,62.3,800,3587685,55.9,971,3588184,48.0,1072
7,DE,Mideast,925114,92.7,166,934805,91.0,189,944107,84.4,198,952698,79.2,282,961939,68.3,338
8,DC,Mideast,650114,41.1,102,660797,40.1,96,672736,35.7,125,684336,32.5,269,693972,28.5,310
9,FL,Southeast,19584927,73.5,2474,19897747,71.4,2634,20268567,67.1,3228,20656589,66.6,4728,20984400,60.9,5088
