In [2]:
import numpy as np
import pandas as pd

### Death: Florida and its reference states

In [34]:
# Load the overdose death records (all states, all years) from the
# intermediate files folder.
all_death = pd.read_csv(
    '../20_intermediate_files/only_od_all_years.csv'
)
# Preview the first five rows.
all_death.head(n=5)

Unnamed: 0,County,County Code,Year,Year Code,Drug/Alcohol Induced Cause,Drug/Alcohol Induced Cause Code,Deaths,CTYNAME,STNAME
0,"Cochise County, AZ",4003.0,2007.0,2007.0,Drug poisonings (overdose) Unintentional (X40-...,D1,13.0,Cochise County,AZ
1,"Gila County, AZ",4007.0,2007.0,2007.0,Drug poisonings (overdose) Unintentional (X40-...,D1,10.0,Gila County,AZ
2,"Maricopa County, AZ",4013.0,2007.0,2007.0,Drug poisonings (overdose) Unintentional (X40-...,D1,411.0,Maricopa County,AZ
3,"Maricopa County, AZ",4013.0,2007.0,2007.0,Drug poisonings (overdose) Suicide (X60-X64),D2,118.0,Maricopa County,AZ
4,"Maricopa County, AZ",4013.0,2007.0,2007.0,Drug poisonings (overdose) Undetermined (Y10-Y14),D4,45.0,Maricopa County,AZ


In [33]:
# Load the population table used for the Florida comparison.
fl_pop = pd.read_csv(
    '../20_intermediate_files/fl_death_pop.csv'
)
# List the states present in the population table.
fl_pop['STNAME'].unique()

array(['Colorado', 'Florida', 'Louisiana', 'Nevada'], dtype=object)

In [35]:
# select only Florida and its reference states
fl_state_names = {'LA': 'Louisiana', 'NV': 'Nevada', 'CO': 'Colorado', 'FL': 'Florida'}
fl_ref_states = list(fl_state_names)
fl_death = all_death[all_death['STNAME'].isin(fl_ref_states)]

# rename the abbreviated state names to their full names.
# The nested dict restricts the replacement to the STNAME column; a flat
# dict would also rewrite any value in another column that happens to
# equal one of the abbreviations.
fl_death_ = fl_death.replace({'STNAME': fl_state_names})

# make sure both tables cover the same states.
# Compared as sets so the check does not depend on row order and still
# gives a clear failure when the number of states differs.
assert set(fl_death_['STNAME'].unique()) == set(fl_pop['STNAME'].unique()), \
    'state names differ between the death data and the population data'

# rename year so it matches the population table's merge key
fl_death_ = fl_death_.rename(columns={'Year': 'YEAR'})

# remove columns that are not needed downstream: County, Year Code
fl_death_ = fl_death_.drop(columns=['County', 'Year Code'])
fl_death_.head()

Unnamed: 0,County Code,YEAR,Drug/Alcohol Induced Cause,Drug/Alcohol Induced Cause Code,Deaths,CTYNAME,STNAME
13,8001.0,2007.0,Drug poisonings (overdose) Unintentional (X40-...,D1,51.0,Adams County,Colorado
14,8001.0,2007.0,Drug poisonings (overdose) Suicide (X60-X64),D2,19.0,Adams County,Colorado
15,8005.0,2007.0,Drug poisonings (overdose) Unintentional (X40-...,D1,75.0,Arapahoe County,Colorado
16,8005.0,2007.0,Drug poisonings (overdose) Suicide (X60-X64),D2,13.0,Arapahoe County,Colorado
17,8013.0,2007.0,Drug poisonings (overdose) Unintentional (X40-...,D1,33.0,Boulder County,Colorado


In [36]:
# Left-join the death records onto the population table: every population
# row is kept, and `_merge` records whether a death record matched it.
fl_death_pop = fl_pop.merge(
    fl_death_,
    how='left',
    on=['STNAME', 'CTYNAME', 'YEAR'],
    indicator=True,
)
fl_death_pop.head()

Unnamed: 0,STNAME,CTYNAME,YEAR,POPULATION,County Code,Drug/Alcohol Induced Cause,Drug/Alcohol Induced Cause Code,Deaths,_merge
0,Colorado,Adams County,2000,350961.0,,,,,left_only
1,Colorado,Alamosa County,2000,14964.0,,,,,left_only
2,Colorado,Arapahoe County,2000,491701.0,,,,,left_only
3,Colorado,Archuleta County,2000,10038.0,,,,,left_only
4,Colorado,Baca County,2000,4501.0,,,,,left_only


In [37]:
# Keep only the years that are covered by the overdose death data.
min_year = fl_death_['YEAR'].min()
max_year = fl_death_['YEAR'].max()

# `between` is inclusive on both ends, i.e. min_year <= YEAR <= max_year.
fl_death_pop = (
    fl_death_pop
    .loc[lambda frame: frame['YEAR'].between(min_year, max_year)]
    .reset_index(drop=True)
)
fl_death_pop

Unnamed: 0,STNAME,CTYNAME,YEAR,POPULATION,County Code,Drug/Alcohol Induced Cause,Drug/Alcohol Induced Cause Code,Deaths,_merge
192,Colorado,Adams County,2003,378785.0,8001.0,Drug poisonings (overdose) Unintentional (X40-...,D1,32.0,both
193,Colorado,Adams County,2003,378785.0,8001.0,Drug poisonings (overdose) Suicide (X60-X64),D2,10.0,both
194,Colorado,Alamosa County,2003,15125.0,,,,,left_only
195,Colorado,Arapahoe County,2003,514406.0,8005.0,Drug poisonings (overdose) Unintentional (X40-...,D1,33.0,both
196,Colorado,Arapahoe County,2003,514406.0,8005.0,Drug poisonings (overdose) Suicide (X60-X64),D2,12.0,both
...,...,...,...,...,...,...,...,...,...
4496,Nevada,Storey County,2015,3881.0,,,,,left_only
4497,Nevada,Washoe County,2015,442617.0,32031.0,Drug poisonings (overdose) Unintentional (X40-...,D1,70.0,both
4498,Nevada,Washoe County,2015,442617.0,32031.0,Drug poisonings (overdose) Suicide (X60-X64),D2,14.0,both
4499,Nevada,White Pine County,2015,9875.0,,,,,left_only


### Death: Texas and its reference states

In [38]:
# Load the population table used for the Texas comparison.
tx_pop = pd.read_csv(
    '../20_intermediate_files/tx_death_pop.csv'
)
# List the states present in the population table.
tx_pop['STNAME'].unique()

array(['New York', 'Oregon', 'Texas', 'Wisconsin'], dtype=object)

In [39]:
# select only Texas and its reference states
tx_state_names = {'NY': 'New York', 'OR': 'Oregon', 'TX': 'Texas', 'WI': 'Wisconsin'}
tx_ref_states = list(tx_state_names)
tx_death = all_death[all_death['STNAME'].isin(tx_ref_states)]

# rename the abbreviated state names to their full names.
# The nested dict restricts the replacement to the STNAME column; a flat
# dict would also rewrite any value in another column that happens to
# equal one of the abbreviations.
tx_death_ = tx_death.replace({'STNAME': tx_state_names})

# make sure both tables cover the same states.
# Compared as sets so the check does not depend on row order and still
# gives a clear failure when the number of states differs.
assert set(tx_death_['STNAME'].unique()) == set(tx_pop['STNAME'].unique()), \
    'state names differ between the death data and the population data'

# rename year so it matches the population table's merge key
tx_death_ = tx_death_.rename(columns={'Year': 'YEAR'})

# remove columns that are not needed downstream: County, Year Code
tx_death_ = tx_death_.drop(columns=['County', 'Year Code'])
tx_death_.head()

Unnamed: 0,County Code,YEAR,Drug/Alcohol Induced Cause,Drug/Alcohol Induced Cause Code,Deaths,CTYNAME,STNAME
104,36001.0,2007.0,Drug poisonings (overdose) Unintentional (X40-...,D1,13.0,Albany County,New York
105,36001.0,2007.0,Drug poisonings (overdose) Undetermined (Y10-Y14),D4,15.0,Albany County,New York
106,36005.0,2007.0,Drug poisonings (overdose) Unintentional (X40-...,D1,157.0,Bronx County,New York
107,36005.0,2007.0,Drug poisonings (overdose) Suicide (X60-X64),D2,13.0,Bronx County,New York
108,36007.0,2007.0,Drug poisonings (overdose) Unintentional (X40-...,D1,10.0,Broome County,New York


In [40]:
# merge the two dataframes
tx_death_pop = tx_pop.merge(
    tx_death_,
    how='left',  # keep every population row, matched or not
    on=['STNAME', 'CTYNAME', 'YEAR'],
    indicator=True,  # adds the `_merge` column
)
tx_death_pop.head()

Unnamed: 0,STNAME,CTYNAME,YEAR,POPULATION,County Code,Drug/Alcohol Induced Cause,Drug/Alcohol Induced Cause Code,Deaths,_merge
0,New York,Albany County,2000,294960,,,,,left_only
1,New York,Allegany County,2000,49916,,,,,left_only
2,New York,Bronx County,2000,1333854,,,,,left_only
3,New York,Broome County,2000,200319,,,,,left_only
4,New York,Cattaraugus County,2000,84017,,,,,left_only


In [41]:
# Keep only the years that are covered by the overdose death data.
min_year2 = tx_death_['YEAR'].min()
max_year2 = tx_death_['YEAR'].max()

# `between` is inclusive on both ends, i.e. min_year2 <= YEAR <= max_year2.
tx_death_pop = (
    tx_death_pop
    .loc[lambda frame: frame['YEAR'].between(min_year2, max_year2)]
    .reset_index(drop=True)
)
tx_death_pop

Unnamed: 0,STNAME,CTYNAME,YEAR,POPULATION,County Code,Drug/Alcohol Induced Cause,Drug/Alcohol Induced Cause Code,Deaths,_merge
186,New York,Albany County,2003,298491,,,,,left_only
187,New York,Allegany County,2003,50220,,,,,left_only
188,New York,Bronx County,2003,1353712,36005.0,Drug poisonings (overdose) Unintentional (X40-...,D1,131.0,both
189,New York,Broome County,2003,198364,,,,,left_only
190,New York,Cattaraugus County,2003,82968,,,,,left_only
...,...,...,...,...,...,...,...,...,...
8482,Wisconsin,Waukesha County,2015,396377,55133.0,Drug poisonings (overdose) Unintentional (X40-...,D1,34.0,both
8483,Wisconsin,Waupaca County,2015,51684,,,,,left_only
8484,Wisconsin,Waushara County,2015,23966,,,,,left_only
8485,Wisconsin,Winnebago County,2015,169383,55139.0,Drug poisonings (overdose) Unintentional (X40-...,D1,22.0,both


### Death: Washington and its reference states

In [42]:
# Load the population table used for the Washington comparison.
wa_pop = pd.read_csv(
    '../20_intermediate_files/wa_death_pop.csv'
)
# List the states present in the population table.
wa_pop['STNAME'].unique()

array(['Hawaii', 'Oklahoma', 'Oregon', 'Washington'], dtype=object)

In [43]:
# select only Washington and its reference states
wa_state_names = {'HI': 'Hawaii', 'OK': 'Oklahoma', 'OR': 'Oregon', 'WA': 'Washington'}
wa_ref_states = list(wa_state_names)
wa_death = all_death[all_death['STNAME'].isin(wa_ref_states)]

# rename the abbreviated state names to their full names.
# The nested dict restricts the replacement to the STNAME column; a flat
# dict would also rewrite any value in another column that happens to
# equal one of the abbreviations.
wa_death_ = wa_death.replace({'STNAME': wa_state_names})

# make sure both tables cover the same states.
# Compared as sets so the check does not depend on row order and still
# gives a clear failure when the number of states differs.
assert set(wa_death_['STNAME'].unique()) == set(wa_pop['STNAME'].unique()), \
    'state names differ between the death data and the population data'

# rename year so it matches the population table's merge key
wa_death_ = wa_death_.rename(columns={'Year': 'YEAR'})

# remove columns that are not needed downstream: County, Year Code
wa_death_ = wa_death_.drop(columns=['County', 'Year Code'])
wa_death_.head()

Unnamed: 0,County Code,YEAR,Drug/Alcohol Induced Cause,Drug/Alcohol Induced Cause Code,Deaths,CTYNAME,STNAME
79,15001.0,2007.0,Drug poisonings (overdose) Undetermined (Y10-Y14),D4,14.0,Hawaii County,Hawaii
80,15003.0,2007.0,Drug poisonings (overdose) Unintentional (X40-...,D1,75.0,Honolulu County,Hawaii
81,15003.0,2007.0,Drug poisonings (overdose) Undetermined (Y10-Y14),D4,14.0,Honolulu County,Hawaii
139,40017.0,2007.0,Drug poisonings (overdose) Unintentional (X40-...,D1,10.0,Canadian County,Oklahoma
140,40027.0,2007.0,Drug poisonings (overdose) Unintentional (X40-...,D1,24.0,Cleveland County,Oklahoma


In [44]:
# merge the two dataframes
wa_death_pop = wa_pop.merge(
    wa_death_,
    how='left',  # keep every population row, matched or not
    on=['STNAME', 'CTYNAME', 'YEAR'],
    indicator=True,  # adds the `_merge` column
)
wa_death_pop.head()

Unnamed: 0,STNAME,CTYNAME,YEAR,POPULATION,County Code,Drug/Alcohol Induced Cause,Drug/Alcohol Induced Cause Code,Deaths,_merge
0,Hawaii,Hawaii County,2000,149095,,,,,left_only
1,Hawaii,Honolulu County,2000,875061,,,,,left_only
2,Hawaii,Kalawao County,2000,145,,,,,left_only
3,Hawaii,Kauai County,2000,58511,,,,,left_only
4,Hawaii,Maui County,2000,128754,,,,,left_only


In [46]:
# Keep only the years that are covered by the overdose death data.
min_year3 = wa_death_['YEAR'].min()
max_year3 = wa_death_['YEAR'].max()

# `between` is inclusive on both ends, i.e. min_year3 <= YEAR <= max_year3.
wa_death_pop = (
    wa_death_pop
    .loc[lambda frame: frame['YEAR'].between(min_year3, max_year3)]
    .reset_index(drop=True)
)
wa_death_pop

Unnamed: 0,STNAME,CTYNAME,YEAR,POPULATION,County Code,Drug/Alcohol Induced Cause,Drug/Alcohol Induced Cause Code,Deaths,_merge
0,Hawaii,Hawaii County,2003,156340,15001.0,Drug poisonings (overdose) Unintentional (X40-...,D1,11.0,both
1,Hawaii,Honolulu County,2003,888026,15003.0,Drug poisonings (overdose) Unintentional (X40-...,D1,47.0,both
2,Hawaii,Honolulu County,2003,888026,15003.0,Drug poisonings (overdose) Suicide (X60-X64),D2,15.0,both
3,Hawaii,Kalawao County,2003,129,,,,,left_only
4,Hawaii,Kauai County,2003,60061,,,,,left_only
...,...,...,...,...,...,...,...,...,...
2200,Washington,Wahkiakum County,2015,3989,,,,,left_only
2201,Washington,Walla Walla County,2015,59970,,,,,left_only
2202,Washington,Whatcom County,2015,211942,53073.0,Drug poisonings (overdose) Unintentional (X40-...,D1,23.0,both
2203,Washington,Whitman County,2015,48224,,,,,left_only
