In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import linregress
import numpy as np

In [162]:
# Load the raw accidental drug-related death records into a DataFrame
raw_csv_path = "../Resources/Accidental_Drug_Related_Deaths_2012-2018.csv"
drug_data = pd.read_csv(raw_csv_path)
# Display the loaded frame as the cell output
drug_data

Unnamed: 0,ID,Date,DateType,Age,Sex,Race,ResidenceCity,ResidenceCounty,ResidenceState,DeathCity,...,Tramad,Morphine_NotHeroin,Hydromorphone,Other,OpiateNOS,AnyOpioid,MannerofDeath,DeathCityGeo,ResidenceCityGeo,InjuryCityGeo
0,14-0273,06/28/2014 12:00:00 AM,DateReported,,,,,,,,...,,,,,,,Accident,"CT\n(41.575155, -72.738288)","CT\n(41.575155, -72.738288)","CT\n(41.575155, -72.738288)"
1,13-0102,03/21/2013 12:00:00 AM,DateofDeath,48.0,Male,Black,NORWALK,,,NORWALK,...,,,,,,,Accident,"Norwalk, CT\n(41.11805, -73.412906)","NORWALK, CT\n(41.11805, -73.412906)","CT\n(41.575155, -72.738288)"
2,16-0165,03/13/2016 12:00:00 AM,DateofDeath,30.0,Female,White,SANDY HOOK,FAIRFIELD,CT,DANBURY,...,,,,,,Y,Accident,"Danbury, CT\n(41.393666, -73.451539)","SANDY HOOK, CT\n(41.419998, -73.282501)",
3,16-0208,03/31/2016 12:00:00 AM,DateofDeath,23.0,Male,White,RYE,WESTCHESTER,NY,GREENWICH,...,,,,,,Y,Accident,"Greenwich, CT\n(41.026526, -73.628549)",,
4,13-0052,02/13/2013 12:00:00 AM,DateofDeath,22.0,Male,"Asian, Other",FLUSHING,QUEENS,,GREENWICH,...,,,,,,,Accident,"Greenwich, CT\n(41.026526, -73.628549)",,"CT\n(41.575155, -72.738288)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5100,15-0466,09/08/2015 12:00:00 AM,DateReported,43.0,Male,White,CHESHIRE,NEW HAVEN,CT,CHESHIRE,...,,,,,,,Accident,"CHESHIRE, CT\n(41.498834, -72.901448)","CHESHIRE, CT\n(41.498834, -72.901448)","CT\n(41.575155, -72.738288)"
5101,17-0618,07/22/2017 12:00:00 AM,DateReported,21.0,Male,White,MADISON,NEW HAVEN,CT,NEW HAVEN,...,,,,,,,Accident,"New Haven, CT\n(41.308252, -72.924161)","MADISON, CT\n(41.271447, -72.60086)","CT\n(41.575155, -72.738288)"
5102,18-0646,08/14/2018 12:00:00 AM,DateofDeath,30.0,Male,White,LAWRENCEVILLE,TIOGA,PA,DANBURY,...,Y,,,,,Y,Accident,"DANBURY, CT\n(41.393666, -73.451539)",,"DANBURY, CT\n(41.393666, -73.451539)"
5103,14-0124,03/16/2014 12:00:00 AM,DateofDeath,33.0,Male,White,HARTFORD,,,WINDSOR,...,,,,,,,Accident,"WINDSOR, CT\n(41.852781, -72.64379)","HARTFORD, CT\n(41.765775, -72.673356)","CT\n(41.575155, -72.738288)"


In [163]:
# Tally the non-null entries in each column to see where values are missing
non_null_counts = drug_data.count()
non_null_counts

ID                     5105
Date                   5103
DateType               5103
Age                    5102
Sex                    5099
Race                   5092
ResidenceCity          4932
ResidenceCounty        4308
ResidenceState         3556
DeathCity              5100
DeathCounty            4005
Location               5081
LocationifOther         590
DescriptionofInjury    4325
InjuryPlace            5039
InjuryCity             3349
InjuryCounty           2364
InjuryState            1424
COD                    5105
OtherSignifican         169
Heroin                 2529
Cocaine                1521
Fentanyl               2232
FentanylAnalogue        389
Oxycodone               607
Oxymorphone             108
Ethanol                1247
Hydrocodone             118
Benzodiazepine         1343
Methadone               474
Amphet                  159
Tramad                  130
Morphine_NotHeroin       42
Hydromorphone            25
Other                   435
OpiateNOS           

In [164]:
# Keep only the records that have every one of these fields populated:
# Age, Sex, Race, ResidenceCity and DeathCity
required_fields = ["Age", "Sex", "Race", "ResidenceCity", "DeathCity"]
drug_data = drug_data.dropna(subset=required_fields)

ID                     4920
Date                   4920
DateType               4920
Age                    4920
Sex                    4920
Race                   4920
ResidenceCity          4920
ResidenceCounty        4296
ResidenceState         3547
DeathCity              4920
DeathCounty            3864
Location               4903
LocationifOther         503
DescriptionofInjury    4170
InjuryPlace            4859
InjuryCity             3223
InjuryCounty           2271
InjuryState            1366
COD                    4920
OtherSignifican         163
Heroin                 2437
Cocaine                1433
Fentanyl               2137
FentanylAnalogue        368
Oxycodone               597
Oxymorphone             105
Ethanol                1189
Hydrocodone             115
Benzodiazepine         1305
Methadone               456
Amphet                  156
Tramad                  127
Morphine_NotHeroin       41
Hydromorphone            23
Other                   420
OpiateNOS           

In [165]:
# Drop the columns that are not part of the planned analysis: OpiateNOS,
# AnyOpioid, and the three *Geo columns holding location coordinates
unplanned_columns = [
    "OpiateNOS",
    "AnyOpioid",
    "DeathCityGeo",
    "ResidenceCityGeo",
    "InjuryCityGeo",
]
drug_data = drug_data.drop(columns=unplanned_columns)

In [166]:
# Expand the abbreviated drug column names into readable labels
readable_names = {
    "FentanylAnalogue": "Fentanyl Analogue",
    "Amphet": "Amphetamine",
    "Tramad": "Tramadol",
    "Morphine_NotHeroin": "Morphine Not Heroin",
}
drug_data = drug_data.rename(columns=readable_names)
# Show the resulting column index to confirm the new names
drug_data.columns

Index(['ID', 'Date', 'DateType', 'Age', 'Sex', 'Race', 'ResidenceCity',
       'ResidenceCounty', 'ResidenceState', 'DeathCity', 'DeathCounty',
       'Location', 'LocationifOther', 'DescriptionofInjury', 'InjuryPlace',
       'InjuryCity', 'InjuryCounty', 'InjuryState', 'COD', 'OtherSignifican',
       'Heroin', 'Cocaine', 'Fentanyl', 'Fentanyl Analogue', 'Oxycodone',
       'Oxymorphone', 'Ethanol', 'Hydrocodone', 'Benzodiazepine', 'Methadone',
       'Amphetamine', 'Tramadol', 'Morphine Not Heroin', 'Hydromorphone',
       'Other', 'MannerofDeath'],
      dtype='object')

In [167]:
# Combine 'Fentanyl Analogue' into 'Fentanyl' and normalise 'Other' to 'Y'.
# Both updates are done with boolean masks instead of iterating row by row:
# writing into a DataFrame while looping over iterrows() is slow and is
# discouraged by pandas.

# Records flagged 'Y' for a fentanyl analogue but not already 'Y' for fentanyl
# (missing values compare as "not 'Y'") are counted as Fentanyl deaths.
analogue_only = drug_data['Fentanyl Analogue'].eq('Y') & drug_data['Fentanyl'].ne('Y')
drug_data.loc[analogue_only, 'Fentanyl'] = 'Y'

# Any non-null entry in 'Other' is collapsed to a plain 'Y' flag
has_other = drug_data['Other'].notna()
drug_data.loc[has_other, 'Other'] = 'Y'

# Now that Fentanyl Analogue has been added into Fentanyl, drop Fentanyl Analogue column
drug_data = drug_data.drop(columns=["Fentanyl Analogue"])
drug_data.columns

Index(['ID', 'Date', 'DateType', 'Age', 'Sex', 'Race', 'ResidenceCity',
       'ResidenceCounty', 'ResidenceState', 'DeathCity', 'DeathCounty',
       'Location', 'LocationifOther', 'DescriptionofInjury', 'InjuryPlace',
       'InjuryCity', 'InjuryCounty', 'InjuryState', 'COD', 'OtherSignifican',
       'Heroin', 'Cocaine', 'Fentanyl', 'Oxycodone', 'Oxymorphone', 'Ethanol',
       'Hydrocodone', 'Benzodiazepine', 'Methadone', 'Amphetamine', 'Tramadol',
       'Morphine Not Heroin', 'Hydromorphone', 'Other', 'MannerofDeath'],
      dtype='object')

In [168]:
# Set 'Other' to 'Y' for every record in which none of the drug columns
# (including 'Other' itself) holds a value, so each record carries at least
# one drug flag.
# The drug columns are listed by name rather than sliced by position
# (previously row[20:34]) so the check keeps working if columns are added,
# removed or reordered upstream; a missing column raises a KeyError instead
# of silently testing the wrong fields.
drug_columns = [
    'Heroin', 'Cocaine', 'Fentanyl', 'Oxycodone', 'Oxymorphone', 'Ethanol',
    'Hydrocodone', 'Benzodiazepine', 'Methadone', 'Amphetamine', 'Tramadol',
    'Morphine Not Heroin', 'Hydromorphone', 'Other',
]

# Check initial Other count
print(drug_data['Other'].value_counts())

# True for rows where every drug column is null
no_drug_recorded = drug_data[drug_columns].isna().all(axis=1)
drug_data.loc[no_drug_recorded, 'Other'] = 'Y'

# Other count after the update
drug_data['Other'].value_counts()

Y    420
Name: Other, dtype: int64


Y    457
Name: Other, dtype: int64

In [169]:
# Persist the cleaned DataFrame as a CSV in the Resources folder
# NOTE(review): index=False is not passed, so the DataFrame index is written
# as the first (unnamed) column - confirm that readers of this file expect it.
clean_csv_path = "../Resources/drug_death_data_clean.csv"
drug_data.to_csv(clean_csv_path)