## Mortality Data

In [1]:
import pandas as pd
import numpy as np


### Read Data

- Unzip the downloaded archive by running `unzip <filename>` in a terminal

- Update the relative path to the unzipped file in the cell below


In [3]:
# Layout of the fixed-width mortality file: field name -> (start, end)
# character offsets that are handed to pandas.read_fwf as colspecs.
mortality_fields = {
    "Year": (101, 105),
    "358 Cause recoded": (149, 152),
    "State": (20, 22),
    "County": (22, 25),
}
col_names = list(mortality_fields.keys())
col_width = list(mortality_fields.values())

# Point this path at the unzipped mortality file.
data_07 = pd.read_fwf(
    "../00_source_data/VS07MORT.DUSMCPUB",
    names=col_names,
    colspecs=col_width,
)


### Using data from CDC WONDER

In [4]:
# Load the CoC mapping table; later cells join it to the death counts on
# state name and county FIPS code.
coc_mapping = pd.read_csv(
    filepath_or_buffer="../00_source_data/COC mapping.csv",
)


## Drug Deaths

In [5]:
# Tab-separated drug-death export covering 2007-2020 (per the file name).
alldeathdata0720 = pd.read_csv(
    "../00_source_data/alldrugdeaths0720.txt",
    delimiter="\t",
)


In [32]:
# Attach drug-death rows to each mapped county. A left join keeps every
# mapping row, and the `_merge` indicator column records whether it matched.
alldata0720merge = pd.merge(
    left=coc_mapping,
    right=alldeathdata0720,
    how="left",
    left_on=["STNAME", "FIPS code"],
    right_on=["State", "County Code"],
    indicator=True,
)


In [33]:
# Tally rows by merge outcome; any "left_only" row is a mapped county with no death data.
alldata0720merge.loc[:, "_merge"].value_counts()


both          424
left_only       0
right_only      0
Name: _merge, dtype: int64

In [6]:
# Tab-separated drug-death export for 2021 (per the file name).
alldeathdata21 = pd.read_csv(
    "../00_source_data/alldrugdeaths21.txt",
    delimiter="\t",
)


In [35]:
# Same left join as for the 2007-2020 file, applied to the 2021 export.
alldata21merge = pd.merge(
    left=coc_mapping,
    right=alldeathdata21,
    how="left",
    left_on=["STNAME", "FIPS code"],
    right_on=["State", "County Code"],
    indicator=True,
)


In [36]:
# Tally rows by merge outcome for the 2021 join.
alldata21merge.loc[:, "_merge"].value_counts()


both          31
left_only      0
right_only     0
Name: _merge, dtype: int64

In [37]:
# Stack the 2007-2020 and 2021 joins row-wise; the original row labels are kept.
alldata = pd.concat(
    objs=[alldata0720merge, alldata21merge],
)


In [38]:
# Keep only the county identifiers, the year and the death count.
alldata0721 = alldata[
    ["STNAME", "CoC Code", "Coc", "CTYNAME", "FIPS code", "Year", "Deaths"]
]


### Check for missing drug deaths data

In [42]:
# Every county in the CoC mapping should have one row for each year in `years`.
# Counties are matched on FIPS code rather than on name: county names can
# repeat across states, and a name match would pool such counties together
# and could hide a missing year.
years = list(range(2007, 2022))
county_keys = coc_mapping[["CTYNAME", "FIPS code"]].drop_duplicates()
for county, fips in zip(county_keys["CTYNAME"], county_keys["FIPS code"]):
    county_rows = alldata0721[alldata0721["FIPS code"] == fips]
    # An unmatched left-join row carries a NaN year; drop it so it is
    # reported as missing rather than counted as a year.
    years_present = set(county_rows["Year"].dropna().astype(int))
    missing_years = sorted(set(years) - years_present)
    if missing_years:
        print(f"Data for {county}")
        print(f"Missing years : {missing_years}")


Data for Broomfield County
Missing years : [2017, 2007, 2008, 2009, 2010, 2011, 2012, 2014, 2015]
Data for Douglas County
Missing years : [2008]


In [40]:
# Douglas County is flagged as missing a year: list how many rows it has per year.
douglas_by_name = alldata0721["CTYNAME"] == "Douglas County"
dougcounty = alldata0721[douglas_by_name]
dougcounty["Year"].value_counts()


2007.0    1
2009.0    1
2010.0    1
2011.0    1
2012.0    1
2013.0    1
2014.0    1
2015.0    1
2016.0    1
2017.0    1
2018.0    1
2019.0    1
2020.0    1
2021.0    1
Name: Year, dtype: int64

In [43]:
# Cross-check against the raw 2007-2020 export (so 2021 is not included):
# count the rows for county code 8035.
douglas_raw_rows = alldeathdata0720["County Code"] == 8035
dougcountyoriginal = alldeathdata0720[douglas_raw_rows]
len(dougcountyoriginal["Year"])


13

### Impute Missing Data

In [44]:
# Douglas County (FIPS 8035) has no 2008 row, so estimate 2008 deaths as the
# mean of the 2007 and 2009 counts. The values stay as NumPy arrays.
douglas_rows = alldata0721["FIPS code"] == 8035
doug07deaths = alldata0721[douglas_rows & (alldata0721["Year"] == 2007)][
    "Deaths"
].values
doug09deaths = alldata0721[douglas_rows & (alldata0721["Year"] == 2009)][
    "Deaths"
].values
doug08deaths = (doug07deaths + doug09deaths) / 2


In [45]:
# Build the Douglas County 2008 row: fixed identifiers plus the estimated count.
# `doug08deaths` is an array, which is what lets the scalar fields broadcast
# into a DataFrame without an explicit index.
douglas_identity = {
    "STNAME": "Colorado",
    "CoC Code": "CO-503",
    "Coc": "Metropolitan Denver",
    "CTYNAME": "Douglas County",
    "FIPS code": 8035,
}
doug_08_data = {**douglas_identity, "Year": 2008, "Deaths": doug08deaths}
df_doug_08 = pd.DataFrame(data=doug_08_data)


In [46]:
# Append the estimated Douglas 2008 row and renumber the index from zero.
alldrugdatafinal = pd.concat(
    objs=[alldata0721, df_doug_08],
    ignore_index=True,
)
alldrugdatafinal.tail(5)


Unnamed: 0,STNAME,CoC Code,Coc,CTYNAME,FIPS code,Year,Deaths
451,Texas,TX-700,"Houston, Pasadena, Conroe/Harris, Ft. Bend, Mo...",Montgomery County,48339,2021.0,129.0
452,Texas,TX-700,"Houston, Pasadena, Conroe/Harris, Ft. Bend, Mo...",Fort Bend County,48157,2021.0,78.0
453,Texas,TX-503,Austin/Travis County,Travis County,48453,2021.0,269.0
454,Washington,WA-500,Seattle/King,King County,53033,2021.0,667.0
455,Colorado,CO-503,Metropolitan Denver,Douglas County,8035,2008.0,20.0


In [47]:
# fixing broomfield
# Broomfield County (FIPS 8014) has no rows for several years. Add one
# placeholder row per missing year, with Deaths left as NaN, so the county
# has an entry for every year in `years`.
years_available = alldrugdatafinal[alldrugdatafinal["FIPS code"] == 8014]["Year"]
missing_brooomfield_years = list(set(years) - set(years_available))

# Build every placeholder row in a single DataFrame constructor call instead
# of concatenating one single-row frame per year. `np.nan` is used because
# the `np.NaN` alias was removed in NumPy 2.0. Passing `columns` keeps the
# schema intact even when no year is missing.
broomfield_columns = [
    "STNAME",
    "CoC Code",
    "Coc",
    "CTYNAME",
    "FIPS code",
    "Year",
    "Deaths",
]
broomfield_df = pd.DataFrame(
    [
        {
            "STNAME": "Colorado",
            "CoC Code": "CO-503",
            "Coc": "Metropolitan Denver",
            "CTYNAME": "Broomfield County",
            "FIPS code": 8014,
            "Year": year,
            "Deaths": np.nan,
        }
        for year in missing_brooomfield_years
    ],
    columns=broomfield_columns,
)
broomfield_df


Unnamed: 0,STNAME,CoC Code,Coc,CTYNAME,FIPS code,Year,Deaths
0,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2017,
1,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2007,
2,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2008,
3,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2009,
4,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2010,
5,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2011,
6,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2012,
7,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2014,
8,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2015,


In [49]:
# Add the Broomfield placeholder rows, then cast Year to int.
alldrugdatafinal0721 = pd.concat(
    objs=[alldrugdatafinal, broomfield_df],
    ignore_index=True,
).astype({"Year": int})
alldrugdatafinal0721.tail(10)


Unnamed: 0,STNAME,CoC Code,Coc,CTYNAME,FIPS code,Year,Deaths
455,Colorado,CO-503,Metropolitan Denver,Douglas County,8035,2008,20.0
456,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2017,
457,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2007,
458,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2008,
459,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2009,
460,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2010,
461,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2011,
462,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2012,
463,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2014,
464,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2015,


### Combine data

In [50]:
# Give the count column a cause-specific name, then order rows by county and year.
drug_deaths_renamed = alldrugdatafinal0721.rename(columns={"Deaths": "Drug Deaths"})
alldrugdatafinal0721 = drug_deaths_renamed.sort_values(by=["CTYNAME", "Year"])
alldrugdatafinal0721


Unnamed: 0,STNAME,CoC Code,Coc,CTYNAME,FIPS code,Year,Drug Deaths
84,Colorado,CO-503,Metropolitan Denver,Adams County,8001,2007,75.0
85,Colorado,CO-503,Metropolitan Denver,Adams County,8001,2008,64.0
86,Colorado,CO-503,Metropolitan Denver,Adams County,8001,2009,71.0
87,Colorado,CO-503,Metropolitan Denver,Adams County,8001,2010,66.0
88,Colorado,CO-503,Metropolitan Denver,Adams County,8001,2011,93.0
...,...,...,...,...,...,...,...
252,Michigan,MI-501,Detroit,Wayne County,26163,2017,756.0
253,Michigan,MI-501,Detroit,Wayne County,26163,2018,775.0
254,Michigan,MI-501,Detroit,Wayne County,26163,2019,756.0
255,Michigan,MI-501,Detroit,Wayne County,26163,2020,804.0


In [None]:
# Sanity check: one row for every (mapping row, year) combination.
assert alldrugdatafinal0721.shape[0] == len(years) * len(coc_mapping)

# Persist the table; the row index is written as the first column.
alldrugdatafinal0721.to_csv(
    "../20_intermediate_files/drug_deaths.csv",
    index=True,
)


## Suicide data

In [7]:
# Tab-separated export for the suicide section, covering 2007-2020 (per the file name).
allmh0720 = pd.read_csv(
    "../00_source_data/allmhdata0720.txt",
    delimiter="\t",
)


In [53]:
# Attach the 2007-2020 rows to each mapped county. A left join keeps every
# mapping row, and the `_merge` indicator column records whether it matched.
allmhdata0720merge = pd.merge(
    left=coc_mapping,
    right=allmh0720,
    how="left",
    left_on=["STNAME", "FIPS code"],
    right_on=["State", "County Code"],
    indicator=True,
)


In [54]:
# Tally rows by merge outcome; any "left_only" row is a mapped county with no data.
allmhdata0720merge.loc[:, "_merge"].value_counts()


both          429
left_only       0
right_only      0
Name: _merge, dtype: int64

In [55]:
# Tab-separated export for the suicide section, 2021 (per the file name).
allmh21 = pd.read_csv(
    "../00_source_data/allmhdata21.txt",
    delimiter="\t",
)


In [56]:
# Same left join as for the 2007-2020 file, applied to the 2021 export.
allmh21merge = pd.merge(
    left=coc_mapping,
    right=allmh21,
    how="left",
    left_on=["STNAME", "FIPS code"],
    right_on=["State", "County Code"],
    indicator=True,
)


In [57]:
# Tally rows by merge outcome for the 2021 join.
allmh21merge.loc[:, "_merge"].value_counts()


both          31
left_only      0
right_only     0
Name: _merge, dtype: int64

In [58]:
# Stack the 2007-2020 and 2021 joins row-wise; the original row labels are kept.
allmhdata = pd.concat(
    objs=[allmhdata0720merge, allmh21merge],
)


In [59]:
# Keep only the county identifiers, the year and the death count.
allmh0721 = allmhdata[
    ["STNAME", "CoC Code", "Coc", "CTYNAME", "FIPS code", "Year", "Deaths"]
]


In [60]:
# Preview the first five rows of the trimmed table.
allmh0721.head(5)


Unnamed: 0,STNAME,CoC Code,Coc,CTYNAME,FIPS code,Year,Deaths
0,Arizona,AZ-502,"Phoenix,Mesa/Maricopa",Maricopa County,4013,2007.0,568.0
1,Arizona,AZ-502,"Phoenix,Mesa/Maricopa",Maricopa County,4013,2008.0,540.0
2,Arizona,AZ-502,"Phoenix,Mesa/Maricopa",Maricopa County,4013,2009.0,524.0
3,Arizona,AZ-502,"Phoenix,Mesa/Maricopa",Maricopa County,4013,2010.0,564.0
4,Arizona,AZ-502,"Phoenix,Mesa/Maricopa",Maricopa County,4013,2011.0,621.0


### Check missing suicide values

In [62]:
# Every county in the CoC mapping should have one row for each year in `years`.
# Counties are matched on FIPS code rather than on name: county names can
# repeat across states, and a name match would pool such counties together
# and could hide a missing year.
years = list(range(2007, 2022))
county_keys = coc_mapping[["CTYNAME", "FIPS code"]].drop_duplicates()
for county, fips in zip(county_keys["CTYNAME"], county_keys["FIPS code"]):
    county_rows = allmh0721[allmh0721["FIPS code"] == fips]
    # An unmatched left-join row carries a NaN year; drop it so it is
    # reported as missing rather than counted as a year.
    years_present = set(county_rows["Year"].dropna().astype(int))
    missing_years = sorted(set(years) - years_present)
    if missing_years:
        print(f"Data for {county}")
        print(f"Missing years : {missing_years}")


Data for Broomfield County
Missing years : [2017, 2020, 2008, 2009, 2013]


### Add NaN placeholder rows for missing Broomfield data

In [63]:
# fixing broomfield suicide values
# Broomfield County (FIPS 8014) has no rows for several years. Add one
# placeholder row per missing year, with Deaths left as NaN, so the county
# has an entry for every year in `years`.
years_available = allmh0721[allmh0721["FIPS code"] == 8014]["Year"]
missing_brooomfield_years = list(set(years) - set(years_available))

# Build every placeholder row in a single DataFrame constructor call instead
# of concatenating one single-row frame per year. `np.nan` is used because
# the `np.NaN` alias was removed in NumPy 2.0. Passing `columns` keeps the
# schema intact even when no year is missing.
broomfield_mh_columns = [
    "STNAME",
    "CoC Code",
    "Coc",
    "CTYNAME",
    "FIPS code",
    "Year",
    "Deaths",
]
broomfield_mh_df = pd.DataFrame(
    [
        {
            "STNAME": "Colorado",
            "CoC Code": "CO-503",
            "Coc": "Metropolitan Denver",
            "CTYNAME": "Broomfield County",
            "FIPS code": 8014,
            "Year": year,
            "Deaths": np.nan,
        }
        for year in missing_brooomfield_years
    ],
    columns=broomfield_mh_columns,
)
broomfield_mh_df


Unnamed: 0,STNAME,CoC Code,Coc,CTYNAME,FIPS code,Year,Deaths
0,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2017,
1,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2020,
2,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2008,
3,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2009,
4,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2013,


In [64]:
# Add the Broomfield placeholder rows, then cast Year to int.
allmhfinal0721 = pd.concat(
    objs=[allmh0721, broomfield_mh_df],
    ignore_index=True,
).astype({"Year": int})
allmhfinal0721.tail(10)


Unnamed: 0,STNAME,CoC Code,Coc,CTYNAME,FIPS code,Year,Deaths
455,Texas,TX-700,"Houston, Pasadena, Conroe/Harris, Ft. Bend, Mo...",Harris County,48201,2021,566.0
456,Texas,TX-700,"Houston, Pasadena, Conroe/Harris, Ft. Bend, Mo...",Montgomery County,48339,2021,113.0
457,Texas,TX-700,"Houston, Pasadena, Conroe/Harris, Ft. Bend, Mo...",Fort Bend County,48157,2021,87.0
458,Texas,TX-503,Austin/Travis County,Travis County,48453,2021,198.0
459,Washington,WA-500,Seattle/King,King County,53033,2021,295.0
460,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2017,
461,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2020,
462,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2008,
463,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2009,
464,Colorado,CO-503,Metropolitan Denver,Broomfield County,8014,2013,


### Combining data

In [65]:
# Give the count column a cause-specific name, then order rows by county and year.
suicide_deaths_renamed = allmhfinal0721.rename(columns={"Deaths": "Suicide Deaths"})
allmhfinal0721 = suicide_deaths_renamed.sort_values(by=["CTYNAME", "Year"])
allmhfinal0721


Unnamed: 0,STNAME,CoC Code,Coc,CTYNAME,FIPS code,Year,Suicide Deaths
84,Colorado,CO-503,Metropolitan Denver,Adams County,8001,2007,72.0
85,Colorado,CO-503,Metropolitan Denver,Adams County,8001,2008,69.0
86,Colorado,CO-503,Metropolitan Denver,Adams County,8001,2009,71.0
87,Colorado,CO-503,Metropolitan Denver,Adams County,8001,2010,51.0
88,Colorado,CO-503,Metropolitan Denver,Adams County,8001,2011,66.0
...,...,...,...,...,...,...,...
257,Michigan,MI-501,Detroit,Wayne County,26163,2017,203.0
258,Michigan,MI-501,Detroit,Wayne County,26163,2018,225.0
259,Michigan,MI-501,Detroit,Wayne County,26163,2019,211.0
260,Michigan,MI-501,Detroit,Wayne County,26163,2020,218.0


In [157]:
# Sanity check: one row for every (mapping row, year) combination.
assert allmhfinal0721.shape[0] == len(years) * len(coc_mapping)

# Persist the table; the row index is written as the first column.
allmhfinal0721.to_csv(
    "../20_intermediate_files/suicide_deaths.csv",
    index=True,
)
