In [27]:
import pandas as pd
import numpy as np
import dask.dataframe as ddf
from pandas import Series, DataFrame
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px

### This suicide mortality dataset was collected from CDC WONDER (https://wonder.cdc.gov/controller/datarequest/D76) using the ICD-10 codes for intentional self-harm (suicide): U03, X60-X84, Y87.
### Suicide data from 2005 to 2015 is provided

In [28]:
# Study years 2005 through 2015 as strings; they are spliced into file names
# and compared against the 'year' column later on.
years = list(map(str, range(2005, 2016)))

In [29]:
# Load one tab-separated export per year and collect the cleaned frames.
data = []
for year in years:
    df = (
        # County Code is read as text so codes such as 01003 keep their leading zero.
        pd.read_csv(r'suicide/Suicide_' + year + '.txt', sep='\t', dtype={"County Code": str})
        .drop(columns='Notes')
        # Discard every row that still has a missing value once Notes is gone.
        .dropna(how='any')
        .astype({'Deaths': int, 'Population': int})
        .assign(
            # Deaths per 100,000 population.
            SuicideMortalityRate=lambda frame: (frame['Deaths'] / frame['Population']) * 100000,
            year=year,
        )
    )
    data.append(df)

In [30]:
# Stack the per-year frames row-wise; each frame keeps its own row labels.
suicide = pd.concat(objs=data, axis=0)
suicide

Unnamed: 0,County,County Code,Deaths,Population,Crude Rate,SuicideMortalityRate,year
0,"Baldwin County, AL",01003,14,162183,Unreliable,8.632224,2005
1,"Calhoun County, AL",01015,14,114477,Unreliable,12.229531,2005
2,"Cullman County, AL",01043,15,78692,Unreliable,19.061658,2005
3,"Etowah County, AL",01055,16,103174,Unreliable,15.507783,2005
4,"Houston County, AL",01069,15,93903,Unreliable,15.973931,2005
...,...,...,...,...,...,...,...
949,"Laramie County, WY",56021,27,97121,27.8,27.800373,2015
950,"Natrona County, WY",56025,28,82178,34.1,34.072379,2015
951,"Park County, WY",56029,10,29228,Unreliable,34.213768,2015
952,"Sheridan County, WY",56033,12,30009,Unreliable,39.988004,2015


In [31]:
# Use the key names 'county' and 'fips', which the later merge with the
# county table and the map's `locations` argument rely on.
column_aliases = {'County': 'county', 'County Code': 'fips'}
suicide = suicide.rename(columns=column_aliases)
suicide

Unnamed: 0,county,fips,Deaths,Population,Crude Rate,SuicideMortalityRate,year
0,"Baldwin County, AL",01003,14,162183,Unreliable,8.632224,2005
1,"Calhoun County, AL",01015,14,114477,Unreliable,12.229531,2005
2,"Cullman County, AL",01043,15,78692,Unreliable,19.061658,2005
3,"Etowah County, AL",01055,16,103174,Unreliable,15.507783,2005
4,"Houston County, AL",01069,15,93903,Unreliable,15.973931,2005
...,...,...,...,...,...,...,...
949,"Laramie County, WY",56021,27,97121,27.8,27.800373,2015
950,"Natrona County, WY",56025,28,82178,34.1,34.072379,2015
951,"Park County, WY",56029,10,29228,Unreliable,34.213768,2015
952,"Sheridan County, WY",56033,12,30009,Unreliable,39.988004,2015


In [32]:
# For each year, count the distinct fips codes that have a row,
# stored as one single-entry {year: count} dict per year.
amount_list = [
    {yr: len(suicide.loc[suicide['year'] == yr, 'fips'].unique())}
    for yr in years
]

In [33]:
# Show the per-year {year: distinct fips count} entries.
amount_list

[{'2005': 735},
 {'2006': 764},
 {'2007': 779},
 {'2008': 798},
 {'2009': 791},
 {'2010': 851},
 {'2011': 882},
 {'2012': 878},
 {'2013': 888},
 {'2014': 902},
 {'2015': 954}]

In [34]:
def defineRange(r):
    """Return the legend-bin label for a suicide mortality rate.

    Only the upper bound of each bin is tested, in ascending order, so the
    bins are contiguous and closed on the right. A fractional rate such as
    10.05 therefore lands in '10.1-12' instead of slipping between two bins.

    Parameters
    ----------
    r : float
        Rate to classify.

    Returns
    -------
    str
        One of '<=10', '10.1-12', '12.1-14', '14.1-16', '16.1-18',
        '18.1-20', '20.1-22', '22.1-24', '24.1-28', '28.1-36' or '>36'.

    Notes
    -----
    NaN compares false against every bound, so a missing rate falls through
    to '>36'. Filter missing values out before calling if that is unwanted.
    """
    # (inclusive upper bound, label), in ascending order of the bound.
    upper_bounds = (
        (10, '<=10'),
        (12, '10.1-12'),
        (14, '12.1-14'),
        (16, '14.1-16'),
        (18, '16.1-18'),
        (20, '18.1-20'),
        (22, '20.1-22'),
        (24, '22.1-24'),
        (28, '24.1-28'),
        (36, '28.1-36'),
    )
    for upper, label in upper_bounds:
        if r <= upper:
            return label
    return '>36'

In [35]:
# Label every row with the legend bin of its computed rate.
suicide['RateRange'] = suicide['SuicideMortalityRate'].map(defineRange)
suicide

Unnamed: 0,county,fips,Deaths,Population,Crude Rate,SuicideMortalityRate,year,RateRange
0,"Baldwin County, AL",01003,14,162183,Unreliable,8.632224,2005,<=10
1,"Calhoun County, AL",01015,14,114477,Unreliable,12.229531,2005,12.1-14
2,"Cullman County, AL",01043,15,78692,Unreliable,19.061658,2005,18.1-20
3,"Etowah County, AL",01055,16,103174,Unreliable,15.507783,2005,14.1-16
4,"Houston County, AL",01069,15,93903,Unreliable,15.973931,2005,14.1-16
...,...,...,...,...,...,...,...,...
949,"Laramie County, WY",56021,27,97121,27.8,27.800373,2015,24.1-28
950,"Natrona County, WY",56025,28,82178,34.1,34.072379,2015,28.1-36
951,"Park County, WY",56029,10,29228,Unreliable,34.213768,2015,28.1-36
952,"Sheridan County, WY",56033,12,30009,Unreliable,39.988004,2015,>36


In [36]:
# Colour assigned to each rate-range label on the maps, lowest bin first.
color_map = {
    '<=10': '#5f50a2',
    '10.1-12': '#3388bd',
    '12.1-14': '#66c2a6',
    '14.1-16': '#abdda4',
    '16.1-18': '#e6f598',
    '18.1-20': '#ffffbf',
    '20.1-22': '#fde08b',
    '22.1-24': '#fcad61',
    '24.1-28': '#f36d44',
    '28.1-36': '#b63e4f',
    '>36': '#9e0143',
}

In [37]:
# Year shown on the "before imputation" map; change it to any year from 2005 to 2015.
# Keep it a string: the 'year' column of `suicide` holds the string values taken from `years`.
year = '2014'

In [None]:
import json
from urllib.request import urlopen

import pandas as pd
import plotly.express as px

# County GeoJSON downloaded from the plotly datasets repository.
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.load(response)

print(f'Suicide Rate Before Imputation in {year}')

# Rows of the selected year only, highest rate first.
suicide_year = (
    suicide.loc[suicide['year'] == year]
    .sort_values(by="SuicideMortalityRate", axis=0, ascending=False)
)

fig = px.choropleth_mapbox(
    suicide_year,
    geojson=counties,
    locations='fips',
    color='RateRange',
    color_discrete_map=color_map,
    mapbox_style='carto-positron',
    zoom=3,
    center={"lat": 37.0902, "lon": -95.7129},
)
# Remove the figure margins on all four sides.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()

Suicide Rate Before Imputation in 2014


In [None]:
# County table; fips is read as text and the 'Unnamed: 0' column is dropped.
county = pd.read_csv('county_adjacency.csv', dtype={'fips': str})
county = county.drop(columns=['Unnamed: 0'])
county

In [None]:
# Show the suicide frame once more before it is merged with the county table.
suicide

### Merge each year's suicide data with the county dataframe

In [None]:
# One frame per year: every row of `county`, with that year's suicide figures
# attached where a matching (county, fips) row exists. The left join leaves
# unmatched counties without a year, so the column is filled in afterwards.
data = [
    county.merge(
        suicide.loc[suicide['year'] == year],
        on=['county', 'fips'],
        how='left',
    ).assign(year=year)
    for year in years
]

In [None]:
# Stack the per-year merged frames into a single table.
df_suicide = pd.concat(objs=data, axis=0)
df_suicide

#### The suicide data fed into the INLA model must first be sorted by fips and year so that the model can recognize the spatial and temporal effects

In [None]:
# Order rows by fips, then by year within each fips.
df_suicide = df_suicide.sort_values(by=['fips', 'year'])
df_suicide.head(n=50)

In [None]:
# Keep only the identifying columns and the three measures.
df_final = df_suicide.loc[
    :,
    ['county', 'fips', 'year', 'Deaths', 'Population', 'SuicideMortalityRate'],
]
df_final

In [None]:
# Write the table to disk; the row index is written as the first column (pandas default).
df_final.to_csv('suicide_0515.csv', index=True)

### Read the suicide data after imputation

In [None]:
# Load the imputed file, reading fips as text.
suicide_imputed = pd.read_csv(
    'suicide_0515_imputed.csv',
    dtype={'fips': str},
)
suicide_imputed

In [None]:
def convertFips(code):
    """Return ``code`` as text, left-padded with '0' to at least five characters."""
    text = str(code)
    # Fill character '0', right-aligned, minimum width 5; longer text is returned unchanged.
    return f"{text:0>5}"

In [None]:
# Normalise every fips value to zero-padded text of at least five characters.
suicide_imputed['fips'] = suicide_imputed['fips'].map(convertFips)

In [None]:
# Label each imputed estimate with the same legend bins used for the raw rates.
suicide_imputed['RateRange'] = suicide_imputed['SuicideEstimates'].map(defineRange)
suicide_imputed.head()

In [None]:
# Year shown on the "after imputation" map; it is compared against suicide_imputed['year'].
# NOTE(review): an int here presumably matches how read_csv parsed that column — confirm.
year = 2014

In [None]:
import json
from urllib.request import urlopen

import pandas as pd
import plotly.express as px

# County GeoJSON downloaded from the plotly datasets repository.
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.load(response)

print(f'Suicide Rate After Imputation in {year}')

# Rows of the selected year only, highest estimate first.
suicide_year = (
    suicide_imputed.loc[suicide_imputed['year'] == year]
    .sort_values(by="SuicideEstimates", axis=0, ascending=False)
)

fig = px.choropleth_mapbox(
    suicide_year,
    geojson=counties,
    locations='fips',
    color='RateRange',
    color_discrete_map=color_map,
    mapbox_style='carto-positron',
    zoom=3,
    center={"lat": 37.0902, "lon": -95.7129},
)
# Remove the figure margins on all four sides.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()