In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.plotly as py
import plotly.tools as pytl
from config import api_key
from config import username
pytl.set_credentials_file(username=username, api_key=api_key)

In [2]:
file = "Resources/statistics.csv"
psych_df = pd.read_csv(file, encoding="utf-8")
psych_df.head()
psych_df["measure_name"].unique()

array(['Deaths', 'YLDs (Years Lived with Disability)',
       'YLLs (Years of Life Lost)', 'Prevalence', 'Incidence',
       'DALYs (Disability-Adjusted Life Years)'], dtype=object)

In [3]:
YLL_group = psych_df.groupby("measure_name")
YLL_only = psych_df.loc[psych_df["measure_name"] == "YLLs (Years of Life Lost)"]
YLL_only.head()
YLL_only_cleaned = YLL_only[["location_name", "sex_name", "age_name", "cause_name", "metric_name", "val"]]
YLL_only_cleaned.head(30)
YLL_number = YLL_only_cleaned.loc[YLL_only_cleaned["metric_name"] == "Number"]
YLL_number.head()
YLL_number_cleaned = YLL_number[["location_name", "sex_name", "age_name", "cause_name", "val"]]
YLL_number_statesort = YLL_number_cleaned.sort_values("location_name", ascending=True)
YLL_number_statesort.head()
YLL_number_statesort.to_csv("output_CSVs/cleaned_YLL.csv", encoding="utf-8", index=False)

In [4]:
YLL_number_statesort

Unnamed: 0,location_name,sex_name,age_name,cause_name,val
28562,Alabama,Female,60 to 64,Alcohol use disorders,134.331887
54437,Alabama,Male,35 to 39,Substance use disorders,6156.921305
54438,Alabama,Female,35 to 39,Substance use disorders,3620.912936
54443,Alabama,Male,40 to 44,Substance use disorders,4913.513333
54444,Alabama,Female,40 to 44,Substance use disorders,3349.800930
54449,Alabama,Male,45 to 49,Substance use disorders,4518.062818
54450,Alabama,Female,45 to 49,Substance use disorders,3645.351956
54455,Alabama,Male,50 to 54,Substance use disorders,3854.580352
54456,Alabama,Female,50 to 54,Substance use disorders,2986.107592
54461,Alabama,Male,55 to 59,Substance use disorders,2773.346804


In [5]:
alabama = YLL_number_statesort.loc[YLL_number_statesort["location_name"] == "Alabama"]
alabama
sums = alabama["val"].sum()
sums

202536.89450275275

In [6]:
alaska = YLL_number_statesort.loc[YLL_number_statesort["location_name"] == "Alaska"]
sums = alaska["val"].sum()
sums

35573.32085013478

In [7]:
total_YLL = []
for state in YLL_number_statesort["location_name"].unique():
    locator = YLL_number_statesort.loc[YLL_number_statesort["location_name"] == state]
    sums = round(locator["val"].sum(),1)
    total_YLL.append(sums)
total_YLL

[202536.9,
 35573.3,
 249389.4,
 81269.8,
 878335.4,
 159679.8,
 110526.3,
 36157.8,
 18866.0,
 692897.1,
 299419.7,
 23042.4,
 42746.8,
 334113.8,
 238126.5,
 56814.2,
 71569.3,
 272945.2,
 187594.7,
 51059.2,
 90056.9,
 244567.4,
 380566.7,
 96826.9,
 116049.8,
 223861.0,
 36846.5,
 27610.6,
 124349.4,
 56706.0,
 237012.0,
 108137.1,
 444441.9,
 324179.2,
 12701.1,
 558669.1,
 185454.3,
 129431.8,
 546866.2,
 39862.2,
 178171.2,
 15327.6,
 324836.8,
 607737.6,
 92582.4,
 16377.5,
 195684.5,
 224512.9,
 148374.1,
 156097.5,
 22443.3]

In [8]:
unique = YLL_number_statesort["location_name"].unique()
YLL_df = pd.DataFrame({"State": unique,
                             "YLLs (Years of Life Lost)": total_YLL}, index=None)

In [9]:
YLL_df
YLL_df.to_csv("cleaned_total_YLL.csv", encoding="utf-8", index=False)
sorted_YLL = YLL_df.sort_values(by=["State"], ascending=True)
sorted_YLL.head()

Unnamed: 0,State,YLLs (Years of Life Lost)
0,Alabama,202536.9
1,Alaska,35573.3
2,Arizona,249389.4
3,Arkansas,81269.8
4,California,878335.4


In [10]:

state_abbr = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA", 
          "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", 
          "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", 
          "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", 
            "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"]
state_df = pd.DataFrame({
    "Abbreviation": state_abbr
})

merged_YLL = pd.DataFrame({
    "State": sorted_YLL["State"],
    "Abbreviation": state_df["Abbreviation"],
    "YLL": sorted_YLL["YLLs (Years of Life Lost)"]
})
merged_YLL.to_csv("output_CSVs/cleaned_YLL_sorted.csv", encoding='utf-8')
merged_YLL

Unnamed: 0,State,Abbreviation,YLL
0,Alabama,AL,202536.9
1,Alaska,AK,35573.3
2,Arizona,AZ,249389.4
3,Arkansas,AR,81269.8
4,California,CA,878335.4
5,Colorado,CO,159679.8
6,Connecticut,CT,110526.3
7,Delaware,DC,36157.8
8,District of Columbia,DE,18866.0
9,Florida,FL,692897.1


In [11]:
for col in merged_YLL:
    merged_YLL[col] = merged_YLL[col].astype(str)

scl = [[0.0, 'rgb(242,240,247)'],[0.2, 'rgb(218,218,235)'],[0.4, 'rgb(188,189,220)'],\
            [0.6, 'rgb(158,154,200)'],[0.8, 'rgb(117,107,177)'],[1.0, 'rgb(84,39,143)']]

merged_YLL['text'] = merged_YLL["Abbreviation"] + '<br>' +\
    '"YLLs (Years of Life Lost)" '+merged_YLL["YLL"]

data = [ dict(
        type='choropleth',
        colorscale = scl,
        autocolorscale = False,
        locations = merged_YLL["Abbreviation"],
        z = merged_YLL["YLL"].astype(float),
        locationmode = 'USA-states',
        text = merged_YLL['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            )
        ),
        colorbar = dict(
            title = "YLLs (Years of Life Lost)"
        )
    ) ]

layout = dict(
        title = '2017 US Mental Illness: YLLs (Years of Life Lost)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)',
        ),
    )

fig6 = dict( data=data, layout=layout )

url6 = py.plot( fig6, filename='YLL' )
url6

'https://plot.ly/~bkcenik/12'

In [12]:
print(f"YLL data is located at https://plot.ly/~bkcenik/18")

YLL data is located at https://plot.ly/~bkcenik/18
