In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.plotly as py
import plotly.tools as pytl
from config import api_key
from config import username
pytl.set_credentials_file(username=username, api_key=api_key)

In [6]:
file = "Resources/statistics.csv"
psych_df = pd.read_csv(file, encoding="utf-8")
psych_df.head()
psych_df["measure_name"].unique()

array(['Deaths', 'YLDs (Years Lived with Disability)',
       'YLLs (Years of Life Lost)', 'Prevalence', 'Incidence',
       'DALYs (Disability-Adjusted Life Years)'], dtype=object)

In [9]:
DALY_group = psych_df.groupby("measure_name")
DALY_only = psych_df.loc[psych_df["measure_name"] == 'DALYs (Disability-Adjusted Life Years)']
DALY_only.head()
DALY_only_cleaned = DALY_only[["location_name", "sex_name", "age_name", "cause_name", "metric_name", "val"]]
DALY_only_cleaned.head(30)
DALY_number = DALY_only_cleaned.loc[DALY_only_cleaned["metric_name"] == "Number"]
DALY_number.head()
DALY_number_cleaned = DALY_number[["location_name", "sex_name", "age_name", "cause_name", "val"]]
DALY_number_statesort = DALY_number_cleaned.sort_values("location_name", ascending=True)
DALY_number_statesort.head()
DALY_number_statesort.to_csv("Output_CSVs/cleanedDALY.csv", encoding="utf-8", index=False)

In [11]:
DALY_number_statesort

Unnamed: 0,location_name,sex_name,age_name,cause_name,val
486931,Alabama,Female,80 to 84,Bipolar disorder,33.088843
486577,Alabama,Female,15 to 19,Depressive disorders,2442.404162
486582,Alabama,Male,20 to 24,Depressive disorders,1469.307068
486583,Alabama,Female,20 to 24,Depressive disorders,2293.299951
486588,Alabama,Male,25 to 29,Depressive disorders,1386.893886
486589,Alabama,Female,25 to 29,Depressive disorders,2244.713551
486594,Alabama,Male,30 to 34,Depressive disorders,1233.955104
486595,Alabama,Female,30 to 34,Depressive disorders,2078.193893
486600,Alabama,Male,35 to 39,Depressive disorders,1296.744145
486601,Alabama,Female,35 to 39,Depressive disorders,2296.517283


In [11]:
alabama = DALY_number_statesort.loc[DALY_number_statesort["location_name"] == "Alabama"]
alabama
sums = alabama["val"].sum()
sums

589755.822409285

In [12]:
alaska = DALY_number_statesort.loc[DALY_number_statesort["location_name"] == "Alaska"]
sums = alaska["val"].sum()
sums

98664.70567534836

In [13]:
total_DALY = []
for state in DALY_number_statesort["location_name"].unique():
    locator = DALY_number_statesort.loc[DALY_number_statesort["location_name"] == state]
    sums = round(locator["val"].sum(),1)
    total_DALY.append(sums)
total_DALY

[589755.8,
 98664.7,
 852195.8,
 325822.3,
 3741299.1,
 587855.6,
 404694.7,
 117115.2,
 73975.4,
 2271085.0,
 1080402.4,
 125495.9,
 179473.9,
 1276507.1,
 775218.0,
 266206.3,
 284325.2,
 685360.0,
 554873.8,
 161132.0,
 532118.0,
 804230.4,
 1152640.3,
 476501.3,
 347522.3,
 707063.7,
 119500.7,
 151920.6,
 393060.2,
 174661.9,
 896411.7,
 294983.1,
 1936880.0,
 1108267.3,
 58663.3,
 1536390.3,
 520166.1,
 460078.8,
 1620977.1,
 136739.9,
 584462.0,
 69024.6,
 907924.7,
 2544857.0,
 356579.6,
 65389.4,
 839722.2,
 799690.8,
 332650.9,
 614357.0,
 68878.6]

In [14]:
unique = DALY_number_statesort["location_name"].unique()
DALY_df = pd.DataFrame({"State": unique,
                             'DALYs (Disability-Adjusted Life Years)': total_DALY}, index=None)

In [15]:
DALY_df
DALY_df.to_csv("cleaned_total_DALY.csv", encoding="utf-8", index=False)
sorted_DALY = DALY_df.sort_values(by=["State"], ascending=True)
sorted_DALY.head()

Unnamed: 0,State,DALYs (Disability-Adjusted Life Years)
0,Alabama,589755.8
1,Alaska,98664.7
2,Arizona,852195.8
3,Arkansas,325822.3
4,California,3741299.1


In [16]:

state_abbr = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA", 
          "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", 
          "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", 
          "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", 
            "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"]
state_df = pd.DataFrame({
    "Abbreviation": state_abbr
})

merged_DALY = pd.DataFrame({
    "State": sorted_DALY["State"],
    "Abbreviation": state_df["Abbreviation"],
    "DALY": sorted_DALY['DALYs (Disability-Adjusted Life Years)']
})
merged_DALY.to_csv("output_CSVs/cleaned_DALY_2.csv", encoding='utf-8')
merged_DALY

Unnamed: 0,State,Abbreviation,DALY
0,Alabama,AL,589755.8
1,Alaska,AK,98664.7
2,Arizona,AZ,852195.8
3,Arkansas,AR,325822.3
4,California,CA,3741299.1
5,Colorado,CO,587855.6
6,Connecticut,CT,404694.7
7,Delaware,DC,117115.2
8,District of Columbia,DE,73975.4
9,Florida,FL,2271085.0


In [17]:
for col in merged_DALY:
    merged_DALY[col] = merged_DALY[col].astype(str)

scl = [[0.0, 'rgb(242,240,247)'],[0.2, 'rgb(218,218,235)'],[0.4, 'rgb(188,189,220)'],\
            [0.6, 'rgb(158,154,200)'],[0.8, 'rgb(117,107,177)'],[1.0, 'rgb(84,39,143)']]

merged_DALY['text'] = merged_DALY["Abbreviation"] + '<br>' +\
    '"DALYs (Disability-Adjusted Life Years)" '+merged_DALY["DALY"]

data = [ dict(
        type='choropleth',
        colorscale = scl,
        autocolorscale = False,
        locations = merged_DALY["Abbreviation"],
        z = merged_DALY["DALY"].astype(float),
        locationmode = 'USA-states',
        text = merged_DALY['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            )
        ),
        colorbar = dict(
            title = "DALYs (Disability-Adjusted Life Years)"
        )
    ) ]

layout = dict(
        title = '2017 US Mental Illness: DALYs (Disability-Adjusted Life Years)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)',
        ),
    )

fig7 = dict( data=data, layout=layout )

url7 = py.plot( fig7, filename='DALY' )
print(url7)


https://plot.ly/~bkcenik/14


In [18]:
print(f"image is located at https://plot.ly/~bkcenik/14")

image is located at https://plot.ly/~bkcenik/14
