In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.plotly as py
import plotly.tools as pytl
from config import api_key
from config import username
# Store the plotly username/API key (read from the local `config` module) in
# plotly's credentials file; presumably this is what lets the `py.plot` upload
# later in the notebook authenticate.
pytl.set_credentials_file(api_key=api_key, username=username)

In [2]:
# Load the raw statistics export into a DataFrame and preview the first rows.
file = "Resources/statistics.csv"
psych_df = pd.read_csv(filepath_or_buffer=file, encoding="utf-8")
psych_df.head(5)

Unnamed: 0,measure_id,measure_name,location_id,location_name,sex_id,sex_name,age_id,age_name,cause_id,cause_name,metric_id,metric_name,year,val,upper,lower
0,1,Deaths,569,Virginia,1,Male,8,15 to 19,973,Substance use disorders,1,Number,2017,17.009529,24.473869,11.200329
1,1,Deaths,569,Virginia,2,Female,8,15 to 19,973,Substance use disorders,1,Number,2017,6.289063,8.849906,4.321646
2,1,Deaths,569,Virginia,1,Male,8,15 to 19,973,Substance use disorders,2,Percent,2017,0.092553,0.127542,0.064141
3,1,Deaths,569,Virginia,2,Female,8,15 to 19,973,Substance use disorders,2,Percent,2017,0.086652,0.116264,0.062147
4,1,Deaths,569,Virginia,1,Male,8,15 to 19,973,Substance use disorders,3,Rate,2017,6.044813,8.697475,3.980351


In [3]:
# Groupby handle over the measure types; nothing else in this cell consumes it.
incidence_group = psych_df.groupby("measure_name")

# Stage 1: keep only the rows whose measure is "Incidence".
is_incidence = psych_df["measure_name"] == "Incidence"
incidence_only = psych_df.loc[is_incidence]

# Stage 2: project down to the descriptive columns plus the metric type and value.
kept_columns = ["location_name", "sex_name", "age_name", "cause_name", "metric_name", "val"]
incidence_only_cleaned = incidence_only[kept_columns]

# Stage 3: keep only the "Rate" metric; the metric column is then constant, so drop it.
is_rate = incidence_only_cleaned["metric_name"] == "Rate"
incidence_rate = incidence_only_cleaned.loc[is_rate]
incidence_rate_cleaned = incidence_rate.drop(columns="metric_name")

# Stage 4: order the rows by state name and export the cleaned table (no index column).
incidence_rate_statesort = incidence_rate_cleaned.sort_values(by="location_name")
incidence_rate_statesort.to_csv(
    "output_CSVs/_cleaned_incidence_rate.csv",
    encoding="utf-8",
    index=False,
)

In [4]:
# Preview the first five rows of the state-sorted incidence-rate table.
incidence_rate_statesort.head(5)

Unnamed: 0,location_name,sex_name,age_name,cause_name,val
58806,Alabama,Male,30 to 34,Eating disorders,314.156047
58765,Alabama,Female,70 to 74,Anxiety disorders,330.402128
58770,Alabama,Male,75 to 79,Anxiety disorders,332.234053
58771,Alabama,Female,75 to 79,Anxiety disorders,274.168163
58776,Alabama,Male,80 plus,Anxiety disorders,221.386348


In [5]:
# Spot check: total of every incidence-rate value recorded for Alabama.
is_alabama = incidence_rate_statesort["location_name"] == "Alabama"
alabama = incidence_rate_statesort.loc[is_alabama]
sums = alabama["val"].sum()
sums

574877.4556487392

In [6]:
# Spot check: total of every incidence-rate value recorded for Alaska.
is_alaska = incidence_rate_statesort["location_name"] == "Alaska"
alaska = incidence_rate_statesort.loc[is_alaska]
sums = alaska["val"].sum()
sums

627095.7176349997

In [7]:
# Total the rate values per state in one grouped pass instead of re-filtering
# the whole frame once per state.
# `sort=False` keeps the groups in first-appearance order, i.e. exactly the
# order returned by `incidence_rate_statesort["location_name"].unique()`, which
# the next cell pairs positionally with this list.
# NOTE(review): this adds up "Rate" values across every sex/age/cause row of a
# state, which is why the totals exceed 100,000 — confirm that a plain sum of
# rates is the intended statistic.
state_totals = (
    incidence_rate_statesort
    .groupby("location_name", sort=False)["val"]
    .sum()
    .round(1)
)
total_incidence = state_totals.tolist()
total_incidence

[574877.5,
 627095.7,
 644322.7,
 648514.7,
 536310.0,
 633348.7,
 522888.9,
 584715.5,
 521089.4,
 571913.6,
 559305.8,
 575189.6,
 644902.2,
 512863.9,
 620159.9,
 560572.3,
 586257.1,
 620857.7,
 558927.7,
 604229.9,
 567943.1,
 548824.1,
 585000.1,
 560933.1,
 576263.6,
 609357.3,
 659323.8,
 544687.9,
 641005.7,
 607199.1,
 488409.2,
 670918.6,
 575831.6,
 554688.6,
 531634.0,
 586964.4,
 646922.2,
 621533.1,
 561725.6,
 620755.8,
 597041.6,
 522359.5,
 622951.4,
 536605.7,
 701362.3,
 601512.4,
 570410.9,
 613215.7,
 659853.5,
 587418.5,
 652507.0]

In [8]:
# Pair each state name (in order of first appearance) with its summed rate.
# Both sequences are matched by position, so they must share the same order.
unique = incidence_rate_statesort["location_name"].unique()
incidence_df = pd.DataFrame(
    {
        "State": unique,
        "Incidence of Mental Illness (Rate)": total_incidence,
    }
)
incidence_df

Unnamed: 0,State,Incidence of Mental Illness (Rate)
0,Alabama,574877.5
1,Alaska,627095.7
2,Arizona,644322.7
3,Arkansas,648514.7
4,California,536310.0
5,Colorado,633348.7
6,Connecticut,522888.9
7,Delaware,584715.5
8,District of Columbia,521089.4
9,Florida,571913.6


In [9]:
# Export the per-state totals (without the index column), then build an
# alphabetically ordered copy and preview its first rows.
incidence_df.to_csv(
    "cleaned_total_incidence_rate.csv",
    encoding="utf-8",
    index=False,
)
sorted_incidence = incidence_df.sort_values("State")
sorted_incidence.head(5)

Unnamed: 0,State,Incidence of Mental Illness (Rate)
0,Alabama,574877.5
1,Alaska,627095.7
2,Arizona,644322.7
3,Arkansas,648514.7
4,California,536310.0


In [11]:
# Postal abbreviation keyed by state name.
# The abbreviations were previously attached by row position from a list
# ordered by *abbreviation* ("CT", "DC", "DE", ...), while the rows are ordered
# by state *name* (Connecticut, Delaware, District of Columbia, ...). That
# labelled Delaware as "DC" and District of Columbia as "DE". Looking each
# name up explicitly removes the dependence on row order.
state_abbr_by_name = {
    "Alabama": "AL", "Alaska": "AK", "Arizona": "AZ", "Arkansas": "AR",
    "California": "CA", "Colorado": "CO", "Connecticut": "CT",
    "Delaware": "DE", "District of Columbia": "DC", "Florida": "FL",
    "Georgia": "GA", "Hawaii": "HI", "Idaho": "ID", "Illinois": "IL",
    "Indiana": "IN", "Iowa": "IA", "Kansas": "KS", "Kentucky": "KY",
    "Louisiana": "LA", "Maine": "ME", "Maryland": "MD",
    "Massachusetts": "MA", "Michigan": "MI", "Minnesota": "MN",
    "Mississippi": "MS", "Missouri": "MO", "Montana": "MT",
    "Nebraska": "NE", "Nevada": "NV", "New Hampshire": "NH",
    "New Jersey": "NJ", "New Mexico": "NM", "New York": "NY",
    "North Carolina": "NC", "North Dakota": "ND", "Ohio": "OH",
    "Oklahoma": "OK", "Oregon": "OR", "Pennsylvania": "PA",
    "Rhode Island": "RI", "South Carolina": "SC", "South Dakota": "SD",
    "Tennessee": "TN", "Texas": "TX", "Utah": "UT", "Vermont": "VT",
    "Virginia": "VA", "Washington": "WA", "West Virginia": "WV",
    "Wisconsin": "WI", "Wyoming": "WY",
}

# Retained under their original names in case other cells read them; they now
# follow state-name order, consistent with the mapping above.
state_abbr = list(state_abbr_by_name.values())
state_df = pd.DataFrame({
    "Abbreviation": state_abbr
})

incidence_rate_final = pd.DataFrame({
    "State": sorted_incidence["State"],
    "Incidence (Rate)": sorted_incidence["Incidence of Mental Illness (Rate)"],
    "Abbreviation": sorted_incidence["State"].map(state_abbr_by_name),
})

# Fail loudly if the data contains a location with no known abbreviation,
# rather than sending a missing location code to the map.
unmapped_states = incidence_rate_final.loc[
    incidence_rate_final["Abbreviation"].isna(), "State"
].tolist()
assert not unmapped_states, f"No abbreviation for: {unmapped_states}"

# Rank states from lowest to highest total and export the ranking.
# NOTE: unlike the other exports in this notebook, this one writes the index column.
incidence_rate_sorted = incidence_rate_final.sort_values(by="Incidence (Rate)", ascending=True)
incidence_rate_sorted.to_csv("output_CSVs/cleaned_incidence_rate_sorted.csv", encoding="utf-8")

In [13]:
# Cast every column to str in place so the hover label below can be built with
# plain string concatenation (the numeric column is cast back to float for `z`).
for column_name in incidence_rate_final:
    incidence_rate_final[column_name] = incidence_rate_final[column_name].astype(str)

# Six-stop light-to-dark purple colour scale, expressed as [position, colour] pairs.
scl = [
    [0.0, 'rgb(242,240,247)'],
    [0.2, 'rgb(218,218,235)'],
    [0.4, 'rgb(188,189,220)'],
    [0.6, 'rgb(158,154,200)'],
    [0.8, 'rgb(117,107,177)'],
    [1.0, 'rgb(84,39,143)'],
]

# Hover label per state: abbreviation, line break, then the rate value.
hover_prefix = incidence_rate_final["Abbreviation"] + '<br>' + 'Incidence (Rate) '
incidence_rate_final['text'] = hover_prefix + incidence_rate_final["Incidence (Rate)"]

# Single choropleth trace keyed by state abbreviation.
state_outline = {'color': 'rgb(255,255,255)', 'width': 2}
choropleth_trace = {
    'type': 'choropleth',
    'colorscale': scl,
    'autocolorscale': False,
    'locations': incidence_rate_final["Abbreviation"],
    'z': incidence_rate_final["Incidence (Rate)"].astype(float),
    'locationmode': 'USA-states',
    'text': incidence_rate_final['text'],
    'marker': {'line': state_outline},
    'colorbar': {'title': "Incidence (Rate)"},
}
data = [choropleth_trace]

# Map layout: USA scope, Albers USA projection, lakes drawn in white.
layout = {
    'title': '2017 US Mental Illness Incidence Rate',
    'geo': {
        'scope': 'usa',
        'projection': {'type': 'albers usa'},
        'showlakes': True,
        'lakecolor': 'rgb(255, 255, 255)',
    },
}

fig9 = {'data': data, 'layout': layout}

# Send the figure to plotly under the name 'incidencerate' and show the returned URL.
url9 = py.plot(fig9, filename='incidencerate')
url9

'https://plot.ly/~bkcenik/18'

In [15]:
# Report the URL returned by the `py.plot` call above instead of a hardcoded
# copy, so the message stays correct if the plot is re-uploaded elsewhere.
print(f"incidence url is at {url9}")

incidence url is at https://plot.ly/~bkcenik/18
