In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import datetime
import os
import gmaps
import gmaps.datasets
from mapsplotlib import mapsplot as mplt
from config import gkey
mplt.register_api_key(api_key=gkey)
from pprint import pprint

# Import API key
#from api_keys import api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Input files (from cleandata). Each path is relative to the notebook's folder.
facilities_file = os.path.join("..", "data", "cleandata", "facilities.csv")
us_population_file = os.path.join("..", "data", "cleandata", "population.csv")
suicide_us_file = os.path.join("..", "data", "cleandata", "master.csv")
Expenditure_file = os.path.join("..", "data", "cleandata", "Expenditure.csv")
suicide_file = os.path.join("..", "data", "cleandata", "SuicByState10yr.csv")


# Output files (PNG), one per chart saved by the cells below.
output_us_2017_rates_bar = os.path.join("..", "Images", "us_suic_rt_2017.png")
output_us_suicide_vs_age = os.path.join("..", "Images", "us_suicide_vs_age_line.png")
output_facilities_vs_pop = os.path.join("..", "Images", "mh_facilities_vs_population.png")
path_out_internat = os.path.join("..", "Images", "internat_suic_sorted.png")
output_us_avg_vs_world_avg = os.path.join("..", "Images", "us_vs_world_line.png")
# The 1985 male pie chart is saved through `path_PiMa1985`; this constant was
# originally spelled `path_PieMa1985`, so the correct name is defined here and
# the old spelling is kept as an alias for anything that still refers to it.
path_PiMa1985 = os.path.join("..", "Images", "US_male1985.png")
path_PieMa1985 = path_PiMa1985
path_PiFe1985 = os.path.join("..", "Images", "US_FEmale1985.png")
path_PiMa2015 = os.path.join("..", "Images", "US_male2015.png")
path_PiFe2015 = os.path.join("..", "Images", "US_FEmale2015.png")

ModuleNotFoundError: No module named 'config'

## Bar Chart: US Suicide Rates by State

In [None]:
# Load the per-state table through the shared path constant from the setup cell
# (same file as '../data/cleandata/SuicByState10yr.csv').
df = pd.read_csv(suicide_file)

# Keep only the 2017 column, indexed by state name, highest value first so the
# bars read left-to-right from the highest to the lowest state.
state_data = (
    df[['stateName', '2017suic']]
    .set_index('stateName')
    .sort_values(by=['2017suic'], ascending=False)
)
state_data.plot(kind='bar', figsize=(15,5))


plt.ylabel("Suicides /100,000 population")
plt.xlabel("State")
plt.title("Suicide Rates by State, 2017 (per 100,000 population)")

# bbox_inches="tight" trims the saved image to the drawn content.
plt.savefig(output_us_2017_rates_bar, bbox_inches="tight")
plt.show()


## Bar Chart: US facilities vs US Population by State


In [None]:
# Load the two inputs for the facilities-vs-population chart:
#   facilities_df - mental health facilities in the US
#   pop_df        - US Census Bureau population data
facilities_df, pop_df = (
    pd.read_csv(csv_path) for csv_path in (facilities_file, us_population_file)
)

# Non-null counts per column, facilities first, then population.
for loaded_frame in (facilities_df, pop_df):
    print(loaded_frame.count())

In [None]:
# Inner join on "state": only states present in both tables survive, and every
# facility row picks up the columns of its state's population row.
merge_table = facilities_df.merge(pop_df, how='inner', on="state")
merge_table.head()

In [None]:
# Collapse the merged table to one row per state: number of 'name1' entries
# (shown as "Total Facilities") and the mean of 'Population'.
# presumably Population is constant within a state, so the mean is just that value - TODO confirm
facilities_count_by_state = (
    merge_table
    .groupby('state')
    .agg({'name1': 'count', 'Population': 'mean'})
    .rename(columns={"name1": "Total Facilities"})
)

# Two y-axes over one shared x-axis: facilities on the left, population on the right.
fig = plt.figure()
ax = fig.add_subplot(111)
ax2 = ax.twinx()

# Settings common to both bar series; `position` (1 vs 0) places the two bars
# side by side at each state tick instead of on top of each other.
shared_bar_kwargs = dict(kind='bar', figsize=(20,5), align='edge', width=0.3)
facilities_count_by_state['Total Facilities'].plot(
    color='coral', ax=ax, position=1, **shared_bar_kwargs
)
facilities_count_by_state['Population'].plot(
    color='lightblue', ax=ax2, position=0, **shared_bar_kwargs
)

ax.set_title("Number of Mental Health Facilities per US State vs. Population")
ax.set_xlabel("State")
ax.set_ylabel('Total Mental Health Facilities')
ax2.set_ylabel('Population (ten millions)')

# Figure-level legend so entries from both axes appear in one box.
fig.legend(loc='upper right', bbox_to_anchor=(0.95, 0.90))

# Tighten the layout, write the PNG, then display.
plt.tight_layout()
plt.savefig(output_facilities_vs_pop)
plt.show()


## Line Graph: Total Suicides by Age Group in the US

In [None]:
# Kaggle master suicide table (all countries).
suicide_df = pd.read_csv(suicide_us_file)

# Keep only the rows whose country is the United States.
is_united_states = suicide_df['country'] == 'United States'
US_data = suicide_df[is_united_states]
US_data.head()


In [None]:
# Print the column names, dtypes and non-null counts of the US-only subset.
US_data.info()

In [None]:
# Narrow the US rows to the columns used by the age-group trend chart.
trend_columns = ['year', 'age', 'suicides_no', 'population', 'suicides/100k pop']
US_data_sub = US_data[trend_columns]

# Group by (year, age range); as_index=False keeps 'year' and 'age' as ordinary
# columns in the aggregated result.
df_age_grp_by_yr = US_data_sub.groupby(['year', 'age'], as_index=False)

# Total number of suicides for each (year, age range) pair.
df_tot_by_yr = df_age_grp_by_yr['suicides_no'].sum()
df_tot_by_yr.head()

In [None]:
# Line graph: total US suicides per year, one line per age range.
# Age ranges in the order they should appear in the legend.
age_groups = [
    "5-14 years",
    "15-24 years",
    "25-34 years",
    "35-54 years",
    "55-74 years",
    "75+ years",
]
# All years present in the aggregated table (kept for reference by other cells).
time_line = df_tot_by_yr['year'].unique()

fig, ax = plt.subplots(figsize=(15, 5))
for age_group in age_groups:
    group_rows = df_tot_by_yr.loc[df_tot_by_yr['age'] == age_group]
    # Plot each age range against its OWN years rather than the shared
    # `time_line`: if an age range is missing a year, x and y still have the
    # same length and the chart is drawn instead of raising a shape error.
    ax.plot(group_rows['year'], group_rows['suicides_no'], label=age_group)
ax.legend()

# Incorporate the other graph properties
ax.set_title("Total US Suicides by Year and Age Range")
ax.set_ylabel("Total Suicides")
ax.set_xlabel("Year")
ax.margins(0.01)


fig.tight_layout()
fig.savefig(output_us_suicide_vs_age)

plt.show()

## Pie Charts: Share of US Suicides by Age Group and Gender (1985 vs. 2015)

In [None]:
# Load the master table and drop the two columns the pie charts do not use.
# Author's notes on the raw file: 27820 rows of suicide data by gender, age and
# GDP for 101 countries; 6 age groups, 2 genders, 32 years (some years missing).
master_csv_path = os.path.join("..", "data", "cleandata", "master.csv")
internat_read = pd.read_csv(master_csv_path).drop(
    columns=["country-year", "HDI for year"]
)
internat_drop = pd.DataFrame(internat_read)

# US rows only, ordered by the 'age' label. The labels are text, so the order is
# lexicographic (e.g. 35-54, then 5-14, then 55-74) rather than by actual age.
US_data = (
    internat_drop
    .loc[internat_drop['country'] == 'United States']
    .sort_values('age')
)
US_data.head()                  # author's note: 372 rows x 12 columns, m/f x 6 age groups

In [None]:
# Split the US rows by sex.
is_male = US_data['sex'] == "male"
is_female = US_data['sex'] == "female"
US_data_male = US_data[is_male]
US_data_female = US_data[is_female]

# For each sex, take the two comparison years (1985 and 2015), which the
# author's note describes as the first and last complete years.
US_data_male_1985 = US_data_male.loc[US_data_male['year'] == 1985]
US_data_male_2015 = US_data_male.loc[US_data_male['year'] == 2015]
US_data_female_1985 = US_data_female.loc[US_data_female['year'] == 1985]
US_data_female_2015 = US_data_female.loc[US_data_female['year'] == 2015]

In [None]:
# Pie chart: share of 1985 male suicides by age group (one of four pies used to
# compare 1985 with 2015 and males with females).
# Big font because four pie charts share one page. 'sans-serif' is one of
# matplotlib's generic font families; the previous value 'normal' is not a
# family name and only produced "font family not found" warnings.
# plt.rc changes global settings, so this font also applies to later figures.
font = {'family' : 'sans-serif',
        'weight' : 'normal',
        'size'   : 20}
plt.rc('font', **font)
sizes = US_data_male_1985["suicides_no"]
labels = US_data_male_1985["age"]
# Pull out the first wedge only; built from the data length so the tuple always
# matches the number of wedges.
explode = tuple(0.1 if wedge == 0 else 0 for wedge in range(len(sizes)))
colors = ["purple", "pink", "red", "lightskyblue", "orange", "yellowgreen"]
plt.title("Suicide by Age Group: 1985, MALES")
plt.pie(sizes, explode=explode, labels=labels, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=135)
plt.axis("auto")
path_PiMa1985 = os.path.join("..", "Images", "US_male1985.png")
plt.savefig(path_PiMa1985, bbox_inches="tight")

In [None]:
# Pie chart: share of 1985 female suicides by age group. (The 2015 male and
# 2015 female pies follow in the next cells.)
# 'sans-serif' is one of matplotlib's generic font families; the previous value
# 'normal' is not a family name and only produced "font family not found"
# warnings. plt.rc changes global settings, so it also affects later figures.
font = {'family' : 'sans-serif',
        'weight' : 'normal',
        'size'   : 20}
plt.rc('font', **font)
sizes = US_data_female_1985["suicides_no"]
labels = US_data_female_1985["age"]
# Pull out the first wedge only; built from the data length so the tuple always
# matches the number of wedges.
explode = tuple(0.1 if wedge == 0 else 0 for wedge in range(len(sizes)))
colors = ["purple", "pink", "red", "lightskyblue", "orange", "yellowgreen"]
plt.title("Suicide by Age Group: 1985, FEmales")
plt.pie(sizes, explode=explode, labels=labels, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=135)
plt.axis("auto")
path_PiFe1985 = os.path.join("..", "Images", "US_FEmale1985.png")
plt.savefig(path_PiFe1985, bbox_inches="tight")

In [None]:
# Pie chart: share of 2015 male suicides by age group.
# 'sans-serif' is one of matplotlib's generic font families; the previous value
# 'normal' is not a family name and only produced "font family not found"
# warnings. plt.rc changes global settings, so it also affects later figures.
font = {'family' : 'sans-serif',
        'weight' : 'normal',
        'size'   : 20}
plt.rc('font', **font)
sizes = US_data_male_2015["suicides_no"]
labels = US_data_male_2015["age"]
# Pull out the first wedge only; built from the data length so the tuple always
# matches the number of wedges.
explode = tuple(0.1 if wedge == 0 else 0 for wedge in range(len(sizes)))
colors = ["purple", "pink", "red", "lightskyblue", "orange", "yellowgreen"]
plt.title("Suicide by Age Group: 2015, MALES")
plt.pie(sizes, explode=explode, labels=labels, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=135)
plt.axis("auto")
path_PiMa2015 = os.path.join("..", "Images", "US_male2015.png")
plt.savefig(path_PiMa2015, bbox_inches="tight")

In [None]:
# Pie chart: share of 2015 female suicides by age group.
# 'sans-serif' is one of matplotlib's generic font families; the previous value
# 'normal' is not a family name and only produced "font family not found"
# warnings. plt.rc changes global settings, so it also affects later figures.
font = {'family' : 'sans-serif',
        'weight' : 'normal',
        'size'   : 20}
plt.rc('font', **font)
sizes = US_data_female_2015["suicides_no"]
labels = US_data_female_2015["age"]
# Pull out the first wedge only; built from the data length so the tuple always
# matches the number of wedges.
explode = tuple(0.1 if wedge == 0 else 0 for wedge in range(len(sizes)))
colors = ["purple", "pink", "red", "lightskyblue", "orange", "yellowgreen"]
plt.title("Suicide by Age Group: 2015, FEmales")
plt.pie(sizes, explode=explode, labels=labels, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=135)
plt.axis("auto")
path_PiFe2015 = os.path.join("..", "Images", "US_FEmale2015.png")
plt.savefig(path_PiFe2015, bbox_inches="tight")

## Suicide Rates by Country, OECD

In [None]:
# Load the OECD (Organisation for Economic Co-operation and Development) export.
# Author's notes on the file: each country may span several years, one row per
# year; there is no breakdown, only total age-adjusted suicides per capita,
# which makes it a good basis for comparing countries.
oecd_path_parts = ("..", "data", "cleandata", "DP_LIVE_21032019024128590.csv")
path_in = os.path.join(*oecd_path_parts)
oecd_data = pd.read_csv(path_in)
oecd_df = pd.DataFrame(oecd_data)
oecd_df.head()

In [None]:
# Author's exploration notes (not re-run here):
#   - grouping by LOCATION gives 38 countries;
#   - the first row of every country is for 2014, and some countries ONLY have
#     2014 data, so 2014 is the only complete year;
#   - there is no population or economic data, but it is a good overview.
# One row per country via groupby(...).first(), which takes the first non-null
# value of each column within the group.
# NOTE(review): this yields 2014 only if each country's rows start at 2014 in
# file order - confirm, or filter on the year column explicitly.
# The result is then sorted from lowest to highest 'Value' and re-indexed 0..n-1
# with LOCATION back as an ordinary column.
oecd_group = (
    oecd_df
    .groupby('LOCATION')
    .first()
    .sort_values('Value')
    .reset_index()
)
oecd_group.head()

In [None]:
# The 3-letter codes in the OECD file are hard to read and do not match the
# list of OECD member names one-to-one, so print both for a manual comparison.
# Every entry is separated by a comma: adjacent string literals without one are
# silently concatenated by Python (e.g. "Austria" "Belgium" -> "AustriaBelgium").
# Sweden (SWE in the code-to-name mapping used for this data) is included so the
# list holds all 36 members.
country_members = [
    "Australia", "Austria", "Belgium", "Canada", "Chile", "Czech Republic",
    "Denmark", "Estonia", "Finland", "France", "Germany", "Greece", "Hungary",
    "Iceland", "Ireland", "Israël", "Italy", "Japan", "Korea", "Latvia",
    "Lithuania", "Luxembourg", "Mexico", "Netherlands", "New Zealand", "Norway",
    "Poland", "Portugal", "Slovak Republic", "Slovenia", "Spain", "Sweden",
    "Switzerland", "Turkey", "United Kingdom", "United States",
]
# len(country_members) is 36; the author's note says the data file has 38 LOCATION codes.
print(*oecd_group.loc[:,"LOCATION"])
print(*country_members)
### Author's notes from the manual comparison (not re-verified):
###  oecd_df is missing "Canada", "Latvia", "New Zealand", "United States"
###  oecd_df extras incl. "BRA", "COL", "CRI", "LVA", "ZAF"

In [None]:
# Convert the 3-letter country codes in LOCATION to readable country names.
# Fixes relative to the earlier mapping:
#   - Slovak Republic is "SVK" ("SLV" is El Salvador's code);
#   - "COL" is spelled "Colombia";
#   - "LVA" (Latvia), which the author's notes list among the codes present in
#     the data, now has a name.
countries = {
    "AUS": "Australia", "AUT": "Austria", "BEL": "Belgium",
    "BRA": "Brazil", "CHE": "Switzerland", "CHL": "Chile",
    "COL": "Colombia", "CRI": "Costa Rica", "CZE": "Czech Republic",
    "DEU": "Germany", "DNK": "Denmark", "ESP": "Spain",
    "EST": "Estonia", "FIN": "Finland", "FRA": "France",
    "GBR": "United Kingdom", "GRC": "Greece", "HUN": "Hungary",
    "IRL": "Ireland", "ISL": "Iceland", "ISR": "Israël",
    "ITA": "Italy", "JPN": "Japan", "KOR": "Korea",
    "LTU": "Lithuania", "LUX": "Luxembourg", "LVA": "Latvia",
    "MEX": "Mexico", "NLD": "Netherlands", "NOR": "Norway",
    "POL": "Poland", "PRT": "Portugal", "SVK": "Slovak Republic",
    "SVN": "Slovenia", "SWE": "Sweden", "TUR": "Turkey",
    "ZAF": "South Africa",
}
# Replace on the LOCATION column only. Calling DataFrame.replace(dict, value=None)
# is interpreted by newer pandas as a per-column {column: value-to-replace}
# mapping, which leaves LOCATION untouched; Series.replace with a dict is an
# unambiguous old -> new mapping. Codes without an entry are left as they are.
oecd_group["LOCATION"] = oecd_group["LOCATION"].replace(countries)
oecd_countries = pd.DataFrame(oecd_group)
oecd_countries.head()

In [None]:
# Horizontal bar chart of the per-country 'Value' column, one bar per row of
# oecd_countries in its current (sorted) order.
labels = oecd_countries["LOCATION"]
country_values = oecd_countries["Value"]
y_axis = np.arange(len(oecd_countries))

oecd_fig, oecd_ax = plt.subplots(figsize=(5,8))
oecd_ax.barh(y_axis, country_values, color='r', align="center",
             edgecolor='k', linewidth=1)

# Small padding around the bars on both axes.
oecd_ax.set_xlim(-0.5, country_values.max()+1)
oecd_ax.set_ylim(-1, len(oecd_countries))

# One tick per bar, labelled with that row's LOCATION.
oecd_ax.set_yticks(y_axis)
oecd_ax.set_yticklabels(labels)

oecd_ax.set_title("Suicide Rates by Country, 2014 (per 100,000 population)")
oecd_ax.set_xlabel("Suicides /100,000 population")
oecd_ax.set_ylabel("Countries")

plt.savefig(path_out_internat, bbox_inches="tight")
plt.show()


## Mental Health Facility locations on US map


In [None]:
# Re-read the facilities CSV so the map section starts from the unmerged table,
# then preview the first rows.
facilities_df = pd.read_csv(facilities_file)
facilities_df.head()

In [None]:
# Coordinates of every facility: just the latitude and longitude columns.
coordinate_columns = ["latitude", "longitude"]
location = facilities_df[coordinate_columns]
location.head()

In [None]:
# Density map of all facility coordinates (latitudes first, then longitudes).
# NOTE(review): mapsplotlib presumably needs the API key registered in the setup cell - confirm.
mplt.density_plot(location['latitude'], location['longitude'])

In [None]:
# Facilities outside Hawaii ("HI") and Alaska ("AK").
in_hi_or_ak = facilities_df["state"].isin(["HI", "AK"])
con_us = facilities_df[~in_hi_or_ak]

# Coordinates only, for the facilities kept above.
location_one = con_us[["latitude", "longitude"]]

In [None]:
# Density map of the facilities that are not in Hawaii or Alaska.
mplt.density_plot(con_us['latitude'] , con_us['longitude'])

In [None]:
# Hawaii-only facilities. A row whose state is "HI" can never be "AK", so the
# single equality test selects exactly the same rows as the two-part condition.
is_hawaii = facilities_df["state"] == "HI"
I_dontknow = facilities_df[is_hawaii]
I_dontknow.head()

In [None]:
# Density map of the Hawaii facilities (I_dontknow holds the state == "HI" rows).
mplt.density_plot(I_dontknow['latitude'] ,I_dontknow['longitude'])

In [None]:
# Alaska-only facilities. A row whose state is "AK" can never be "HI", so the
# single equality test selects exactly the same rows as the two-part condition.
is_alaska = facilities_df["state"] == "AK"
alaska = facilities_df[is_alaska]
alaska.head()

In [None]:
# Density map of the Alaska facilities.
mplt.density_plot(alaska['latitude'] ,alaska['longitude'])

## US suicide rate vs World suicide rate

In [None]:
suicide_df = pd.read_csv(suicide_us_file)
# Remove 2016 because data for that year exists for only some countries.
suicide_new_df = suicide_df[suicide_df['year']!=2016]

rate_column = 'suicides/100k pop'

# One row per (country, year): the sum of the rate column over all of that
# country's rows for the year, then indexed by country name for easy lookup.
# NOTE(review): this adds up the per-100k rates of the individual rows (the file
# is broken down by sex and age elsewhere in this notebook); a sum of subgroup
# rates is not the country's overall rate per 100k - confirm this is intended.
summary_df = (
    suicide_new_df
    .groupby(['country', 'year'], as_index=False)
    .agg({rate_column: 'sum'})
    .set_index("country")
)

# World line: unweighted mean over countries of the per-country value, per year.
average_suicide = summary_df.groupby('year').mean()
# Years for which any country has data.
years = summary_df.year.unique()

# US line. Selecting with a list (['United States']) keeps a DataFrame, so the
# result can be grouped by year just like the world figures.
us_average = summary_df.loc[['United States'], ["year", rate_column]].groupby('year').mean()

fig = plt.figure(figsize=(10,3))

# Each line is plotted against its own year index, so x and y always have the
# same length even if the US has no data for one of the years in `years`.
world_avg, = plt.plot(average_suicide.index, average_suicide[rate_column],
                      color="blue", label="World Average")
country_one, = plt.plot(us_average.index, us_average[rate_column],
                        color="green", label='United States')

# Create a legend for our chart
plt.legend(handles=[world_avg, country_one], loc="best")
plt.title("US vs. World Suicide Rate")
plt.ylabel("Suicides/100k Population")
plt.xlabel("Year")

# Save, then show the chart
fig.tight_layout()
fig.savefig(output_us_avg_vs_world_avg)

plt.show()

## Scatter Plot: State Mental Health Expenditures Per Capita vs. Suicide Rate, 2004-2013

In [None]:
ourExpenditure_df = pd.read_csv(Expenditure_file)
suicide_df = pd.read_csv(suicide_file)

# `ourfiltered_suicide_df` is not defined anywhere in this notebook, so on a
# fresh kernel this cell stopped with a NameError. Use the table loaded above.
# NOTE(review): the filter that originally produced `ourfiltered_suicide_df` is
# unknown. The scatter pairs the two tables row by row, so both must list the
# same states in the same order - confirm, or merge them on the state column.
ourfiltered_suicide_df = suicide_df
if len(ourfiltered_suicide_df) != len(ourExpenditure_df):
    raise ValueError(
        "Expenditure and suicide tables have different numbers of rows "
        f"({len(ourExpenditure_df)} vs {len(ourfiltered_suicide_df)}); "
        "filter or merge them so each row refers to the same state."
    )

# Years to plot. Each consecutive pair of years shares a colour; within a pair
# the first year is drawn with circles ("o") and the second with squares ("s").
plot_years = list(range(2004, 2014))
pair_colors = ["blue", "yellow", "orange", "green", "red"]

plt.figure(figsize=(15,15), dpi=50)

scatter_handles = []
for position, year in enumerate(plot_years):
    handle = plt.scatter(
        ourExpenditure_df[f'FY{year}__SMHA Expenditures Per Capita'],
        ourfiltered_suicide_df[f'{year}suic'],
        s=150,
        marker="o" if position % 2 == 0 else "s",
        facecolors=pair_colors[position // 2],
        edgecolors="black",
        alpha=.8,
    )
    scatter_handles.append(handle)

plt.title("State Spending on Mental Health Services by Year", fontsize = 30)
plt.xlabel("Expenditures Per Capita (Millions)", fontsize=30)
plt.ylabel("Suicide Rate", fontsize = 30)
plt.grid()

# The legend is anchored outside the right edge of the axes.
plt.legend(scatter_handles, [str(year) for year in plot_years],
           loc ='lower left', bbox_to_anchor=(1, 0.5),
           scatterpoints= 1,fontsize=25)
# Save Figure. bbox_inches="tight" expands the saved area to include the legend,
# which would otherwise be cut off because it lies outside the axes.
plt.savefig("../Images/State_Spending_Affects_On_Mental_Health.png", bbox_inches="tight")

plt.show()

