In [2]:
%matplotlib notebook

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [4]:
# Load the raw suicide-rate CSV (path is relative to the notebook's folder)
rough = "Resources/suicide_rates.csv"
rough_data_df = pd.read_csv(filepath_or_buffer=rough)

In [5]:
# Clean the raw data. Every step derives a new frame starting from
# `rough_data_df`, so re-running this cell rebuilds `filtered_date_df`
# from scratch instead of stacking changes on top of a previous run.

# columns that are not used anywhere in this analysis
unused_columns = [
    "HDI for year",
    "country-year",
    "generation",
    "gdp_per_capita ($)",
    "suicides_no",
    "population",
]

# age buckets without the " years" suffix; "5-14" is zero-padded so the
# labels sort in age order when compared as plain strings
age_labels = {
    '35-54 years': '35-54',
    '25-34 years': '25-34',
    '55-74 years': '55-74',
    '75+ years': '75+',
    '15-24 years': '15-24',
    '5-14 years': '05-14',
}

rough_df = (
    rough_data_df
    .drop(columns=unused_columns)
    # dropna() returns a NEW frame; the result must be kept, otherwise
    # rows with missing values are never actually removed
    .dropna(how='any')
    .assign(age=lambda frame: frame['age'].replace(age_labels))
)

# rename columns (the GDP header is padded with spaces in the source file)
renamed_df = rough_df.rename(columns={
    "country": "Country",
    "year": "Year",
    "sex": "Sex",
    "age": "Age Range",
    "suicides/100k pop": "Suicides/100k pop",
    " gdp_for_year ($) ": "GDP(Year)",
})

# filter for years 2000-2016 (both ends inclusive)
after_start_date = renamed_df["Year"] >= 2000
before_end_date = renamed_df["Year"] <= 2016
between_two_dates = after_start_date & before_end_date
filtered_date = renamed_df.loc[between_two_dates]

# Take an explicit copy so the column assignment below writes to an
# independent frame rather than to a slice of `renamed_df`
# (this is what avoids pandas' SettingWithCopyWarning).
filtered_date_df = filtered_date.copy()

# Convert GDP from comma-separated text to integers for calculations.
# int64 is requested explicitly: the values are in the billions and do not
# fit in 32 bits, which is what plain `int` can map to on some platforms
# (e.g. Windows with NumPy < 2).
filtered_date_df["GDP(Year)"] = (
    filtered_date_df["GDP(Year)"]
    .str.replace(',', '', regex=False)
    .astype("int64")
)

# display sample
filtered_date_df.head()

Unnamed: 0,Country,Year,Sex,Age Range,Suicides/100k pop,GDP(Year)
132,Albania,2000,male,25-34,7.33,3632043908
133,Albania,2000,male,55-74,5.64,3632043908
134,Albania,2000,female,75+,5.29,3632043908
135,Albania,2000,male,75+,4.02,3632043908
136,Albania,2000,female,15-24,2.27,3632043908


In [6]:
# Average GDP and suicide rate for every (Country, Year) pair.
# The grouping is built once and reused for both columns.
by_country_year = filtered_date_df.groupby(["Country", "Year"])
country_gdp = by_country_year["GDP(Year)"].mean()
country_suicide = by_country_year["Suicides/100k pop"].mean()

# Put the two averages side by side in one summary table ...
country_summary_df = pd.concat(
    [
        country_gdp.rename("GDP Mean"),
        country_suicide.rename("Suicide Rate Mean"),
    ],
    axis=1,
)

# ... and turn the (Country, Year) index back into ordinary columns
country_summary_reset_df = country_summary_df.reset_index()

# display table
country_summary_reset_df


Unnamed: 0,Country,Year,GDP Mean,Suicide Rate Mean
0,Albania,2000,3632043908,2.558333
1,Albania,2001,4060758804,4.218333
2,Albania,2002,4435078648,5.209167
3,Albania,2003,5746945913,4.883333
4,Albania,2004,7314865176,5.449167
...,...,...,...,...
1345,Uzbekistan,2010,39332770929,5.269167
1346,Uzbekistan,2011,45915191189,5.595000
1347,Uzbekistan,2012,51821573338,6.460833
1348,Uzbekistan,2013,57690453461,6.571667


In [7]:
# Bubble chart of the country/year summary:
#   x = year, y = mean GDP (log scale),
#   marker area = mean suicide rate, marker colour = country.

# Store Country as a categorical so every country gets an integer code that
# can be fed to the colormap. Re-running this line is harmless: converting an
# already-categorical column yields the same categories.
country_summary_reset_df["Country"] = pd.Categorical(country_summary_reset_df["Country"])

# Explicit figure/axes pair instead of the implicit pyplot state machine
fig, ax = plt.subplots(figsize=(10, 10))

# NOTE: "tab20" only has 20 distinct colours, so when there are more
# countries than that, several countries end up sharing a colour.
ax.scatter(
    x=country_summary_reset_df["Year"],
    y=country_summary_reset_df["GDP Mean"],
    s=country_summary_reset_df["Suicide Rate Mean"],
    c=country_summary_reset_df["Country"].cat.codes,
    cmap="tab20",
    edgecolors="white",
    linewidth=2,
)

# Axis scale, labels and title
ax.set_yscale('log')
ax.set_xlabel("Year")  # the x-axis plots the Year column, not the country
ax.set_ylabel("GDP Mean")
ax.set_title("GDP by Country and Year, 2000-2016 (marker size = mean suicides per 100k)")

# Display plot
plt.show()

<IPython.core.display.Javascript object>

In [10]:
# Group by Country and Age Range / find averages
# (rows from every year in the filtered data are pooled within each group)
age_keys = ["Country", "Age Range"]
age_groups = filtered_date_df.groupby(age_keys)
age_gdp = age_groups["GDP(Year)"].mean()
age_suicide = age_groups["Suicides/100k pop"].mean()

# Build the summary table: start from the GDP averages and attach the
# suicide-rate averages as a second column (both share the same index)
age_summary_df = age_gdp.to_frame("GDP Mean").assign(
    **{"Suicide Rate Mean": age_suicide}
)

# Move the group keys out of the index into regular columns
age_summary_reset_df = age_summary_df.reset_index()

# Display table
age_summary_reset_df


Unnamed: 0,Country,Age Range,GDP Mean,Suicide Rate Mean
0,Albania,05-14,8.172219e+09,0.524091
1,Albania,15-24,8.172219e+09,3.287273
2,Albania,25-34,8.172219e+09,4.156364
3,Albania,35-54,8.172219e+09,3.963182
4,Albania,55-74,8.172219e+09,3.438182
...,...,...,...,...
582,Uzbekistan,15-24,3.023597e+10,8.576250
583,Uzbekistan,25-34,3.023597e+10,9.044583
584,Uzbekistan,35-54,3.023597e+10,9.106667
585,Uzbekistan,55-74,3.023597e+10,7.130000


In [13]:
# Bubble chart of the country/age-range summary:
#   x = age range, y = mean GDP (log scale),
#   marker area = mean suicide rate, marker colour = country.

# Store Country as a categorical so every country gets an integer code that
# can be fed to the colormap. Re-running this line is harmless: converting an
# already-categorical column yields the same categories.
age_summary_reset_df["Country"] = pd.Categorical(age_summary_reset_df["Country"])

# Explicit figure/axes pair instead of the implicit pyplot state machine
fig, ax = plt.subplots(figsize=(10, 10))

# NOTE: "tab20" only has 20 distinct colours, so when there are more
# countries than that, several countries end up sharing a colour.
ax.scatter(
    x=age_summary_reset_df["Age Range"],
    y=age_summary_reset_df["GDP Mean"],
    s=age_summary_reset_df["Suicide Rate Mean"],
    c=age_summary_reset_df["Country"].cat.codes,
    cmap="tab20",
    edgecolors="white",
    linewidth=2,
)

# Axis scale, labels and a real title in place of the placeholder text
ax.set_yscale('log')
ax.set_xlabel("Age Range")
ax.set_ylabel("GDP Mean")
ax.set_title("Mean GDP by Age Range and Country (marker size = mean suicides per 100k)")

# Display plot
plt.show()

<IPython.core.display.Javascript object>

In [18]:
# Group by Country and Sex / find averages
# (rows from every year in the filtered data are pooled within each group)
sex_means = (
    filtered_date_df
    .groupby(["Country", "Sex"])[["GDP(Year)", "Suicides/100k pop"]]
    .mean()
)
sex_gdp = sex_means["GDP(Year)"]
sex_suicide = sex_means["Suicides/100k pop"]

# Create summary table with presentation-friendly column names
sex_summary_df = pd.DataFrame(
    {
        "GDP Mean": sex_gdp,
        "Suicide Rate Mean": sex_suicide,
    }
)

# Move the group keys out of the index into regular columns
sex_summary_reset_df = sex_summary_df.reset_index()

# Display table
sex_summary_reset_df

Unnamed: 0,Country,Sex,GDP Mean,Suicide Rate Mean
0,Albania,female,8.172219e+09,2.475909
1,Albania,male,8.172219e+09,4.287424
2,Antigua and Barbuda,female,1.096668e+09,0.185952
3,Antigua and Barbuda,male,1.096668e+09,1.562857
4,Argentina,female,3.455685e+11,3.373229
...,...,...,...,...
191,United States,male,1.406948e+13,21.297812
192,Uruguay,female,3.100206e+10,7.321778
193,Uruguay,male,3.100206e+10,35.760000
194,Uzbekistan,female,3.023597e+10,3.689167


In [20]:
# Bubble chart of the country/sex summary:
#   x = sex, y = mean GDP (log scale),
#   marker area = mean suicide rate, marker colour = country.

# Store Country as a categorical so every country gets an integer code that
# can be fed to the colormap. Re-running this line is harmless: converting an
# already-categorical column yields the same categories.
sex_summary_reset_df["Country"] = pd.Categorical(sex_summary_reset_df["Country"])

# Explicit figure/axes pair instead of the implicit pyplot state machine
fig, ax = plt.subplots(figsize=(10, 10))

# NOTE: "tab20" only has 20 distinct colours, so when there are more
# countries than that, several countries end up sharing a colour.
ax.scatter(
    x=sex_summary_reset_df["Sex"],
    y=sex_summary_reset_df["GDP Mean"],
    s=sex_summary_reset_df["Suicide Rate Mean"],
    c=sex_summary_reset_df["Country"].cat.codes,
    cmap="tab20",
    edgecolors="white",
    linewidth=2,
)

# Axis scale, labels and a real title in place of the placeholder text
ax.set_yscale('log')
ax.set_xlabel("Sex")
ax.set_ylabel("GDP Mean")
ax.set_title("Mean GDP by Sex and Country (marker size = mean suicides per 100k)")

# Display plot
plt.show()

<IPython.core.display.Javascript object>

In [21]:
# Are suicide rates higher in developing countries or in developed countries?

# TODO: heatmap of suicide rates globally


In [22]:
# Country vs. overall suicide rate

# Mean suicide rate per country across all rows of the filtered data
df_Country_Overall_suicide_rate = (
    filtered_date_df
    .groupby('Country')['Suicides/100k pop']
    .mean()
)

# "Overall suicide rate" here is the unweighted mean of the per-country
# means (an average of averages), not the mean over the raw rows
mean_sui2 = df_Country_Overall_suicide_rate.mean()

# Signed difference between each country's mean and that global figure
Deviation = df_Country_Overall_suicide_rate.sub(mean_sui2)

# One row per country: its own mean, the (constant) global mean, and the gap
country_mean_df = df_Country_Overall_suicide_rate.to_frame("Country Mean").assign(
    **{"Global Mean": mean_sui2, "Deviation": Deviation}
)
country_mean_df

Unnamed: 0_level_0,Country Mean,Global Mean,Deviation
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Albania,3.381667,11.481212,-8.099545
Antigua and Barbuda,0.874405,11.481212,-10.606807
Argentina,10.186094,11.481212,-1.295118
Armenia,3.131517,11.481212,-8.349695
Aruba,8.142639,11.481212,-3.338573
...,...,...,...
United Arab Emirates,1.317917,11.481212,-10.163295
United Kingdom,6.769635,11.481212,-4.711577
United States,12.945417,11.481212,1.464205
Uruguay,21.540889,11.481212,10.059677


In [23]:
# Gender vs. country & suicide rate:
# mean suicides per 100k for every (Country, Sex) pair, as a Series
gender_keys = ['Country', 'Sex']
Gender_tmp_df = filtered_date_df.groupby(gender_keys)['Suicides/100k pop'].agg('mean')

# Preview the first few groups
Gender_tmp_df.head()

Country              Sex   
Albania              female    2.475909
                     male      4.287424
Antigua and Barbuda  female    0.185952
                     male      1.562857
Argentina            female    3.373229
Name: Suicides/100k pop, dtype: float64

In [24]:
# Age range vs. country & suicide rate:
# mean suicides per 100k for every (Country, Age Range) pair, as a Series
Age_tmp_df = (
    filtered_date_df
    .groupby(['Country', 'Age Range'])['Suicides/100k pop']
    .mean()
)

Age_tmp_df




Country     Age Range
Albania     05-14        0.524091
            15-24        3.287273
            25-34        4.156364
            35-54        3.963182
            55-74        3.438182
                           ...   
Uzbekistan  15-24        8.576250
            25-34        9.044583
            35-54        9.106667
            55-74        7.130000
            75+          5.862500
Name: Suicides/100k pop, Length: 587, dtype: float64

In [25]:
#suicide rates in the United States for 2017-2019?

In [26]:
# Aggregate data by gender/GDP & suicide rate for 2017-2019

In [27]:
# Aggregate data by gender/GDP & suicide rate for 2017-2019