In [1]:
import pandas as pd
import csv

# Pre-work Data Exploration
* Overview - variables/indicators
* Info - length, rows/columns, object type, value counts, unique/duplicates or NaN items (if there's any) 
* Summary Statistics
* Pre-merging of datasets (if applicable)

## Suicide Rate (1987-2016) 

In [2]:
# Load Suicide Rate 1987-2016
Suicide_Rate_1987_2016 = "Data sets/Raw Data/Suicide rate/rate_1987-2016.csv"
k_Suicide_Rate_1987_2016 = pd.read_csv(Suicide_Rate_1987_2016)
k_Suicide_Rate_1987_2016.head(1)

Unnamed: 0,country,year,sex,age,suicides_no,population,suicides/100k pop,country-year,HDI for year,gdp_for_year ($),gdp_per_capita ($),generation
0,Albania,1987,male,15-24 years,21,312900,6.71,Albania1987,,2156624900,796,Generation X


## Depression Data & Suicide Rate (1990-2017) 

In [5]:
# Load/Read rate_depression_1990-2017
rate_depression_1990_2017 = "Data sets/Raw Data/Suicide rate/rate_depression_1990-2017.csv"
k_rate_depression_1990_2017 = pd.read_csv(rate_depression_1990_2017)
k_rate_depression_1990_2017.head(1)

Unnamed: 0,Entity,Code,Year,"Suicide rate (deaths per 100,000 individuals)","Depressive disorder rates (number suffering per 100,000)",Population
0,Afghanistan,AFG,1800,,,3280000.0


In [6]:
# Show rate_depression_1990-2017 general info
k_rate_depression_1990_2017.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47807 entries, 0 to 47806
Data columns (total 6 columns):
 #   Column                                                    Non-Null Count  Dtype  
---  ------                                                    --------------  -----  
 0   Entity                                                    47807 non-null  object 
 1   Code                                                      46081 non-null  object 
 2   Year                                                      47807 non-null  object 
 3   Suicide rate (deaths per 100,000 individuals)             6468 non-null   float64
 4   Depressive disorder rates (number suffering per 100,000)  6468 non-null   float64
 5   Population                                                46883 non-null  float64
dtypes: float64(3), object(3)
memory usage: 2.2+ MB


In [55]:
k_rate_depression_1990_2017.dropna(inplace=True)
k_rate_depression_1990_2017

Unnamed: 0,Entity,Code,Year,"Suicide rate (deaths per 100,000 individuals)","Depressive disorder rates (number suffering per 100,000)",Population
190,Afghanistan,AFG,1990,10.318504,4039.755763,12412000.0
191,Afghanistan,AFG,1991,10.327010,4046.256034,13299000.0
192,Afghanistan,AFG,1992,10.271411,4053.709902,14486000.0
193,Afghanistan,AFG,1993,10.376123,4060.203474,15817000.0
194,Afghanistan,AFG,1994,10.575915,4062.290365,17076000.0
...,...,...,...,...,...,...
47800,Zimbabwe,ZWE,2013,28.361200,3048.264249,13350000.0
47801,Zimbabwe,ZWE,2014,27.605547,3056.996704,13587000.0
47802,Zimbabwe,ZWE,2015,27.197061,3068.250731,13815000.0
47803,Zimbabwe,ZWE,2016,26.839591,3081.782858,14030000.0


In [56]:
# Double check if there are NaN values left in "Suicide rate (deaths per 100,000 individuals)" column
d_check_null = pd.isnull(k_rate_depression_1990_2017['Suicide rate (deaths per 100,000 individuals)'])
k_rate_depression_1990_2017[d_check_null]

Unnamed: 0,Entity,Code,Year,"Suicide rate (deaths per 100,000 individuals)","Depressive disorder rates (number suffering per 100,000)",Population


## Depression & Suicide Rate Cohort Y2015-2017

#### Prevalence Rate of Suicide (deaths per 100,000)

In [70]:
# Select the year cohort that will be correlated with Happiness Report
year_cohort = k_rate_depression_1990_2017.loc[(k_rate_depression_1990_2017['Year'] =='2015') | 
                                            (k_rate_depression_1990_2017['Year'] =='2016') |
                                            (k_rate_depression_1990_2017['Year'] == '2017'), :] 
# Display pivot table showing Year as columns
suicide_cohort = pd.pivot_table(year_cohort, values='Suicide rate (deaths per 100,000 individuals)', index=['Entity'],
                           columns='Year')
suicide_cohort.head()

Year,2015,2016,2017
Entity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,9.319114,9.2516,9.188568
Albania,5.191083,5.155579,5.108781
Algeria,4.291622,4.187815,4.124306
American Samoa,6.79261,6.753623,6.738545
Andorra,7.967667,7.94437,7.912273


In [71]:
suicide_cohort.info()

<class 'pandas.core.frame.DataFrame'>
Index: 196 entries, Afghanistan to Zimbabwe
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   2015    196 non-null    float64
 1   2016    196 non-null    float64
 2   2017    196 non-null    float64
dtypes: float64(3)
memory usage: 6.1+ KB


In [74]:
# Display a summary statistic of Suicide data (2015-2017)
suicide_cohort.describe()

Year,2015,2016,2017
count,196.0,196.0,196.0
mean,10.467261,10.348405,10.266804
std,6.590778,6.421433,6.297894
min,2.485095,2.489959,2.492574
25%,5.970391,5.974226,5.997368
50%,9.299083,9.278195,9.241226
75%,12.534185,12.546712,12.431927
max,52.736039,51.609949,51.063239


#### Prevalence Rate of Depression (deaths per 100,000)

In [68]:
# From the selected year cohort, 
# Display pivot table showing Year as columns
depress_cohort = pd.pivot_table(year_cohort, values='Depressive disorder rates (number suffering per 100,000)', index=['Entity'],
                           columns=['Year'])
depress_cohort.head()

Year,2015,2016,2017
Entity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,4046.831685,4044.720378,4042.996929
Albania,2102.716655,2103.861099,2104.971611
Algeria,3425.956529,3434.270011,3445.182117
American Samoa,2817.635241,2818.923237,2821.194026
Andorra,3417.22746,3415.914479,3418.14286


In [72]:
depress_cohort.info()

<class 'pandas.core.frame.DataFrame'>
Index: 196 entries, Afghanistan to Zimbabwe
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   2015    196 non-null    float64
 1   2016    196 non-null    float64
 2   2017    196 non-null    float64
dtypes: float64(3)
memory usage: 6.1+ KB


In [73]:
# Display a summary statistic of Depression data 2015-2017
depress_cohort.describe()

Year,2015,2016,2017
count,196.0,196.0,196.0
mean,3280.90638,3279.147788,3278.426389
std,621.463531,616.531805,611.45701
min,2075.450656,2072.534268,2072.289024
25%,2820.502499,2820.463141,2821.217264
50%,3277.991397,3280.945964,3287.30635
75%,3657.491479,3658.408102,3663.509223
max,5804.713205,5762.235871,5722.814192


In [8]:
# Display Statistic Analysis before the NaN values were dropped and partitioned to 2015-2017 cohorts 
k_rate_depression_1990_2017.describe()

Unnamed: 0,"Suicide rate (deaths per 100,000 individuals)","Depressive disorder rates (number suffering per 100,000)",Population
count,6468.0,6468.0,46883.0
mean,12.06824,3350.318288,29827900.0
std,8.037739,616.56521,253086000.0
min,1.526792,2065.45187,905.0
25%,6.741737,2885.327905,201733.5
50%,10.668531,3353.253199,1542937.0
75%,14.742472,3739.782595,5886795.0
max,98.831696,6096.437594,7713468000.0


## Happiness Raw Data per Year (2015-2017) 

In [11]:
# Load/ happiness_2015
happiness_2015 = "Data sets/Raw Data/Happiness Score/happiness_2015.csv"
k_happiness_2015 = pd.read_csv(happiness_2015)
k_happiness_2015.head(1)

Unnamed: 0,Country,Region,Happiness Rank,Happiness Score,Standard Error,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
0,Switzerland,Western Europe,1,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738


In [13]:
# Rename Happiness Score with Y2015 
happy_2015 = k_happiness_2015.rename(columns={'Happiness Rank': '2015 Happiness Rank', 'Happiness Score': '2015 Happiness Score',
                                             'Standard Error': '2015 Standard Error', 'Economy (GDP per Capita)': '2015 Economy (GDP per Capita)',
                                             'Family': '2015 Family', 'Health (Life Expectancy)': '2015 Health (Life Expectancy)',
                                             'Freedom': '2015 Freedom', 'Trust (Government Corruption)': '2015 Trust (Government Corruption)',
                                             'Generosity': '2015 Generosity', 'Dystopia Residual': '2015 Dystopia Residual',})
happy_2015.head(1)

Unnamed: 0,Country,Region,2015 Happiness Rank,2015 Happiness Score,2015 Standard Error,2015 Economy (GDP per Capita),2015 Family,2015 Health (Life Expectancy),2015 Freedom,2015 Trust (Government Corruption),2015 Generosity,2015 Dystopia Residual
0,Switzerland,Western Europe,1,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738


In [15]:
# Load/Read happiness_2016
happiness_2016 = "Data sets/Raw Data/Happiness Score/happiness_2016.csv"
k_happiness_2016 = pd.read_csv(happiness_2016)
k_happiness_2016.head(1)

Unnamed: 0,Country,Region,Happiness Rank,Happiness Score,Lower Confidence Interval,Upper Confidence Interval,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
0,Denmark,Western Europe,1,7.526,7.46,7.592,1.44178,1.16374,0.79504,0.57941,0.44453,0.36171,2.73939


In [16]:
# Rename Happiness Score with Y2016
happy_2016 = k_happiness_2016.rename(columns={'Happiness Rank': '2016 Happiness Rank', 'Happiness Score': '2016 Happiness Score',
                                             'Lower Confidence Interval': '2016 Lower Confidence Interval', 'Upper Confidence Interval': '2016 Upper Confidence Interval',
                                             'Economy (GDP per Capita)': '2016 Economy (GDP per Capita)',
                                             'Family': '2016 Family', 'Health (Life Expectancy)': '2016 Health (Life Expectancy)',
                                             'Freedom': '2016 Freedom', 'Trust (Government Corruption)': '2016 Trust (Government Corruption)',
                                             'Generosity': '2016 Generosity', 'Dystopia Residual': '2016 Dystopia Residual',})
happy_2016.head(1)

Unnamed: 0,Country,Region,2016 Happiness Rank,2016 Happiness Score,2016 Lower Confidence Interval,2016 Upper Confidence Interval,2016 Economy (GDP per Capita),2016 Family,2016 Health (Life Expectancy),2016 Freedom,2016 Trust (Government Corruption),2016 Generosity,2016 Dystopia Residual
0,Denmark,Western Europe,1,7.526,7.46,7.592,1.44178,1.16374,0.79504,0.57941,0.44453,0.36171,2.73939


In [18]:
# Load/Read happiness_2017
happiness_2017 = "Data sets/Raw Data/Happiness Score/happiness_2017.csv"
k_happiness_2017 = pd.read_csv(happiness_2017)
k_happiness_2017.head(1)

Unnamed: 0,Country,Happiness.Rank,Happiness.Score,Whisker.high,Whisker.low,Economy..GDP.per.Capita.,Family,Health..Life.Expectancy.,Freedom,Generosity,Trust..Government.Corruption.,Dystopia.Residual
0,Norway,1,7.537,7.594445,7.479556,1.616463,1.533524,0.796667,0.635423,0.362012,0.315964,2.277027


In [21]:
# Rename Happiness Score with Y2017
happy_2017 = k_happiness_2017.rename(columns={'Happiness.Rank': '2017 Happiness Rank', 'Happiness.Score': '2017 Happiness Score',
                                             'Whisker.high': '2017 Whisker.high', 'Whisker.low': '2017 Whisker.low', 'Economy..GDP.per.Capita.': '2017 Economy (GDP per Capita)',
                                             'Family': '2017 Family', 'Health..Life.Expectancy.': '2017 Health (Life Expectancy)',
                                             'Freedom': '2017 Freedom', 'Trust..Government.Corruption.': '2017 Trust (Government Corruption)',
                                             'Generosity': '2017 Generosity', 'Dystopia.Residual': '2017 Dystopia Residual',})
happy_2017.head(1)

Unnamed: 0,Country,2017 Happiness Rank,2017 Happiness Score,2017 Whisker.high,2017 Whisker.low,2017 Economy (GDP per Capita),2017 Family,2017 Health (Life Expectancy),2017 Freedom,2017 Generosity,2017 Trust (Government Corruption),2017 Dystopia Residual
0,Norway,1,7.537,7.594445,7.479556,1.616463,1.533524,0.796667,0.635423,0.362012,0.315964,2.277027


## Combine ALL Happiness Data (2015-2017) 

In [41]:
# Use merge with functools reduce method  in order to integrate all variables per country
from functools import reduce

# Display full column length of the combined dataframe
pd.set_option('display.max_columns', None)

# Combine all Happiness Data from 2015-2017
happy_dfs = [happy_2015, happy_2016, happy_2017]
all_happiness = reduce(lambda left,right: pd.merge(left,right,on='Country'), happy_dfs)
all_happiness.head()

Unnamed: 0,Country,Region_x,2015 Happiness Rank,2015 Happiness Score,2015 Standard Error,2015 Economy (GDP per Capita),2015 Family,2015 Health (Life Expectancy),2015 Freedom,2015 Trust (Government Corruption),2015 Generosity,2015 Dystopia Residual,Region_y,2016 Happiness Rank,2016 Happiness Score,2016 Lower Confidence Interval,2016 Upper Confidence Interval,2016 Economy (GDP per Capita),2016 Family,2016 Health (Life Expectancy),2016 Freedom,2016 Trust (Government Corruption),2016 Generosity,2016 Dystopia Residual,2017 Happiness Rank,2017 Happiness Score,2017 Whisker.high,2017 Whisker.low,2017 Economy (GDP per Capita),2017 Family,2017 Health (Life Expectancy),2017 Freedom,2017 Generosity,2017 Trust (Government Corruption),2017 Dystopia Residual
0,Switzerland,Western Europe,1,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,Western Europe,2,7.509,7.428,7.59,1.52733,1.14524,0.86303,0.58557,0.41203,0.28083,2.69463,4,7.494,7.561772,7.426227,1.56498,1.516912,0.858131,0.620071,0.290549,0.367007,2.276716
1,Iceland,Western Europe,2,7.561,0.04884,1.30232,1.40223,0.94784,0.62877,0.14145,0.4363,2.70201,Western Europe,3,7.501,7.333,7.669,1.42666,1.18326,0.86733,0.56624,0.14975,0.47678,2.83137,3,7.504,7.62203,7.38597,1.480633,1.610574,0.833552,0.627163,0.47554,0.153527,2.322715
2,Denmark,Western Europe,3,7.527,0.03328,1.32548,1.36058,0.87464,0.64938,0.48357,0.34139,2.49204,Western Europe,1,7.526,7.46,7.592,1.44178,1.16374,0.79504,0.57941,0.44453,0.36171,2.73939,2,7.522,7.581728,7.462272,1.482383,1.551122,0.792566,0.626007,0.35528,0.40077,2.313707
3,Norway,Western Europe,4,7.522,0.0388,1.459,1.33095,0.88521,0.66973,0.36503,0.34699,2.46531,Western Europe,4,7.498,7.421,7.575,1.57744,1.1269,0.79579,0.59609,0.35776,0.37895,2.66465,1,7.537,7.594445,7.479556,1.616463,1.533524,0.796667,0.635423,0.362012,0.315964,2.277027
4,Canada,North America,5,7.427,0.03553,1.32629,1.32261,0.90563,0.63297,0.32957,0.45811,2.45176,North America,6,7.404,7.335,7.473,1.44015,1.0961,0.8276,0.5737,0.31329,0.44834,2.70485,7,7.316,7.384403,7.247597,1.479204,1.481349,0.834558,0.611101,0.43554,0.287372,2.187264


In [34]:
# Display Country List
country_list2 = all_happiness['Country'].tolist()
country_list2.sort()
country_list2

['Afghanistan',
 'Albania',
 'Algeria',
 'Angola',
 'Argentina',
 'Armenia',
 'Australia',
 'Austria',
 'Azerbaijan',
 'Bahrain',
 'Bangladesh',
 'Belarus',
 'Belgium',
 'Benin',
 'Bhutan',
 'Bolivia',
 'Bosnia and Herzegovina',
 'Botswana',
 'Brazil',
 'Bulgaria',
 'Burkina Faso',
 'Burundi',
 'Cambodia',
 'Cameroon',
 'Canada',
 'Chad',
 'Chile',
 'China',
 'Colombia',
 'Congo (Brazzaville)',
 'Congo (Kinshasa)',
 'Costa Rica',
 'Croatia',
 'Cyprus',
 'Czech Republic',
 'Denmark',
 'Dominican Republic',
 'Ecuador',
 'Egypt',
 'El Salvador',
 'Estonia',
 'Ethiopia',
 'Finland',
 'France',
 'Gabon',
 'Georgia',
 'Germany',
 'Ghana',
 'Greece',
 'Guatemala',
 'Guinea',
 'Haiti',
 'Honduras',
 'Hungary',
 'Iceland',
 'India',
 'Indonesia',
 'Iran',
 'Iraq',
 'Ireland',
 'Israel',
 'Italy',
 'Ivory Coast',
 'Jamaica',
 'Japan',
 'Jordan',
 'Kazakhstan',
 'Kenya',
 'Kosovo',
 'Kuwait',
 'Kyrgyzstan',
 'Latvia',
 'Lebanon',
 'Liberia',
 'Libya',
 'Lithuania',
 'Luxembourg',
 'Macedonia',
 '

In [37]:
# Double check total count of countries in the list. Excluding Duplicates
total_countryHappy = len(all_happiness['Country'].unique())
print(f"The merged happiness data has {total_countryHappy} countries listed") 

The merged happiness data has 146 countries listed


## Export combined Happiness Report (2015-2017)

In [50]:
# Export combined Happiness data (2015-17) to csv & drop index in the process
all_happiness.to_csv('Data sets/Raw Data/Happiness Score/happiness_combinedY2015-17.csv', index=False)

#### Observations during data exploration & initial cleaning:

* Happiness 2015 & 2016 source data have similar column names but different statistic component (Standard error vs Confidence interval). Happiness 2017 does not include "Region" Column and has different statistic (Whisker high/low).
  
  In order to merge the 3 Happiness CSV files correctly, column names had to be cleaned first in order to identify what year the indicator belongs to.  
----
* Since our happiness data is limited to 2015-2017 and Suicide rates' most recent data is 2017, should we drop all other years and make a clean dataset for a three-year (2015-2017) correlation of Happiness/Depression vs Suicide Rate?
----
* What do we do with certain African territories that have no country codes? - drop? makeup something? or leave as NaN then include to the analysis? -- OK, We're dropping all countries with NaN values
----
* How about if for other key values(float) showing NaN, we fill it with the average value instead of dropping the entire row entries? -- OK, We're dropping all countries with NaN values
----


# Clean Data
* Remove countries with incomplete data during analysis period (1987 to 2016)
* Assess whether more recent time period has more complete data and adjust analysis period 
* Group variables if needed: 
    * Group countries based on regions: eg. North America, South America / developing 
    * Group countries based on GDP
    * Group duplicates if found 
    * Group by age group 
* Create new variables: 
    * Total suicide per country (combining granular data into total/avg for each country)  
    * Transform variable types if needed 


### Suicide Rate vs. Time (Year)

In [42]:
# Load new Suicide Rate from Vivi
Suicide_Rate_fromVivi_path = "Data sets/Raw Data/Suicide rate/suicide_rate_fromVivi.csv"
Suicide_Rate_fromVivi = pd.read_csv(Suicide_Rate_fromVivi_path)
Suicide_Rate_fromVivi.head(1)

Unnamed: 0.1,Unnamed: 0,Country Name,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
0,0,Afghanistan,7.4,7.33,6.98,6.82,6.84,6.77,6.65,6.58,6.53,6.53,6.54,6.52,6.37,6.25,6.16,6.01,5.92,5.84,5.73,5.63,5.54,5.46,5.39,5.35,5.3,5.29,5.27,5.25


In [43]:
# Remove "Unnamed: 0"
Suicide_Rate_fromVivi.drop(columns='Unnamed: 0', inplace=True )

In [128]:
# Display cleaner Suicide Rate over time (year)
Suicide_Rate_fromVivi.sample(5)

Unnamed: 0,Country Name,1990,1991,1992,1993,1994,1995,1996,1997,1998,...,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
86,Japan,19.08,19.0,19.18,19.26,19.21,19.92,20.16,21.0,23.39,...,25.42,25.25,25.01,24.81,23.83,23.32,22.86,22.2,22.31,22.45
171,Thailand,12.78,13.44,14.17,15.29,16.72,18.47,19.77,18.29,18.34,...,12.28,11.92,12.13,12.09,11.95,11.64,11.74,11.88,12.24,12.42
6,Antigua and Barbuda,2.49,2.5,2.5,2.51,2.51,2.51,2.54,2.59,2.66,...,2.75,2.79,2.82,2.87,2.86,2.88,2.92,2.95,2.95,2.95
77,Iceland,15.18,14.85,14.54,14.13,13.92,13.8,13.71,13.62,13.48,...,11.61,11.68,11.56,11.45,11.64,11.89,12.05,12.02,12.08,11.82
36,China,19.15,19.54,20.06,20.2,20.05,19.87,19.09,18.13,17.09,...,10.17,10.04,9.89,9.52,9.24,9.02,9.09,9.14,9.1,9.1


In [44]:
# Display Statistic Overview of Suicide Rate over Time (Year)
Suicide_Rate_fromVivi.describe()

Unnamed: 0,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
count,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0
mean,11.293333,11.336821,11.452103,11.682513,11.861897,11.931949,11.807179,11.765026,11.745231,11.691744,11.615487,11.555641,11.497897,11.442154,11.351897,11.323487,11.185744,11.089744,11.004872,10.858103,10.744051,10.628564,10.522923,10.411641,10.325026,10.30559,10.225077,10.193744
std,10.386364,10.1503,10.088175,10.414815,10.690279,10.541587,10.114306,9.930711,9.834637,9.790869,9.727947,9.722527,9.643783,9.548371,9.454977,9.516667,9.273664,9.076855,8.882929,8.55968,8.432243,8.239422,8.008242,7.856481,7.661756,7.50417,7.314543,7.209089
min,1.33,1.3,1.4,1.56,1.69,1.84,1.86,1.93,1.95,1.99,2.02,2.05,2.14,2.02,1.92,2.25,2.29,2.3,2.37,2.37,2.4,2.4,2.41,2.46,2.52,2.58,2.64,2.67
25%,5.13,5.19,5.3,5.42,5.425,5.505,5.615,5.67,5.715,5.615,5.555,5.52,5.5,5.515,5.585,5.515,5.615,5.61,5.65,5.65,5.63,5.545,5.515,5.435,5.43,5.405,5.44,5.51
50%,8.35,8.53,8.59,8.73,8.66,8.52,8.59,8.59,8.73,8.34,8.27,8.24,8.07,8.08,7.93,7.94,7.63,7.68,7.52,7.66,7.74,7.71,7.79,7.82,7.83,7.87,7.85,7.83
75%,14.175,14.15,14.44,14.75,14.73,14.8,14.565,14.67,14.99,15.04,14.935,15.0,15.025,14.55,14.19,13.735,13.445,13.5,13.48,13.405,13.32,12.67,12.675,12.645,12.69,12.63,12.625,12.695
max,109.3,103.8,98.7,94.88,91.48,88.25,86.35,85.09,83.3,81.22,79.16,78.35,77.06,75.78,75.57,74.78,73.92,72.04,71.39,70.68,69.81,68.01,66.43,65.65,64.65,59.53,58.28,57.61


In [45]:
# Set Country as index of Dataframe
Suicide_Rate_fromVivi.set_index('Country Name', inplace=True)
Suicide_Rate_fromVivi.sample(5)

Unnamed: 0_level_0,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
Country Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
Greenland,109.3,103.8,98.7,94.88,91.48,88.25,86.35,85.09,83.3,81.22,79.16,78.35,77.06,75.78,75.57,74.78,73.92,72.04,71.39,70.68,69.81,68.01,66.43,65.65,64.65,59.53,58.28,57.61
United Kingdom,10.76,10.65,10.45,10.48,10.16,10.3,10.21,10.01,10.07,9.97,9.69,9.44,9.31,9.13,8.79,8.63,8.86,8.77,8.92,8.79,8.68,8.47,8.29,8.49,8.44,8.59,8.69,8.67
Vanuatu,18.32,18.2,18.11,17.98,17.8,17.66,17.45,17.29,17.07,16.92,16.71,16.63,16.61,16.55,16.48,16.45,16.37,16.39,16.33,16.27,16.19,16.11,16.04,15.95,15.9,15.86,15.83,15.81
Northern Mariana Islands,11.75,11.58,11.29,11.01,10.76,10.58,10.48,10.4,10.31,9.93,9.75,10.22,10.08,10.05,9.87,9.86,9.84,9.96,10.23,10.43,10.59,10.78,10.99,11.21,11.56,12.08,12.38,12.67
Chile,22.04,19.96,18.53,18.63,18.07,17.76,16.92,15.73,15.0,14.12,13.03,12.92,12.22,12.2,12.2,12.01,12.03,12.29,12.47,12.96,12.52,12.03,11.58,11.46,11.35,11.38,11.47,11.5


In [46]:
# Create new Suicide Rate over time (year) Dataframe with only Y2015-2017 
# to correlate Happiness Daya Y2015-2017

SuicideRate_2015_17 = Suicide_Rate_fromVivi.loc[:, '2015':'2017']
SuicideRate_2015_17.sample(5)

Unnamed: 0_level_0,2015,2016,2017
Country Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Syria,2.58,2.64,2.69
Egypt,4.84,4.9,4.93
Saint Lucia,7.36,7.69,7.87
Portugal,12.48,12.51,12.72
Qatar,5.39,5.28,5.29


In [47]:
# Display Statistic Overview
SuicideRate_2015_17.describe()

Unnamed: 0,2015,2016,2017
count,195.0,195.0,195.0
mean,10.30559,10.225077,10.193744
std,7.50417,7.314543,7.209089
min,2.58,2.64,2.67
25%,5.405,5.44,5.51
50%,7.87,7.85,7.83
75%,12.63,12.625,12.695
max,59.53,58.28,57.61


### Select Variables from clean Happiness Data Y2015-2017

In [77]:
all_happiness.head(1)

Unnamed: 0,Country,Region_x,2015 Happiness Rank,2015 Happiness Score,2015 Standard Error,2015 Economy (GDP per Capita),2015 Family,2015 Health (Life Expectancy),2015 Freedom,2015 Trust (Government Corruption),2015 Generosity,2015 Dystopia Residual,Region_y,2016 Happiness Rank,2016 Happiness Score,2016 Lower Confidence Interval,2016 Upper Confidence Interval,2016 Economy (GDP per Capita),2016 Family,2016 Health (Life Expectancy),2016 Freedom,2016 Trust (Government Corruption),2016 Generosity,2016 Dystopia Residual,2017 Happiness Rank,2017 Happiness Score,2017 Whisker.high,2017 Whisker.low,2017 Economy (GDP per Capita),2017 Family,2017 Health (Life Expectancy),2017 Freedom,2017 Generosity,2017 Trust (Government Corruption),2017 Dystopia Residual
0,Switzerland,Western Europe,1,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,Western Europe,2,7.509,7.428,7.59,1.52733,1.14524,0.86303,0.58557,0.41203,0.28083,2.69463,4,7.494,7.561772,7.426227,1.56498,1.516912,0.858131,0.620071,0.290549,0.367007,2.276716


In [76]:
# Filter Dataframe showing Happiness Score per Year
happyScore_2015_17 = all_happiness.loc[: , ['Country', '2015 Happiness Rank', '2015 Happiness Score',
                                            '2016 Happiness Rank', '2016 Happiness Score',
                                            '2017 Happiness Rank', '2017 Happiness Score']]
happyScore_2015_17.head()

Unnamed: 0,Country,2015 Happiness Rank,2015 Happiness Score,2016 Happiness Rank,2016 Happiness Score,2017 Happiness Rank,2017 Happiness Score
0,Switzerland,1,7.587,2,7.509,4,7.494
1,Iceland,2,7.561,3,7.501,3,7.504
2,Denmark,3,7.527,1,7.526,2,7.522
3,Norway,4,7.522,4,7.498,1,7.537
4,Canada,5,7.427,6,7.404,7,7.316


# Analysis
----
* Line graph: x-axis: year; y-axes: depressive score, happiness score, suicide score  (one chart per region)?
---
* Scatter plot with regression analysis
    * x-axis: depressive score; y-axis: suicide rate
    * x-axis: happiness index; y-axis: suicide rate
---

In [78]:
depress_cohort.head()

Year,2015,2016,2017
Entity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,4046.831685,4044.720378,4042.996929
Albania,2102.716655,2103.861099,2104.971611
Algeria,3425.956529,3434.270011,3445.182117
American Samoa,2817.635241,2818.923237,2821.194026
Andorra,3417.22746,3415.914479,3418.14286


In [79]:
happyScore_2015_17.head()

Unnamed: 0,Country,2015 Happiness Rank,2015 Happiness Score,2016 Happiness Rank,2016 Happiness Score,2017 Happiness Rank,2017 Happiness Score
0,Switzerland,1,7.587,2,7.509,4,7.494
1,Iceland,2,7.561,3,7.501,3,7.504
2,Denmark,3,7.527,1,7.526,2,7.522
3,Norway,4,7.522,4,7.498,1,7.537
4,Canada,5,7.427,6,7.404,7,7.316


In [83]:
suicide_cohort.head(1)

Year,2015,2016,2017
Entity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,9.319114,9.2516,9.188568


In [84]:
test_1 = suicide_cohort.loc['Entity': 'Switzerland','Iceland','Denmark','Norway','Canada'}, :]
test_1

SyntaxError: invalid syntax (<ipython-input-84-9b8e087a0d7f>, line 1)