In [1]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

%matplotlib inline

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
        
%matplotlib inline


In [2]:
file_path = "data/master.csv"

data = pd.read_csv(file_path)


In [3]:
data.head(30)

Unnamed: 0,country,year,sex,age,suicides_no,population,suicides/100k pop,country-year,HDI for year,gdp_for_year ($),gdp_per_capita ($),generation
0,Albania,1987,male,15-24 years,21,312900,6.71,Albania1987,,2156624900,796,Generation X
1,Albania,1987,male,35-54 years,16,308000,5.19,Albania1987,,2156624900,796,Silent
2,Albania,1987,female,15-24 years,14,289700,4.83,Albania1987,,2156624900,796,Generation X
3,Albania,1987,male,75+ years,1,21800,4.59,Albania1987,,2156624900,796,G.I. Generation
4,Albania,1987,male,25-34 years,9,274300,3.28,Albania1987,,2156624900,796,Boomers
5,Albania,1987,female,75+ years,1,35600,2.81,Albania1987,,2156624900,796,G.I. Generation
6,Albania,1987,female,35-54 years,6,278800,2.15,Albania1987,,2156624900,796,Silent
7,Albania,1987,female,25-34 years,4,257200,1.56,Albania1987,,2156624900,796,Boomers
8,Albania,1987,male,55-74 years,1,137500,0.73,Albania1987,,2156624900,796,G.I. Generation
9,Albania,1987,female,5-14 years,0,311000,0.0,Albania1987,,2156624900,796,Generation X


In [4]:
data = data.rename(columns={'country': 'Country', 'year': 'Year', 'sex': 'Gender', 'age': "Age Group", 'suicides_no': 'Suicides', 'population': 'Population', 'suicides/100k pop': 'Suicide per 100k', 'generation':'Generation'})
data.drop(columns=['country-year'])
data.head()

Unnamed: 0,Country,Year,Gender,Age Group,Suicides,Population,Suicide per 100k,country-year,HDI for year,gdp_for_year ($),gdp_per_capita ($),Generation
0,Albania,1987,male,15-24 years,21,312900,6.71,Albania1987,,2156624900,796,Generation X
1,Albania,1987,male,35-54 years,16,308000,5.19,Albania1987,,2156624900,796,Silent
2,Albania,1987,female,15-24 years,14,289700,4.83,Albania1987,,2156624900,796,Generation X
3,Albania,1987,male,75+ years,1,21800,4.59,Albania1987,,2156624900,796,G.I. Generation
4,Albania,1987,male,25-34 years,9,274300,3.28,Albania1987,,2156624900,796,Boomers


## General Numbers

In [5]:
# Total Suicides

total = data['Suicides'].sum()
total_suicides = pd.DataFrame({"Total Suicides": [total]})

total_suicides

Unnamed: 0,Total Suicides
0,6748420


In [6]:
data.columns

Index(['Country', 'Year', 'Gender', 'Age Group', 'Suicides', 'Population',
       'Suicide per 100k', 'country-year', 'HDI for year',
       ' gdp_for_year ($) ', 'gdp_per_capita ($)', 'Generation'],
      dtype='object')

## Country Analysis

In [7]:
df1 = data[['Country', 'Year','Suicides']]
df1 = df1.groupby(['Country','Year'],as_index=False).sum()
df1

Unnamed: 0,Country,Year,Suicides
0,Albania,1987,73
1,Albania,1988,63
2,Albania,1989,68
3,Albania,1992,47
4,Albania,1993,73
...,...,...,...
2316,Uzbekistan,2010,1464
2317,Uzbekistan,2011,1640
2318,Uzbekistan,2012,1835
2319,Uzbekistan,2013,1950


In [8]:
df1b = df1.copy()
df1b['Country_No'] = 1
df1b = df1b.groupby('Year').sum()
df1b

Unnamed: 0_level_0,Suicides,Country_No
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
1985,116063,48
1986,120670,48
1987,126842,54
1988,121026,49
1989,160244,52
1990,193361,64
1991,198020,64
1992,211473,65
1993,221565,65
1994,232063,68


In [9]:
#Drop 2016
drop = df1[df1['Year']==2016].index.tolist()
df1 = df1.drop(index=df1.index[drop])


In [10]:
df2 = df1.copy()
df2['Year']=df2['Year'].apply(lambda x: str(x))
df3 = df2.set_index(['Year', 'Country'])
df3

Unnamed: 0_level_0,Unnamed: 1_level_0,Suicides
Year,Country,Unnamed: 2_level_1
1987,Albania,73
1988,Albania,63
1989,Albania,68
1992,Albania,47
1993,Albania,73
...,...,...
2010,Uzbekistan,1464
2011,Uzbekistan,1640
2012,Uzbekistan,1835
2013,Uzbekistan,1950


In [11]:
# Unstacking
df4 = df2.set_index(['Year','Country']).unstack()['Suicides']
df4


Country,Albania,Antigua and Barbuda,Argentina,Armenia,Aruba,Australia,Austria,Azerbaijan,Bahamas,Bahrain,...,Thailand,Trinidad and Tobago,Turkey,Turkmenistan,Ukraine,United Arab Emirates,United Kingdom,United States,Uruguay,Uzbekistan
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1985,,0.0,1988.0,,,1861.0,2091.0,,1.0,11.0,...,2982.0,29.0,,,,,5105.0,29446.0,287.0,
1986,,0.0,2284.0,,,2044.0,2139.0,,,,...,2798.0,104.0,,,,,4839.0,30892.0,262.0,
1987,73.0,0.0,2286.0,,,2164.0,2069.0,,3.0,9.0,...,3035.0,119.0,,272.0,10050.0,,4594.0,30783.0,258.0,
1988,63.0,0.0,2354.0,,,2172.0,1851.0,,,14.0,...,,164.0,,270.0,9751.0,,4971.0,30388.0,268.0,
1989,68.0,0.0,2247.0,,,2066.0,1898.0,,,,...,,173.0,,253.0,10887.0,,4361.0,30218.0,344.0,
1990,,1.0,2140.0,93.0,,2202.0,1825.0,114.0,,,...,3753.0,170.0,,298.0,10645.0,,4643.0,30895.0,315.0,1459.0
1991,,0.0,1951.0,80.0,,2288.0,1769.0,134.0,,,...,3559.0,148.0,,273.0,10700.0,,4547.0,30790.0,,1393.0
1992,47.0,0.0,2184.0,83.0,,2251.0,1759.0,144.0,,,...,3550.0,163.0,,271.0,11666.0,,4628.0,30471.0,,1344.0
1993,73.0,1.0,2230.0,106.0,,2038.0,1704.0,119.0,7.0,,...,,174.0,,240.0,12469.0,,4462.0,31084.0,418.0,1355.0
1994,50.0,0.0,2241.0,112.0,,2283.0,1776.0,47.0,3.0,,...,2307.0,148.0,,234.0,13826.0,,4380.0,31123.0,406.0,1419.0


In [12]:
df5 = df4.fillna(0)

In [13]:
country_df = df5


## Scatter Plot - Suicides per Year by Country

In [14]:
overall     = pd.DataFrame(data.groupby('Year')['Suicides'].sum())
pop_overall = pd.DataFrame(data.groupby('Year')['Population'].sum())

In [27]:
# drop = df1[df1['Year']==2016].index.tolist()
# df1 = df1.drop(index=df1.index[drop])

drop = data[data['Year']==2016].index.tolist()
data = data.drop(index=data.index[drop])
data.head(200)

Unnamed: 0,Country,Year,Gender,Age Group,Suicides,Population,Suicide per 100k,country-year,HDI for year,gdp_for_year ($),gdp_per_capita ($),Generation
0,Albania,1987,male,15-24 years,21,312900,6.71,Albania1987,,2156624900,796,Generation X
1,Albania,1987,male,35-54 years,16,308000,5.19,Albania1987,,2156624900,796,Silent
2,Albania,1987,female,15-24 years,14,289700,4.83,Albania1987,,2156624900,796,Generation X
3,Albania,1987,male,75+ years,1,21800,4.59,Albania1987,,2156624900,796,G.I. Generation
4,Albania,1987,male,25-34 years,9,274300,3.28,Albania1987,,2156624900,796,Boomers
...,...,...,...,...,...,...,...,...,...,...,...,...
195,Albania,2005,female,5-14 years,0,276559,0.00,Albania2005,0.695,8158548717,2931,Millenials
196,Albania,2005,female,55-74 years,0,210998,0.00,Albania2005,0.695,8158548717,2931,Silent
197,Albania,2005,female,75+ years,0,53191,0.00,Albania2005,0.695,8158548717,2931,Silent
198,Albania,2005,male,15-24 years,0,281675,0.00,Albania2005,0.695,8158548717,2931,Millenials


In [22]:
fig = make_subplots(rows = 2, cols=1)

fig.append_trace(go.Scatter(
                            x = overall.index,
                            y = overall['Suicides'],
                            name = 'Number of Suicies',
                            mode = 'lines+markers',
                            marker=dict(color="red")
                            ), row=1, col=1)

fig.append_trace(go.Scatter(
                            x = pop_overall.index,
                            y = pop_overall['Population'],
                            name = 'Population',
                            mode = 'lines+markers',
                            marker=dict(color="#00CC96")
                            ), row=2, col=1)


fig.update_layout(height=600, width=800, title = 'Population V/S Total Suicides Trend')
fig.update_xaxes(title_text = 'Year', row=1, col=1)
fig.update_xaxes(title_text = 'Year', row=2, col=1)
fig.update_yaxes(title_text='Suicides', row=1, col=1)
fig.update_yaxes(title_text='Population', row=2, col=1)

fig.show()