In [1]:
## Dependencies
import os
import pandas as pd
import numpy as np

## Suicide Rate of Change (pct_change)

In [2]:
## Set input file
filename = 'suicide_data.csv'
path = os.path.join('output', filename)

In [3]:
## Load CSV as DataFrame
df = pd.read_csv(path, low_memory=False)

In [4]:
df.head()

Unnamed: 0,country,year,country_year,alcohol_consumption,suicide_rate_owid,mental_substance_disorders,life_expectancy,birth_rate,death_rate,gdp,health_pct,population,unemployment
0,Afghanistan,1990,Afghanistan-1990,0.0,10.318504,17.553463,50.331,48.88,15.241,,,12412311.0,
1,Afghanistan,1991,Afghanistan-1991,0.0,10.32701,17.837032,50.999,48.763,14.783,,,13299016.0,11.38
2,Afghanistan,1992,Afghanistan-1992,0.0,10.271411,18.092542,51.641,48.709,14.362,,,14485543.0,11.46
3,Afghanistan,1993,Afghanistan-1993,0.0,10.376123,18.294931,52.256,48.717,13.974,,,15816601.0,11.61
4,Afghanistan,1994,Afghanistan-1994,0.0,10.575915,18.428908,52.842,48.77,13.616,,,17075728.0,11.65


In [5]:
## Number of Countries & Shape
print("Countries:", df['country'].nunique())
print("Shape:", df.shape)

Countries: 164
Shape: (4770, 13)


In [6]:
## Explore data
df.dtypes

country                        object
year                            int64
country_year                   object
alcohol_consumption           float64
suicide_rate_owid             float64
mental_substance_disorders    float64
life_expectancy               float64
birth_rate                    float64
death_rate                    float64
gdp                           float64
health_pct                    float64
population                    float64
unemployment                  float64
dtype: object

In [7]:
## Drop NaN
df = df.dropna()

## Number of Countries & Shape
print("Countries:", df['country'].nunique())
print("Shape:", df.shape)

Countries: 156
Shape: (2948, 13)


In [8]:
## Drop Zeros
zero_loc = (df != 0).any(axis=1)
# zero_loc.value_counts()

df = df.loc[zero_loc]

## Number of Countries & Shape
print("Countries:", df['country'].nunique())
print("Shape:", df.shape)

Countries: 156
Shape: (2948, 13)


In [9]:
df = df.sort_values(by=['country', 'year'])
df.reset_index(drop=True, inplace=True)
df.head()

Unnamed: 0,country,year,country_year,alcohol_consumption,suicide_rate_owid,mental_substance_disorders,life_expectancy,birth_rate,death_rate,gdp,health_pct,population,unemployment
0,Afghanistan,2005,Afghanistan-2005,0.009,10.655626,17.647911,58.29,44.723,10.003,242.031313,9.94829,25654274.0,11.52
1,Afghanistan,2006,Afghanistan-2006,0.011,10.538475,17.539914,58.826,43.87,9.645,263.733602,10.622766,26433058.0,11.34
2,Afghanistan,2007,Afghanistan-2007,0.009,10.391129,17.444318,59.375,42.944,9.287,359.693158,9.904675,27100542.0,11.18
3,Afghanistan,2008,Afghanistan-2008,0.018,10.219154,17.35711,59.93,41.949,8.932,364.660679,10.256495,27722281.0,11.11
4,Afghanistan,2009,Afghanistan-2009,0.01,10.036657,17.275369,60.484,40.903,8.584,438.076142,9.818487,28394806.0,11.46


In [10]:
df.columns

Index(['country', 'year', 'country_year', 'alcohol_consumption',
       'suicide_rate_owid', 'mental_substance_disorders', 'life_expectancy',
       'birth_rate', 'death_rate', 'gdp', 'health_pct', 'population',
       'unemployment'],
      dtype='object')

In [11]:
## Select columns with data (target & features)
data_cols = ['suicide_rate_owid',
             'alcohol_consumption',
             'mental_substance_disorders',
             'life_expectancy',
             'birth_rate',
             'death_rate',
             'gdp',
             'health_pct',
             'population',
             'unemployment']

## Pct_change by country

In [12]:
country_list = df['country'].unique()
len(country_list)

156

In [13]:
## Create global DataFrame
roc_df = pd.DataFrame(columns=data_cols)

## ROC by country
for country in country_list:
    country_df = df.loc[df['country'] == country]
    country_roc = country_df[data_cols].pct_change().dropna()
    
    ## Append to global DataFrame
    roc_df = roc_df.append(country_roc, ignore_index=True)

In [14]:
## Global year-to-year rate of change (%)
roc_df

Unnamed: 0,suicide_rate_owid,alcohol_consumption,mental_substance_disorders,life_expectancy,birth_rate,death_rate,gdp,health_pct,population,unemployment
0,-0.010994,0.222222,-0.006120,0.009195,-0.019073,-0.035789,0.089667,0.067798,0.030357,-0.015625
1,-0.013982,-0.181818,-0.005450,0.009333,-0.021108,-0.037118,0.363850,-0.067599,0.025252,-0.014109
2,-0.016550,1.000000,-0.004999,0.009347,-0.023170,-0.038225,0.013810,0.035521,0.022942,-0.006261
3,-0.017858,-0.444444,-0.004709,0.009244,-0.024935,-0.038961,0.201325,-0.042705,0.024259,0.031503
4,-0.016068,0.000000,-0.004163,0.008994,-0.026257,-0.038910,0.240202,-0.127190,0.027847,0.005236
...,...,...,...,...,...,...,...,...,...,...
2577,-0.033555,0.000000,0.000965,0.033889,-0.014438,-0.094206,0.095811,0.027723,0.017936,0.000000
2578,-0.026644,0.200000,0.001184,0.026592,-0.022087,-0.082847,0.003425,0.143932,0.017702,-0.009294
2579,-0.014797,0.000000,0.001209,0.019243,-0.028504,-0.065107,0.007090,-0.083784,0.016776,-0.007505
2580,-0.013144,-0.166667,0.001584,0.012766,-0.032871,-0.044621,0.013507,0.026242,0.015614,-0.009452


In [15]:
## Shape & Summary
print("Shape:", roc_df.shape)
roc_df.describe()

Shape: (2582, 10)


Unnamed: 0,suicide_rate_owid,alcohol_consumption,mental_substance_disorders,life_expectancy,birth_rate,death_rate,gdp,health_pct,population,unemployment
count,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0
mean,-0.009456,inf,-0.000634,0.004933,-0.004564,-0.006419,0.069438,0.0142,0.035627,0.005652
std,0.029681,,0.001737,0.006247,0.105245,0.079596,0.163298,0.107539,0.333655,0.194831
min,-0.169668,-1.0,-0.011381,-0.035607,-0.60084,-0.480612,-0.640223,-0.443989,-0.819465,-0.70297
25%,-0.022097,-0.213955,-0.001274,0.001784,-0.017733,-0.023389,0.0,-0.028865,0.002863,-0.050592
50%,-0.006832,0.0,-0.000223,0.003471,-0.010316,-0.004589,0.051484,0.000587,0.012823,-0.003123
75%,0.001219,0.066667,0.000232,0.006495,0.0,0.003588,0.131798,0.04501,0.024034,0.03059
max,0.413126,inf,0.007115,0.069977,1.469579,0.932367,2.260946,1.349113,4.544366,5.8125


In [16]:
## Remove infinite values
roc_df.replace([np.inf, -np.inf], np.nan, inplace=True)
clean_df = roc_df.dropna()
clean_df

Unnamed: 0,suicide_rate_owid,alcohol_consumption,mental_substance_disorders,life_expectancy,birth_rate,death_rate,gdp,health_pct,population,unemployment
0,-0.010994,0.222222,-0.006120,0.009195,-0.019073,-0.035789,0.089667,0.067798,0.030357,-0.015625
1,-0.013982,-0.181818,-0.005450,0.009333,-0.021108,-0.037118,0.363850,-0.067599,0.025252,-0.014109
2,-0.016550,1.000000,-0.004999,0.009347,-0.023170,-0.038225,0.013810,0.035521,0.022942,-0.006261
3,-0.017858,-0.444444,-0.004709,0.009244,-0.024935,-0.038961,0.201325,-0.042705,0.024259,0.031503
4,-0.016068,0.000000,-0.004163,0.008994,-0.026257,-0.038910,0.240202,-0.127190,0.027847,0.005236
...,...,...,...,...,...,...,...,...,...,...
2577,-0.033555,0.000000,0.000965,0.033889,-0.014438,-0.094206,0.095811,0.027723,0.017936,0.000000
2578,-0.026644,0.200000,0.001184,0.026592,-0.022087,-0.082847,0.003425,0.143932,0.017702,-0.009294
2579,-0.014797,0.000000,0.001209,0.019243,-0.028504,-0.065107,0.007090,-0.083784,0.016776,-0.007505
2580,-0.013144,-0.166667,0.001584,0.012766,-0.032871,-0.044621,0.013507,0.026242,0.015614,-0.009452


In [18]:
## Export to CSV
clean_df.to_csv("output/suicide_roc.csv", index=False)