In [1]:
## Dependencies
import os
import pandas as pd

## Suicide Rate of Change (pct_change)

In [2]:
## Set input file
## The CSV is looked up in the 'clean' directory; the path is relative, so it
## resolves against the notebook's current working directory.
filename = 'extract_owid_and_wb.csv'
path = os.path.join('clean', filename)

In [3]:
## Load CSV as DataFrame
## low_memory=False makes pandas parse the file in one pass rather than in
## chunks, so each column's dtype is inferred from all of its values.
df = pd.read_csv(path, low_memory=False)

In [4]:
df.head()

Unnamed: 0,country,year,country_code,suicide_rate_owid,mental_substance_disorders,life_expectancy,birth_rate,death_rate,gdp,health_pct,population,unemployment
0,Afghanistan,1990,Afghanistan-1990,10.318504,17.553463,50.331,48.88,15.241,,,12412311.0,
1,Afghanistan,1991,Afghanistan-1991,10.32701,17.837032,50.999,48.763,14.783,,,13299016.0,11.38
2,Afghanistan,1992,Afghanistan-1992,10.271411,18.092542,51.641,48.709,14.362,,,14485543.0,11.46
3,Afghanistan,1993,Afghanistan-1993,10.376123,18.294931,52.256,48.717,13.974,,,15816601.0,11.61
4,Afghanistan,1994,Afghanistan-1994,10.575915,18.428908,52.842,48.77,13.616,,,17075728.0,11.65


In [5]:
## Report how many distinct countries and how many rows/columns were loaded
print(f"Countries: {df['country'].nunique()}")
print(f"Shape: {df.shape}")

Countries: 172
Shape: (4748, 12)


In [6]:
## Explore data
df.dtypes

country                        object
year                            int64
country_code                   object
suicide_rate_owid             float64
mental_substance_disorders    float64
life_expectancy               float64
birth_rate                    float64
death_rate                    float64
gdp                           float64
health_pct                    float64
population                    float64
unemployment                  float64
dtype: object

In [7]:
## Discard every row that has a missing value in any column
df = df.dropna(axis='index', how='any')

## Report the countries and rows/columns that remain
print(f"Countries: {df['country'].nunique()}")
print(f"Shape: {df.shape}")

Countries: 159
Shape: (2834, 12)


In [8]:
## Drop Zeros
## Keep a row only if every numeric column is non-zero. A zero would later
## produce an infinite (zero denominator) or -100% (zero numerator) change
## in pct_change.
## The test uses .all over the numeric columns on purpose: comparing the whole
## frame with .any can never drop a row, because the string columns
## ('country', 'country_code') are always != 0.
numeric_cols = df.select_dtypes(include='number').columns
zero_loc = (df[numeric_cols] != 0).all(axis=1)

df = df.loc[zero_loc]

## Number of Countries & Shape
print("Countries:", df['country'].nunique())
print("Shape:", df.shape)

Countries: 159
Shape: (2834, 12)


In [9]:
## Order rows by country, then by year, and renumber the index from 0
df = (
    df
    .sort_values(by=['country', 'year'])
    .reset_index(drop=True)
)
df.head()

Unnamed: 0,country,year,country_code,suicide_rate_owid,mental_substance_disorders,life_expectancy,birth_rate,death_rate,gdp,health_pct,population,unemployment
0,Afghanistan,2002,Afghanistan-2002,11.054472,18.137951,56.784,46.901,11.048,179.426579,9.44339,22600774.0,11.68
1,Afghanistan,2003,Afghanistan-2003,10.931093,17.958849,57.271,46.231,10.704,190.683814,8.941258,23680871.0,11.68
2,Afghanistan,2004,Afghanistan-2004,10.83979,17.788825,57.772,45.507,10.356,211.382074,9.808474,24726689.0,11.61
3,Afghanistan,2005,Afghanistan-2005,10.655626,17.647911,58.29,44.723,10.003,242.031313,9.94829,25654274.0,11.52
4,Afghanistan,2006,Afghanistan-2006,10.538475,17.539914,58.826,43.87,9.645,263.733602,10.622766,26433058.0,11.34


In [10]:
df.columns

Index(['country', 'year', 'country_code', 'suicide_rate_owid',
       'mental_substance_disorders', 'life_expectancy', 'birth_rate',
       'death_rate', 'gdp', 'health_pct', 'population', 'unemployment'],
      dtype='object')

In [11]:
## Columns that hold the numeric data (target & features); the identifier
## columns 'country', 'year' and 'country_code' are deliberately left out.
data_cols = [
    'suicide_rate_owid',
    'mental_substance_disorders',
    'life_expectancy',
    'birth_rate',
    'death_rate',
    'gdp',
    'population',
    'unemployment',
    'health_pct',
]

## Pct_change by country

In [12]:
## Distinct country names still present in df after the cleaning steps;
## the cell displays how many there are.
country_list = df['country'].unique()
len(country_list)

159

In [13]:
## Rate of change (ROC) by country
## pct_change is computed separately for each country so that the first row
## of one country is never compared with the last row of the previous one.
## NOTE(review): pct_change compares consecutive *rows*, not consecutive
## years. If earlier cleaning removed some years for a country, the
## corresponding change spans more than one year -- confirm this is intended.
roc_frames = []
for country in country_list:
    country_df = df.loc[df['country'] == country]
    ## The first row of each country has no predecessor, so its change is
    ## NaN and is removed by dropna().
    country_roc = country_df[data_cols].pct_change().dropna()
    roc_frames.append(country_roc)

## Build the global DataFrame with a single concat. DataFrame.append was
## removed in pandas 2.0, and appending inside the loop copied the whole
## accumulated frame on every iteration.
if roc_frames:
    roc_df = pd.concat(roc_frames, ignore_index=True)
else:
    ## No countries: keep the empty, correctly-labelled frame
    roc_df = pd.DataFrame(columns=data_cols)

In [14]:
## Global year-to-year rate of change (%)
roc_df

Unnamed: 0,suicide_rate_owid,mental_substance_disorders,life_expectancy,birth_rate,death_rate,gdp,population,unemployment,health_pct
0,-0.011161,-0.009874,0.008576,-0.014285,-0.031137,0.062740,0.047790,0.000000,-0.053173
1,-0.008353,-0.009467,0.008748,-0.015660,-0.032511,0.108548,0.044163,-0.005993,0.096990
2,-0.016990,-0.007921,0.008966,-0.017228,-0.034087,0.144995,0.037514,-0.007752,0.014255
3,-0.010994,-0.006120,0.009195,-0.019073,-0.035789,0.089667,0.030357,-0.015625,0.067798
4,-0.013982,-0.005450,0.009333,-0.021108,-0.037118,0.363850,0.025252,-0.014109,-0.067599
...,...,...,...,...,...,...,...,...,...
2670,-0.033555,0.000965,0.033889,-0.014438,-0.094206,0.095811,0.017936,0.000000,0.027723
2671,-0.026644,0.001184,0.026592,-0.022087,-0.082847,0.003425,0.017702,-0.009294,0.143932
2672,-0.014797,0.001209,0.019243,-0.028504,-0.065107,0.007090,0.016776,-0.007505,-0.083784
2673,-0.013144,0.001584,0.012766,-0.032871,-0.044621,0.013507,0.015614,-0.009452,0.026242


In [15]:
## Dimensions and descriptive statistics of the rate-of-change table
print(f"Shape: {roc_df.shape}")
roc_df.describe()

Shape: (2675, 9)


Unnamed: 0,suicide_rate_owid,mental_substance_disorders,life_expectancy,birth_rate,death_rate,gdp,population,unemployment,health_pct
count,2675.0,2675.0,2675.0,2675.0,2675.0,2675.0,2675.0,2675.0,2675.0
mean,-0.010692,-0.000709,0.005471,-0.009637,-0.010181,0.072694,0.015518,0.004607,0.015221
std,0.029556,0.001799,0.006091,0.021118,0.024079,0.136647,0.016554,0.189547,0.108067
min,-0.169668,-0.010424,-0.035607,-0.138686,-0.133333,-0.640223,-0.037746,-0.805556,-0.443989
25%,-0.023397,-0.001476,0.002238,-0.018552,-0.024478,0.006146,0.005234,-0.059596,-0.030245
50%,-0.009074,-0.000387,0.003973,-0.011367,-0.009213,0.063959,0.013566,-0.007792,0.006485
75%,0.001347,0.000308,0.007101,-0.003252,0.003859,0.138676,0.024548,0.037356,0.048239
max,0.413126,0.007115,0.04455,0.144928,0.098361,1.884354,0.191392,5.8125,1.478131


In [16]:
## Export to CSV
## Create the output directory first: to_csv does not create missing
## directories and would fail on a fresh checkout.
os.makedirs('output', exist_ok=True)
## Build the path with os.path.join, as is done for the input file.
roc_df.to_csv(os.path.join('output', 'suicide_roc.csv'), index=False)