In [168]:
import pandas as pd
from modsim import *
import matplotlib.pyplot as plt
import numpy as np

In [169]:
# install Pint if necessary
try:
  from pint import UnitRegistry
except ImportError:
  !pip install pint

In [170]:
# download modsim.py if necessary

from os.path import basename, exists


def download(url):
  filename = basename(url)
  if not exists(filename):
    from urllib.request import urlretrieve
    local, _ = urlretrieve(url, filename)
    print('Downloaded ' + local)


download('https://github.com/AllenDowney/ModSimPy/raw/master/' +
         'modsim.py')

In [171]:
# Menghapus kolom yang tidak diperlukan
data_have_index = [
  "sample/anxiety-disorders-males-vs-females.csv",
  "sample/bipolar-disorder-in-males-vs-females.csv",
  "sample/depression-males-vs-females.csv",
  "sample/eating-disorders-in-males-vs-females.csv",
  "sample/Mental health Depression disorder Data.csv",
  "sample/schizophrenia-in-males-vs-females.csv"
]

for file_path in data_have_index:
  df = pd.read_csv(file_path)
  if 'index' in df.columns:
    df = df.drop('index', axis=1)
  df.to_csv(file_path, index=False)

  df = pd.read_csv(file_path)


In [172]:
df_grouped_bundir = pd.read_csv("sample/suicide-rates-among-young-people.csv")
df_grouped_bundir = df_grouped_bundir.groupby('Year').mean().reset_index()

  df_grouped_bundir = df_grouped_bundir.groupby('Year').mean().reset_index()


In [173]:
df_grouped_bundir.columns

Index(['Year',
       'Death rate per 100,000 people - Cause: Self-harm - Sex: Both sexes - Age group: YEARS15-19',
       'Death rate per 100,000 people - Cause: Self-harm - Sex: Both sexes - Age group: YEARS20-24',
       'Death rate per 100,000 people - Cause: Self-harm - Sex: Both sexes - Age group: YEARS25-34'],
      dtype='object')

In [174]:
df_grouped_bundir['Change_15_19'] = df_grouped_bundir[
  'Death rate per 100,000 people - Cause: Self-harm - Sex: Both sexes - Age group: YEARS15-19']
df_grouped_bundir['Change_20_24'] = df_grouped_bundir[
  'Death rate per 100,000 people - Cause: Self-harm - Sex: Both sexes - Age group: YEARS20-24']
df_grouped_bundir['Change_25_34'] = df_grouped_bundir[
  'Death rate per 100,000 people - Cause: Self-harm - Sex: Both sexes - Age group: YEARS25-34']

df_grouped_bundir['Percentage_Bundir'] = ((df_grouped_bundir[
                                             'Death rate per 100,000 people - Cause: Self-harm - Sex: Both sexes - Age group: YEARS15-19'] +
                                           df_grouped_bundir[
                                             'Death rate per 100,000 people - Cause: Self-harm - Sex: Both sexes - Age group: YEARS20-24'] +
                                           df_grouped_bundir[
                                             'Death rate per 100,000 people - Cause: Self-harm - Sex: Both sexes - Age group: YEARS25-34']) / 300000) * 100

print(f"Minimal: {df_grouped_bundir[['Percentage_Bundir']].min()}")
print(f"Maximal: {df_grouped_bundir[['Percentage_Bundir']].max()}")

Minimal: Percentage_Bundir    0.008697
dtype: float64
Maximal: Percentage_Bundir    0.011785
dtype: float64


In [198]:
# Anxiety
df_anxiety = pd.read_csv("sample/anxiety-disorders-males-vs-females.csv")
df_anxiety.isnull().sum()

Entity                                                                                0
Code                                                                               2316
Year                                                                                  0
Prevalence - Anxiety disorders - Sex: Male - Age: Age-standardized (Percent)      49555
Prevalence - Anxiety disorders - Sex: Female - Age: Age-standardized (Percent)    49555
Population (historical estimates)                                                   739
Continent                                                                         56110
dtype: int64

In [200]:
df_anxiety = df_anxiety.fillna(method='ffill')
df_anxiety.isnull().sum()

Entity                                                                            0
Code                                                                              0
Year                                                                              0
Prevalence - Anxiety disorders - Sex: Male - Age: Age-standardized (Percent)      1
Prevalence - Anxiety disorders - Sex: Female - Age: Age-standardized (Percent)    1
Population (historical estimates)                                                 1
Continent                                                                         0
dtype: int64

In [201]:
df_anxiety = df_anxiety.rename(
  columns={'Prevalence - Anxiety disorders - Sex: Female - Age: Age-standardized (Percent)': 'Anxiety_Female',
           'Prevalence - Anxiety disorders - Sex: Male - Age: Age-standardized (Percent)': 'Anxiety_Male'})

df_anxiety_filtered = df_anxiety[(df_anxiety['Year'] >= 1990) & (df_anxiety['Year'] <= 2017)]

df_anxiety_mean = df_anxiety_filtered.groupby('Year').agg({
  'Anxiety_Male': 'mean',
  'Anxiety_Female': 'mean'
})

total_df_anxiety_mean = df_anxiety_mean['Anxiety_Female'] + df_anxiety_mean['Anxiety_Male']

total_df_anxiety_mean

Year
1990    8.597020
1991    8.577117
1992    8.580791
1993    8.584431
1994    8.587703
1995    8.590236
1996    8.597522
1997    8.612425
1998    8.630520
1999    8.647335
2000    8.681072
2001    8.686826
2002    8.691382
2003    8.695295
2004    8.699017
2005    8.702923
2006    8.713647
2007    8.733901
2008    8.757902
2009    8.779832
2010    8.793758
2011    8.802289
2012    8.811699
2013    8.821406
2014    8.830757
2015    8.813362
2016    8.843971
2017    8.847163
dtype: float64

In [203]:
#Bipolar
df_bipolar = pd.read_csv("sample/bipolar-disorder-in-males-vs-females.csv")
df_bipolar.isnull().sum()

Entity                                                                               0
Code                                                                              2316
Year                                                                                 0
Prevalence - Bipolar disorder - Sex: Male - Age: Age-standardized (Percent)      49555
Prevalence - Bipolar disorder - Sex: Female - Age: Age-standardized (Percent)    49555
Population (historical estimates)                                                  739
Continent                                                                        56110
dtype: int64

In [204]:
df_bipolar = df_bipolar.fillna(method='ffill')
df_bipolar.isnull().sum()

Entity                                                                           0
Code                                                                             0
Year                                                                             0
Prevalence - Bipolar disorder - Sex: Male - Age: Age-standardized (Percent)      1
Prevalence - Bipolar disorder - Sex: Female - Age: Age-standardized (Percent)    1
Population (historical estimates)                                                1
Continent                                                                        0
dtype: int64

In [205]:
df_bipolar = df_bipolar.rename(
  columns={'Prevalence - Bipolar disorder - Sex: Male - Age: Age-standardized (Percent)': 'Bipolar_Male',
           'Prevalence - Bipolar disorder - Sex: Female - Age: Age-standardized (Percent)': 'Bipolar_Female'})

df_bipolar_filtered = df_bipolar[(df_bipolar['Year'] >= 1990) & (df_bipolar['Year'] <= 2017)]

df_bipolar_mean = df_bipolar_filtered.groupby('Year').agg({
  'Bipolar_Male': 'mean',
  'Bipolar_Female': 'mean'
})

total_df_bipolar_mean = df_bipolar_mean['Bipolar_Male'] + df_bipolar_mean['Bipolar_Female']

total_df_bipolar_mean

Year
1990    1.352318
1991    1.349416
1992    1.349923
1993    1.350428
1994    1.350914
1995    1.351366
1996    1.351799
1997    1.352259
1998    1.352736
1999    1.353219
2000    1.357046
2001    1.357960
2002    1.359527
2003    1.361330
2004    1.362937
2005    1.363902
2006    1.364416
2007    1.364993
2008    1.365593
2009    1.366167
2010    1.366647
2011    1.367089
2012    1.367591
2013    1.368116
2014    1.368614
2015    1.353710
2016    1.369390
2017    1.369737
dtype: float64

In [207]:
#Depression
df_depression = pd.read_csv("sample/depression-males-vs-females.csv")
df_depression.isnull().sum()

Entity                                                                                   0
Code                                                                                  2316
Year                                                                                     0
Prevalence - Depressive disorders - Sex: Male - Age: Age-standardized (Percent)      49555
Prevalence - Depressive disorders - Sex: Female - Age: Age-standardized (Percent)    49555
Population (historical estimates)                                                      739
Continent                                                                            56110
dtype: int64

In [208]:
df_depression = df_depression.fillna(method='ffill')
df_depression.isnull().sum()

Entity                                                                               0
Code                                                                                 0
Year                                                                                 0
Prevalence - Depressive disorders - Sex: Male - Age: Age-standardized (Percent)      1
Prevalence - Depressive disorders - Sex: Female - Age: Age-standardized (Percent)    1
Population (historical estimates)                                                    1
Continent                                                                            0
dtype: int64

In [186]:
df_depression = df_depression.rename(
  columns={'Prevalence - Depressive disorders - Sex: Female - Age: Age-standardized (Percent)' : 'Depression_Female', 'Prevalence - Depressive disorders - Sex: Male - Age: Age-standardized (Percent)' : 'Depression_Male'})

df_depression_filtered = df_depression[(df_depression['Year'] >= 1990) & (df_depression['Year'] <= 2017)]

df_depression_mean = df_depression_filtered.groupby('Year').agg({
  'Depression_Male': 'mean',
  'Depression_Female': 'mean'
})

total_df_depression_mean = df_depression_mean['Depression_Male'] + df_depression_mean['Depression_Female']

total_df_depression_mean

KeyError: "Column(s) ['Depression_Female', 'Depression_Male'] do not exist"