In [168]:
import pandas as pd
from modsim import *
import matplotlib.pyplot as plt
import numpy as np

In [169]:
# install Pint if necessary
try:
  from pint import UnitRegistry
except ImportError:
  !pip install pint

In [170]:
# download modsim.py if necessary

from os.path import basename, exists


def download(url):
  filename = basename(url)
  if not exists(filename):
    from urllib.request import urlretrieve
    local, _ = urlretrieve(url, filename)
    print('Downloaded ' + local)


download('https://github.com/AllenDowney/ModSimPy/raw/master/' +
         'modsim.py')

In [171]:
# Menghapus kolom yang tidak diperlukan
data_have_index = [
  "sample/anxiety-disorders-males-vs-females.csv",
  "sample/bipolar-disorder-in-males-vs-females.csv",
  "sample/depression-males-vs-females.csv",
  "sample/eating-disorders-in-males-vs-females.csv",
  "sample/Mental health Depression disorder Data.csv",
  "sample/schizophrenia-in-males-vs-females.csv"
]

for file_path in data_have_index:
  df = pd.read_csv(file_path)
  if 'index' in df.columns:
    df = df.drop('index', axis=1)
  df.to_csv(file_path, index=False)

  df = pd.read_csv(file_path)


In [172]:
df_grouped_bundir = pd.read_csv("sample/suicide-rates-among-young-people.csv")
df_grouped_bundir = df_grouped_bundir.groupby('Year').mean().reset_index()

  df_grouped_bundir = df_grouped_bundir.groupby('Year').mean().reset_index()


In [173]:
df_grouped_bundir.columns

Index(['Year',
       'Death rate per 100,000 people - Cause: Self-harm - Sex: Both sexes - Age group: YEARS15-19',
       'Death rate per 100,000 people - Cause: Self-harm - Sex: Both sexes - Age group: YEARS20-24',
       'Death rate per 100,000 people - Cause: Self-harm - Sex: Both sexes - Age group: YEARS25-34'],
      dtype='object')

In [174]:
df_grouped_bundir['Change_15_19'] = df_grouped_bundir[
  'Death rate per 100,000 people - Cause: Self-harm - Sex: Both sexes - Age group: YEARS15-19']
df_grouped_bundir['Change_20_24'] = df_grouped_bundir[
  'Death rate per 100,000 people - Cause: Self-harm - Sex: Both sexes - Age group: YEARS20-24']
df_grouped_bundir['Change_25_34'] = df_grouped_bundir[
  'Death rate per 100,000 people - Cause: Self-harm - Sex: Both sexes - Age group: YEARS25-34']

df_grouped_bundir['Percentage_Bundir'] = ((df_grouped_bundir[
                                             'Death rate per 100,000 people - Cause: Self-harm - Sex: Both sexes - Age group: YEARS15-19'] +
                                           df_grouped_bundir[
                                             'Death rate per 100,000 people - Cause: Self-harm - Sex: Both sexes - Age group: YEARS20-24'] +
                                           df_grouped_bundir[
                                             'Death rate per 100,000 people - Cause: Self-harm - Sex: Both sexes - Age group: YEARS25-34']) / 300000) * 100

print(f"Minimal: {df_grouped_bundir[['Percentage_Bundir']].min()}")
print(f"Maximal: {df_grouped_bundir[['Percentage_Bundir']].max()}")

Minimal: Percentage_Bundir    0.008697
dtype: float64
Maximal: Percentage_Bundir    0.011785
dtype: float64


In [175]:
# Anxiety
df_anxiety = pd.read_csv("sample/anxiety-disorders-males-vs-females.csv")
df_anxiety = df_anxiety.rename(
  columns={'Prevalence - Anxiety disorders - Sex: Female - Age: Age-standardized (Percent)': 'Anxiety_Female',
           'Prevalence - Anxiety disorders - Sex: Male - Age: Age-standardized (Percent)': 'Anxiety_Male'})

df_anxiety_filtered = df_anxiety[(df_anxiety['Year'] >= 1990) & (df_anxiety['Year'] <= 2017)]

df_anxiety_mean = df_anxiety_filtered.groupby('Year').agg({
  'Anxiety_Male': 'mean',
  'Anxiety_Female': 'mean'
})

total_df_anxiety_mean = df_anxiety_mean['Anxiety_Female'] + df_anxiety_mean['Anxiety_Male']

total_df_anxiety_mean


Year
1990    8.472961
1991    8.476949
1992    8.481219
1993    8.485451
1994    8.489253
1995    8.492198
1996    8.500666
1997    8.517987
1998    8.539018
1999    8.558562
2000    8.571391
2001    8.578129
2002    8.583464
2003    8.588046
2004    8.592405
2005    8.596979
2006    8.609538
2007    8.633257
2008    8.661363
2009    8.687044
2010    8.703351
2011    8.713342
2012    8.724362
2013    8.735729
2014    8.746679
2015    8.756330
2016    8.762154
2017    8.765891
dtype: float64

In [176]:
#Bipolar
df_bipolar = pd.read_csv("sample/bipolar-disorder-in-males-vs-females.csv")
df_bipolar = df_bipolar.rename(
  columns={'Prevalence - Bipolar disorder - Sex: Male - Age: Age-standardized (Percent)': 'Bipolar_Male',
           'Prevalence - Bipolar disorder - Sex: Female - Age: Age-standardized (Percent)': 'Bipolar_Female'})

df_bipolar_filtered = df_bipolar[(df_bipolar['Year'] >= 1990) & (df_bipolar['Year'] <= 2017)]

df_bipolar_mean = df_bipolar_filtered.groupby('Year').agg({
  'Bipolar_Male': 'mean',
  'Bipolar_Female': 'mean'
})

total_df_bipolar_mean = df_bipolar_mean['Bipolar_Male'] + df_bipolar_mean['Bipolar_Female']

total_df_bipolar_mean

Year
1990    1.335648
1991    1.336212
1992    1.336802
1993    1.337388
1994    1.337954
1995    1.338479
1996    1.338982
1997    1.339516
1998    1.340072
1999    1.340633
2000    1.341185
2001    1.342255
2002    1.344090
2003    1.346201
2004    1.348083
2005    1.349213
2006    1.349815
2007    1.350491
2008    1.351194
2009    1.351865
2010    1.352427
2011    1.352945
2012    1.353534
2013    1.354148
2014    1.354732
2015    1.355223
2016    1.355640
2017    1.356047
dtype: float64

In [187]:
#Depression
df_depression = pd.read_csv("sample/depression-males-vs-females.csv")
df_depression = df_depression.rename(
  columns={'Prevalence - Depressive disorders - Sex: Female - Age: Age-standardized (Percent)' : 'Depression_Female', 'Prevalence - Depressive disorders - Sex: Male - Age: Age-standardized (Percent)' : 'Depression_Male'})

df_depression_filtered = df_depression[(df_depression['Year'] >= 1990) & (df_depression['Year'] <= 2017)]

df_depression_mean = df_depression_filtered.groupby('Year').agg({
  'Depression_Male': 'mean',
  'Depression_Female': 'mean'
})

total_df_depression_mean = df_depression_mean['Depression_Male'] + df_depression_mean['Depression_Female']

total_df_depression_mean

Year
1990    7.914651
1991    7.914349
1992    7.914588
1993    7.915142
1994    7.916107
1995    7.917522
1996    7.922799
1997    7.933081
1998    7.944269
1999    7.952575
2000    7.954099
2001    7.948923
2002    7.940162
2003    7.928963
2004    7.916635
2005    7.904365
2006    7.884976
2007    7.854854
2008    7.822119
2009    7.794169
2010    7.778017
2011    7.771880
2012    7.767307
2013    7.764528
2014    7.762887
2015    7.762909
2016    7.762051
2017    7.762562
dtype: float64

KeyError: "Column(s) ['Depression_Female', 'Depression_Male'] do not exist"