# Memisahkan Data Menjadi Train Data dan Test Data

## Import Library yang Dibutuhkan

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

## Membaca File CSV

In [2]:
# Load the dataset; the relative path is resolved against the working directory.
df = pd.read_csv('Dataset-Mental-Disorders.csv')

# Trim surrounding whitespace from every string cell. Non-string cells
# (e.g. NaN from an empty field, or a column parsed as numeric) have no
# .strip() method, so they are passed through unchanged instead of raising
# AttributeError.
df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)

## Menghapus Kolom Index dan Class (Expert Diagnose)

In [3]:
# Columns whose raw values are the strings 'YES' / 'NO'.
boolean_columns = [
    'Mood Swing', 'Suicidal thoughts',
    'Anorxia', 'Authority Respect',
    'Try-Explanation', 'Aggressive Response',
    'Ignore & Move-On', 'Nervous Break-down',
    'Admit Mistakes', 'Overthinking'
]

# Convert YES/NO to True/False in the listed columns only. One replace() call
# with a {column: {old: new}} mapping covers every column at once, instead of
# rebuilding the whole DataFrame once per column.
df = df.replace({col: {'YES': True, 'NO': False} for col in boolean_columns})

# Drop the primary-key column ('Patient Number').
df = df.drop(columns='Patient Number')

In [4]:
# Per-column summary (count / unique / top / freq); as the last expression in
# the cell, the result is rendered as the cell output.
df.describe()

Unnamed: 0,Sadness,Euphoric,Exhausted,Sleep dissorder,Mood Swing,Suicidal thoughts,Anorxia,Authority Respect,Try-Explanation,Aggressive Response,Ignore & Move-On,Nervous Break-down,Admit Mistakes,Overthinking,Sexual Activity,Concentration,Optimisim,Expert Diagnose
count,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120
unique,4,4,4,4,2,2,2,2,2,2,2,2,2,2,9,8,9,4
top,Usually,Seldom,Sometimes,Sometimes,False,False,False,False,False,False,False,True,False,True,5 From 10,4 From 10,6 From 10,Bipolar Type-2
freq,42,46,38,44,63,63,74,73,63,62,70,62,61,65,22,33,21,31


## Mengecek Jumlah Data Masing-Masing Object/Value

In [5]:
# Print how often each distinct value occurs, one column at a time.
obj_cols = df.columns
for col in obj_cols:
    header = f'Nilai unique dari kolom "{col}":'
    counts = df[col].value_counts()
    # Header, counts, then an empty string so a blank line separates columns.
    print(header, counts, '', sep='\n')

Nilai unique dari kolom "Sadness":
Usually       42
Sometimes     42
Most-Often    20
Seldom        16
Name: Sadness, dtype: int64

Nilai unique dari kolom "Euphoric":
Seldom        46
Sometimes     45
Usually       20
Most-Often     9
Name: Euphoric, dtype: int64

Nilai unique dari kolom "Exhausted":
Sometimes     38
Usually       34
Most-Often    30
Seldom        18
Name: Exhausted, dtype: int64

Nilai unique dari kolom "Sleep dissorder":
Sometimes     44
Usually       34
Most-Often    21
Seldom        21
Name: Sleep dissorder, dtype: int64

Nilai unique dari kolom "Mood Swing":
False    63
True     57
Name: Mood Swing, dtype: int64

Nilai unique dari kolom "Suicidal thoughts":
False    63
True     57
Name: Suicidal thoughts, dtype: int64

Nilai unique dari kolom "Anorxia":
False    74
True     46
Name: Anorxia, dtype: int64

Nilai unique dari kolom "Authority Respect":
False    73
True     47
Name: Authority Respect, dtype: int64

Nilai unique dari kolom "Try-Explanation":
False    

## Memilih Kolom Input (X) dan Kolom Class (Expert Diagnose)

In [6]:
# Separate the frame into features (every column except the diagnosis) and
# the target (the diagnosis column itself).
target_column = 'Expert Diagnose'
X = df.drop(columns=target_column)
y = df[target_column]
# Last expression of the cell: shows the first rows of the target.
y.head()

0    Bipolar Type-2
1        Depression
2    Bipolar Type-1
3    Bipolar Type-2
4            Normal
Name: Expert Diagnose, dtype: object

In [7]:
from IPython.display import display_html
from itertools import chain,cycle
def display_side_by_side(*args,titles=cycle([''])):
    """Render several DataFrames next to each other in the notebook output.

    Args:
        *args: Objects exposing ``to_html()`` (DataFrames in this notebook),
            shown in the order given.
        titles: Iterable of heading strings paired positionally with
            ``args``. Once it is exhausted, the remaining frames get
            ``'</br>'`` as their heading. The default supplies empty
            headings indefinitely.

    Returns:
        None. The assembled markup is passed to
        ``display_html(..., raw=True)``.
    """
    # The fallback heading keeps zip() from stopping early when fewer titles
    # than frames are supplied.
    headings = chain(titles, cycle(['</br>']))
    cells = []
    for frame, title in zip(args, headings):
        # Patch only the opening <table ...> tag. Replacing every occurrence
        # of the word "table" would also rewrite the closing tag and corrupt
        # any cell text or column name that happens to contain "table".
        table_html = frame.to_html().replace(
            '<table', '<table style="display:inline"', 1
        )
        cells.append(
            '<th style="text-align:center"><td style="vertical-align:top">'
            f'<h2 style="text-align: center;">{title}</h2>'
            f'{table_html}'
            '</td></th>'
        )
    display_html(''.join(cells), raw=True)
  

## Memisahkan Data Training dan Data Testing

In [8]:
# Hold out 20% of the rows for testing; random_state is pinned to 345 so the
# split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    train_size=0.8,
    random_state=345,
)

# Re-attach the target column to each feature subset so both can be inspected
# as complete tables.
train_data = pd.concat([X_train, y_train], axis='columns')
test_data = pd.concat([X_test, y_test], axis='columns')

# Preview both subsets side by side, then end on the training summary so the
# notebook renders it as the cell output.
display_side_by_side(
    train_data.head(),
    test_data.head(),
    titles=["Data Train", "Data Test"],
)
train_data.describe()

Unnamed: 0,Sadness,Euphoric,Exhausted,Sleep dissorder,Mood Swing,Suicidal thoughts,Anorxia,Authority Respect,Try-Explanation,Aggressive Response,Ignore & Move-On,Nervous Break-down,Admit Mistakes,Overthinking,Sexual Activity,Concentration,Optimisim,Expert Diagnose
101,Sometimes,Seldom,Most-Often,Most-Often,False,True,False,False,False,False,True,False,True,False,1 From 10,6 From 10,3 From 10,Depression
20,Sometimes,Sometimes,Sometimes,Usually,True,False,False,True,True,True,False,True,True,True,6 From 10,2 From 10,3 From 10,Bipolar Type-2
85,Sometimes,Sometimes,Most-Often,Usually,True,True,True,True,False,True,True,True,True,True,2 From 10,4 From 10,1 From 10,Bipolar Type-2
44,Most-Often,Seldom,Most-Often,Most-Often,False,True,True,False,False,False,True,True,False,True,4 From 10,3 From 10,3 From 10,Depression
76,Usually,Sometimes,Seldom,Seldom,False,False,True,False,False,True,False,False,False,True,8 From 10,2 From 10,8 From 10,Depression

Unnamed: 0,Sadness,Euphoric,Exhausted,Sleep dissorder,Mood Swing,Suicidal thoughts,Anorxia,Authority Respect,Try-Explanation,Aggressive Response,Ignore & Move-On,Nervous Break-down,Admit Mistakes,Overthinking,Sexual Activity,Concentration,Optimisim,Expert Diagnose
13,Usually,Usually,Sometimes,Sometimes,False,False,False,True,False,False,False,False,False,False,5 From 10,7 From 10,5 From 10,Normal
57,Seldom,Usually,Most-Often,Most-Often,True,False,True,False,True,False,False,False,False,True,7 From 10,2 From 10,2 From 10,Bipolar Type-1
115,Most-Often,Seldom,Usually,Sometimes,False,True,False,False,True,False,True,False,False,True,2 From 10,5 From 10,3 From 10,Depression
98,Most-Often,Sometimes,Usually,Usually,False,True,False,False,True,False,False,True,False,False,3 From 10,7 From 10,2 From 10,Depression
100,Usually,Sometimes,Seldom,Usually,True,False,False,False,False,True,True,False,False,True,8 From 10,1 From 10,8 From 10,Bipolar Type-1


Unnamed: 0,Sadness,Euphoric,Exhausted,Sleep dissorder,Mood Swing,Suicidal thoughts,Anorxia,Authority Respect,Try-Explanation,Aggressive Response,Ignore & Move-On,Nervous Break-down,Admit Mistakes,Overthinking,Sexual Activity,Concentration,Optimisim,Expert Diagnose
count,96,96,96,96,96,96,96,96,96,96,96,96,96,96,96,96,96,96
unique,4,4,4,4,2,2,2,2,2,2,2,2,2,2,9,8,9,4
top,Sometimes,Sometimes,Sometimes,Sometimes,False,False,False,False,False,True,False,False,True,True,5 From 10,4 From 10,6 From 10,Bipolar Type-2
freq,37,40,33,32,49,52,58,60,52,49,59,49,51,52,17,27,19,27
