### Data Preparation

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
# Map the CSV headers that contain spaces or punctuation to identifier-style
# names. The label column "Expert Diagnose" becomes "Klasifikasi".
column_renames = {
    "Sleep dissorder": "Sleep_dissorder",
    "Mood Swing": "Mood_Swing",
    "Suicidal thoughts": "Suicidal_thoughts",
    "Authority Respect": "Authority_Respect",
    "Aggressive Response": "Aggressive_Response",
    "Ignore & Move-On": "Ignore_MoveOn",
    "Nervous Break-down": "Nervous_Breakdown",
    "Admit Mistakes": "Admit_Mistakes",
    "Sexual Activity": "Sexual_Activity",
    "Expert Diagnose": "Klasifikasi",
}

# Load, drop the two columns that are not used downstream, and rename in a
# single chain: no in-place mutation, so re-running the cell always rebuilds
# `df` from the file in one step.
df = (
    pd.read_csv("Data/mental-disorder.csv")
    .drop(columns=["Patient Number", "Try-Explanation"])
    .rename(columns=column_renames)
)

df.head()

Unnamed: 0,Sadness,Euphoric,Exhausted,Sleep_dissorder,Mood_Swing,Suicidal_thoughts,Anorxia,Authority_Respect,Aggressive_Response,Ignore_MoveOn,Nervous_Breakdown,Admit_Mistakes,Overthinking,Sexual_Activity,Concentration,Optimisim,Klasifikasi
0,Usually,Seldom,Sometimes,Sometimes,YES,YES,NO,NO,NO,NO,YES,YES,YES,3 From 10,3 From 10,4 From 10,Bipolar Type-2
1,Usually,Seldom,Usually,Sometimes,NO,YES,NO,NO,NO,NO,NO,NO,NO,4 From 10,2 From 10,5 From 10,Depression
2,Sometimes,Most-Often,Sometimes,Sometimes,YES,NO,NO,NO,YES,NO,YES,YES,NO,6 From 10,5 From 10,7 From 10,Bipolar Type-1
3,Usually,Seldom,Usually,Most-Often,YES,YES,YES,NO,NO,NO,NO,NO,NO,3 From 10,2 From 10,2 From 10,Bipolar Type-2
4,Usually,Usually,Sometimes,Sometimes,NO,NO,NO,NO,NO,NO,YES,YES,YES,5 From 10,5 From 10,6 From 10,Normal


In [3]:
# The label column is the target; every remaining column is a feature.
y = df["Klasifikasi"]
X = df.drop(columns=["Klasifikasi"])

# Hold out 20% of the rows, stratified on the label, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=0,
)

# Display the shapes of the four resulting pieces.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((96, 16), (24, 16), (96,), (24,))

### PREPROCESSOR

In [4]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer

In [5]:
# Categorical branch: impute missing entries with the most frequent value of
# each column, then one-hot encode (handle_unknown="ignore" so categories not
# seen during fit do not raise at transform time).
categorical_steps = [
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("encode", OneHotEncoder(handle_unknown="ignore")),
]
categorical_pipeline = Pipeline(steps=categorical_steps)

In [6]:
# Columns routed through the categorical pipeline.
categorical_columns = [
    "Sadness",
    "Euphoric",
    "Exhausted",
    "Sleep_dissorder",
    "Mood_Swing",
    "Suicidal_thoughts",
    "Anorxia",
    "Authority_Respect",
    "Aggressive_Response",
    "Ignore_MoveOn",
    "Nervous_Breakdown",
    "Admit_Mistakes",
    "Overthinking",
    "Sexual_Activity",
    "Concentration",
    "Optimisim",
]

# Single-branch column transformer: the listed columns go through
# `categorical_pipeline`.
preprocessor = ColumnTransformer(
    transformers=[("categoric", categorical_pipeline, categorical_columns)]
)

### PIPELINE RANDOM FOREST

In [7]:
from sklearn.ensemble import RandomForestClassifier

In [8]:
# Preprocessing followed by a random forest with 56 trees.
# random_state is fixed (0, the same seed used for the train/test split) so
# that the fitted forest, its scores, and the grid search that clones this
# pipeline give the same results on every Restart & Run All.
pipeline1 = Pipeline([
    ("prep", preprocessor),
    ("algoforest", RandomForestClassifier(n_estimators=56, random_state=0)),
])

In [9]:
# Fit the preprocessing + random-forest pipeline on the training split.
pipeline1.fit(X=X_train, y=y_train)

In [10]:
# Score the fitted pipeline on the held-out test split.
pipeline1.score(X=X_test, y=y_test)

0.9166666666666666

### RANDOM FOREST WITH GRID SEARCH CV

In [11]:
from sklearn.model_selection import GridSearchCV

In [12]:
# Candidate forest sizes: every tree count from 1 to 100 inclusive.
parameter1 = {"algoforest__n_estimators": range(1, 101)}

# 3-fold cross-validated grid search over the pipeline, scored by accuracy.
# error_score='raise' makes a failing fit raise instead of recording a score.
modelrf = GridSearchCV(
    estimator=pipeline1,
    param_grid=parameter1,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,
    verbose=1,
    error_score='raise',
)
modelrf.fit(X_train, y_train)

Fitting 3 folds for each of 100 candidates, totalling 300 fits


In [13]:
# Tabulate the cross-validation results, ordered by rank_test_score.
cv_results = pd.DataFrame(modelrf.cv_results_)
cv_results.sort_values(by="rank_test_score")

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_algoforest__n_estimators,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
95,0.233927,0.002537,0.015941,0.000140,96,{'algoforest__n_estimators': 96},0.90625,0.81250,0.93750,0.885417,0.053115,1
93,0.504964,0.169158,0.053589,0.020809,94,{'algoforest__n_estimators': 94},0.87500,0.81250,0.93750,0.875000,0.051031,2
96,0.239852,0.003450,0.015755,0.000340,97,{'algoforest__n_estimators': 97},0.87500,0.84375,0.87500,0.864583,0.014731,3
56,0.243622,0.020465,0.025649,0.002179,57,{'algoforest__n_estimators': 57},0.90625,0.75000,0.93750,0.864583,0.082021,3
85,0.208654,0.002094,0.014560,0.000104,86,{'algoforest__n_estimators': 86},0.87500,0.81250,0.90625,0.864583,0.038976,3
...,...,...,...,...,...,...,...,...,...,...,...,...
5,0.027036,0.001286,0.007789,0.000129,6,{'algoforest__n_estimators': 6},0.68750,0.62500,0.75000,0.687500,0.051031,96
4,0.026639,0.002003,0.007716,0.000086,5,{'algoforest__n_estimators': 5},0.62500,0.68750,0.62500,0.645833,0.029463,97
2,0.022207,0.000892,0.008268,0.000320,3,{'algoforest__n_estimators': 3},0.65625,0.53125,0.59375,0.593750,0.051031,98
1,0.018157,0.001555,0.007041,0.000244,2,{'algoforest__n_estimators': 2},0.62500,0.62500,0.40625,0.552083,0.103120,99


In [14]:
# Parameter setting selected by the grid search.
modelrf.best_params_

{'algoforest__n_estimators': 96}

In [15]:
# Compare the score on the training split with the score on the held-out
# test split to see how large the gap between them is.
train_score = modelrf.score(X_train, y_train)
test_score = modelrf.score(X_test, y_test)
train_score, test_score

(1.0, 0.875)

### DATA PREDICT

In [16]:
# Two hand-written example rows, one value per feature column in the same
# order as X.columns.
rudi_answers = [
    "Most-Often", "Seldom", "Sometimes", "Sometimes",
    "YES", "NO", "YES", "YES", "NO", "NO", "YES", "YES", "YES",
    "2 From 10", "7 From 10", "3 From 10",
]
budi_answers = [
    "Sometimes", "Sometimes", "Sometimes", "Sometimes",
    "NO", "NO", "YES", "YES", "NO", "NO", "NO", "NO", "NO",
    "9 From 10", "9 From 10", "9 From 10",
]
datapred = (rudi_answers, budi_answers)

# Build a frame with the training feature layout, one row per person.
X_pred = pd.DataFrame(datapred, index=["Rudi", "Budi"], columns=X.columns)
X_pred

Unnamed: 0,Sadness,Euphoric,Exhausted,Sleep_dissorder,Mood_Swing,Suicidal_thoughts,Anorxia,Authority_Respect,Aggressive_Response,Ignore_MoveOn,Nervous_Breakdown,Admit_Mistakes,Overthinking,Sexual_Activity,Concentration,Optimisim
Rudi,Most-Often,Seldom,Sometimes,Sometimes,YES,NO,YES,YES,NO,NO,YES,YES,YES,2 From 10,7 From 10,3 From 10
Budi,Sometimes,Sometimes,Sometimes,Sometimes,NO,NO,YES,YES,NO,NO,NO,NO,NO,9 From 10,9 From 10,9 From 10


In [17]:
# Predict the label for each example row with the grid-searched model.
modelrf.predict(X=X_pred)

array(['Bipolar Type-2', 'Normal'], dtype=object)

### PICKLE MODEL

In [18]:
import pickle

In [20]:
# Persist the fitted grid-search object to disk.
# The context manager closes the file handle even if pickling fails; the
# previous bare open(...) call left the handle to be closed by the garbage
# collector.
# NOTE: pickle files can execute arbitrary code when loaded; only load this
# file from a trusted location.
with open("Data/modelml.pkl", "wb") as model_file:
    pickle.dump(modelrf, model_file)