# **Instalasi Cardea**

In [1]:
! pip install cardea
# ! pip install 'urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1' # Dijadikan komentar karena keberadaannya terdapat pada baris code asli milik Author, namun tidak memengaruhi baris code dari hasil modifikasi



# **Import Library** 

In [2]:
# Library
import pandas as pd
import numpy as np

# Cardea
from cardea import Cardea

# Model
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import SGDClassifier
from xgboost import XGBClassifier

# Grid Searh & Random Search
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

# Evaluation
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

In [3]:
# optional
import warnings
warnings.filterwarnings("ignore")

In [4]:
cd = Cardea()

In [5]:
! curl -O https://dai-cardea.s3.amazonaws.com/kaggle.zip && unzip kaggle.zip

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 2988k  100 2988k    0     0  5216k      0 --:--:-- --:--:-- --:--:-- 5216k
Archive:  kaggle.zip
replace Patient.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
  inflating: Patient.csv             
  inflating: Coding.csv              
  inflating: Appointment_Participant.csv  
  inflating: Address.csv             
 extracting: CodeableConcept.csv     
  inflating: Reference.csv           
  inflating: Observation.csv         
  inflating: Identifier.csv          
  inflating: Appointment.csv         


In [6]:
cd.load_entityset(data='kaggle')

# to view the loaded entityset
cd.es

Entityset: fhir
  Entities:
    Appointment_Participant [Rows: 6100, Columns: 2]
    Address [Rows: 81, Columns: 2]
    Identifier [Rows: 227151, Columns: 1]
    CodeableConcept [Rows: 4, Columns: 2]
    Patient [Rows: 6100, Columns: 4]
    Reference [Rows: 6100, Columns: 1]
    Coding [Rows: 3, Columns: 2]
    Appointment [Rows: 110527, Columns: 5]
    Observation [Rows: 110527, Columns: 3]
  Relationships:
    Appointment_Participant.actor -> Reference.identifier
    CodeableConcept.coding -> Coding.object_id
    Patient.address -> Address.object_id
    Appointment.participant -> Appointment_Participant.object_id
    Observation.code -> CodeableConcept.object_id
    Observation.subject -> Reference.identifier

In [7]:
cd.list_problems()

{'DiagnosisPrediction',
 'LengthOfStay',
 'MissedAppointment',
 'MortalityPrediction',
 'ProlongedLengthOfStay',
 'Readmission'}

In [8]:
# select problem
label_times = cd.select_problem('MissedAppointment')

In [9]:
# feature engineering
feature_matrix = cd.generate_features(label_times[:1000]) # takes a while for the full dataset
feature_matrix.head(5)

Built 13 features
Elapsed: 00:34 | Progress: 100%|██████████


Unnamed: 0,participant = 2680425062,participant = 4275143764,participant = 2615334244,participant = 2410824900,participant = 2406221984,participant = 1868414665,participant = 1692482157,participant = 1125465544,participant = 846537388,participant = 4121228070,participant is unknown,DAY(created) = 29,DAY(created) = 28,DAY(created) = 27,DAY(created) = 18,DAY(created) = 15,DAY(created) = 26,DAY(created) = 25,DAY(created) = 5,DAY(created) = 1,DAY(created) = 8,DAY(created) is unknown,DAY(start) = 29,DAY(start) is unknown,IS_WEEKEND(created),IS_WEEKEND(start),MONTH(created) = 4,MONTH(created) = 3,MONTH(created) = 2,MONTH(created) = 1,MONTH(created) is unknown,MONTH(start) = 4,MONTH(start) is unknown,WEEKDAY(created) = 4,WEEKDAY(created) = 2,WEEKDAY(created) = 1,WEEKDAY(created) = 3,WEEKDAY(created) = 0,WEEKDAY(created) is unknown,WEEKDAY(start) = 4,WEEKDAY(start) is unknown,YEAR(created) = 2016,YEAR(created) is unknown,YEAR(start) = 2016,YEAR(start) is unknown,Appointment_Participant.actor = 74200000000000,Appointment_Participant.actor = 713000000000000,Appointment_Participant.actor = 41400000000000,Appointment_Participant.actor = 28200000000000,Appointment_Participant.actor = 7270000000000,Appointment_Participant.actor = 7230000000000,Appointment_Participant.actor = 4920000000000,Appointment_Participant.actor = 3880000000000,Appointment_Participant.actor = 2760000000000,Appointment_Participant.actor = 735000000000000,Appointment_Participant.actor is unknown,Appointment_Participant.COUNT(Appointment),label
0,False,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,True,False,True,False,False,False,False,False,True,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,True,39,noshow
1,False,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,True,False,True,False,False,False,False,False,True,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,True,27,noshow
2,False,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,True,False,True,False,False,False,False,False,True,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,True,55,noshow
3,False,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,True,False,True,False,False,False,False,False,True,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,True,39,noshow
4,False,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,True,False,True,False,False,False,False,False,True,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,True,28,noshow


In [10]:
# shuffle the dataframe
feature_matrix = feature_matrix.sample(frac=1)

# pop the target labels
y = list(feature_matrix.pop('label'))
X = feature_matrix.values

X_train, X_test, y_train, y_test = cd.train_test_split(X, y, test_size=0.2, shuffle=True)

# **Pengujian Pipeline**

##**Modeling**

In [11]:
cd.select_pipeline('Random Forest')
cd.fit(X_train, y_train)
# y_pred = cd.predict(X_test) # Dijadikan komentar karena keberadaannya terdapat pada baris code asli milik Author, namun tidak memengaruhi baris code dari hasil modifikasi

In [12]:
cd.evaluate(X_test, y_test, test_size=0.2, shuffle=True)

{'Accuracy': 0.7, 'Confusion Matrix': array([[ 1,  6],
        [ 6, 27]]), 'F1 Macro': 0.48051948051948057, 'Precision': 0.48051948051948057, 'Recall': 0.48051948051948057}

## **Modeling 1**

In [13]:
cd.select_pipeline('Logistic Regression')
cd.fit(X_train, y_train)
# y_pred = cd.predict(X_test) # Dijadikan komentar karena keberadaannya terdapat pada baris code asli milik Author, namun tidak memengaruhi baris code dari hasil modifikasi

In [14]:
cd.evaluate(X_test, y_test, test_size=0.2, shuffle=True)

{'Accuracy': 0.775, 'Confusion Matrix': array([[ 0,  9],
        [ 0, 31]]), 'F1 Macro': 0.4366197183098592, 'Precision': 0.3875, 'Recall': 0.5}

## **Modeling 2**

In [15]:
cd.select_pipeline('Gaussian Naive Bayes')
cd.fit(X_train, y_train)
# y_pred = cd.predict(X_test) # Dijadikan komentar karena keberadaannya terdapat pada baris code asli milik Author, namun tidak memengaruhi baris code dari hasil modifikasi

In [16]:
cd.evaluate(X_test, y_test, test_size=0.2, shuffle=True)

{'Accuracy': 0.3, 'Confusion Matrix': array([[ 8,  2],
        [26,  4]]), 'F1 Macro': 0.29292929292929293, 'Precision': 0.4509803921568627, 'Recall': 0.4666666666666667}

## **Modeling 3**

In [17]:
cd.select_pipeline('Gradient Boosting')
cd.fit(X_train, y_train)
# y_pred = cd.predict(X_test) # Dijadikan komentar karena keberadaannya terdapat pada baris code asli milik Author, namun tidak memengaruhi baris code dari hasil modifikasi

In [18]:
cd.evaluate(X_test, y_test, test_size=0.2, shuffle=True)

{'Accuracy': 0.9, 'Confusion Matrix': array([[ 0,  3],
        [ 1, 36]]), 'F1 Macro': 0.47368421052631576, 'Precision': 0.46153846153846156, 'Recall': 0.4864864864864865}

## **Modeling 4**

In [19]:
cd.select_pipeline('K-Nearest Neightbors')
cd.fit(X_train, y_train)
# y_pred = cd.predict(X_test) # Dijadikan komentar karena keberadaannya terdapat pada baris code asli milik Author, namun tidak memengaruhi baris code dari hasil modifikasi

In [20]:
cd.evaluate(X_test, y_test, test_size=0.2, shuffle=True)

{'Accuracy': 0.7, 'Confusion Matrix': array([[ 0,  9],
        [ 3, 28]]), 'F1 Macro': 0.411764705882353, 'Precision': 0.3783783783783784, 'Recall': 0.45161290322580644}

## **Modeling 5**

In [21]:
cd.select_pipeline('Multinomial Naive Bayes')
cd.fit(X_train, y_train)
# y_pred = cd.predict(X_test) # Dijadikan komentar karena keberadaannya terdapat pada baris code asli milik Author, namun tidak memengaruhi baris code dari hasil modifikasi

In [22]:
cd.evaluate(X_test, y_test, test_size=0.2, shuffle=True)

{'Accuracy': 0.725, 'Confusion Matrix': array([[ 1,  6],
        [ 5, 28]]), 'F1 Macro': 0.494833524684271, 'Precision': 0.49509803921568624, 'Recall': 0.4956709956709957}

## **Modeling 6**

In [23]:
# modeling6
cd.select_pipeline('Stochastic Gradient Descent')
cd.fit(X_train, y_train)
# y_pred = cd.predict(X_test) # Dijadikan komentar karena keberadaannya terdapat pada baris code asli milik Author, namun tidak memengaruhi baris code dari hasil modifikasi

In [24]:
cd.evaluate(X_test, y_test, test_size=0.2, shuffle=True)

{'Accuracy': 0.85, 'Confusion Matrix': array([[ 2,  4],
        [ 2, 32]]), 'F1 Macro': 0.6571428571428571, 'Precision': 0.6944444444444444, 'Recall': 0.6372549019607843}

## **Modeling 7**

In [25]:
# modeling7
cd.select_pipeline('XGB')
cd.fit(X_train, y_train)
# y_pred = cd.predict(X_test) # Dijadikan komentar karena keberadaannya terdapat pada baris code asli milik Author, namun tidak memengaruhi baris code dari hasil modifikasi

In [26]:
cd.evaluate(X_test, y_test, test_size=0.2, shuffle=True)

{'Accuracy': 0.75, 'Confusion Matrix': array([[ 0,  8],
        [ 2, 30]]), 'F1 Macro': 0.4285714285714286, 'Precision': 0.39473684210526316, 'Recall': 0.46875}

# **Inisiasi Hyperparameter**

In [27]:
param_rf = {'criterion': ["gini","entropy"], 'n_estimators': [10, 100, 1000]} # Hyperparameter Random Forest
param_lr = {'C': np.logspace(-3,3,5,7), 'penalty': ["l1","l2"], 'fit_intercept': ["True", "False"]} # Hyperparameter Logistic Regression
param_gnb = {'var_smoothing': [1e-9]} # Hyperparameter Gaussian Naive Bayes
param_gb = {'learning_rate': [0.001, 0.01, 0.1, 0.3], 'n_estimators': [10, 100, 1000]} #Hyperparameter Gradient Boosting
param_knn = {'n_neighbors': [5], 'weights': ['uniform', 'distance'], 'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']} # Hyperparameter K-Nearest Neighbors
param_mnb = {'alpha': [1.0, 0.01, 0.001], "fit_prior": ['True', 'False']} # Hyperparameter Multinomial Naive Bayes
param_sgd = {'loss': ['hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron', 'huber', 'epsilon_insensitive', 'squared_epsilon_insensitive'], 'penalty': ['l2', 'l1', 'elasticnet']} # Hyperparameter Stochastic Gradient Descent
param_xgb = {'booster': ['gbtree', 'gblinear']} # Hyperparameter XGboost

# **Implementasi Grid Search**

## **Grid Search Random Forest**

In [28]:
gridsearch = GridSearchCV(RandomForestClassifier(), param_rf, scoring='accuracy')
gridsearch.fit(X_train, y_train)

GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=RandomForestClassifier(bootstrap=True, class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features='auto',
                                              max_leaf_nodes=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              n_estimators='warn', n_jobs=None,
                                              oob_score=False,
                                              random_state=None, verbose=0,
                                              warm_start=False),
           

## **Grid Search Logistic Regression**

In [29]:
gridsearch1 = GridSearchCV(LogisticRegression(), param_lr, scoring='accuracy')
gridsearch1.fit(X_train, y_train)

GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,
                                          fit_intercept=True,
                                          intercept_scaling=1, l1_ratio=None,
                                          max_iter=100, multi_class='warn',
                                          n_jobs=None, penalty='l2',
                                          random_state=None, solver='warn',
                                          tol=0.0001, verbose=0,
                                          warm_start=False),
             iid='warn', n_jobs=None,
             param_grid={'C': array([1.00000000e-03, 3.16227766e-02, 1.00000000e+00, 3.16227766e+01,
       1.00000000e+03]),
                         'fit_intercept': ['True', 'False'],
                         'penalty': ['l1', 'l2']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='ac

## **Grid Search Gaussian Naive Bayes**

In [30]:
gridsearch2 = GridSearchCV(GaussianNB(), param_gnb, scoring='accuracy')
gridsearch2.fit(X_train, y_train)

GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=GaussianNB(priors=None, var_smoothing=1e-09), iid='warn',
             n_jobs=None, param_grid={'var_smoothing': [1e-09]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='accuracy', verbose=0)

## **Grid Search Gradient Boosting**

In [31]:
gridsearch3 = GridSearchCV(GradientBoostingClassifier(), param_gb, scoring='accuracy')
gridsearch3.fit(X_train, y_train)

GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=GradientBoostingClassifier(criterion='friedman_mse',
                                                  init=None, learning_rate=0.1,
                                                  loss='deviance', max_depth=3,
                                                  max_features=None,
                                                  max_leaf_nodes=None,
                                                  min_impurity_decrease=0.0,
                                                  min_impurity_split=None,
                                                  min_samples_leaf=1,
                                                  min_samples_split=2,
                                                  min_weight_fraction_leaf=0.0,
                                                  n_estimators=100,
                                                  n_iter_no_change=None,
                                                  pre

## **Grid Search K-Nearest Neightbors**

In [32]:
gridsearch4 = GridSearchCV(KNeighborsClassifier(), param_knn, scoring='accuracy')
gridsearch4.fit(X_train, y_train)

GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                            metric='minkowski',
                                            metric_params=None, n_jobs=None,
                                            n_neighbors=5, p=2,
                                            weights='uniform'),
             iid='warn', n_jobs=None,
             param_grid={'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
                         'n_neighbors': [5],
                         'weights': ['uniform', 'distance']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='accuracy', verbose=0)

## **Grid Search Multinomial Naive Bayes**

In [33]:
gridsearch5 = GridSearchCV(MultinomialNB(), param_mnb, scoring='accuracy')
gridsearch5.fit(X_train, y_train)

GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=MultinomialNB(alpha=1.0, class_prior=None,
                                     fit_prior=True),
             iid='warn', n_jobs=None,
             param_grid={'alpha': [1.0, 0.01, 0.001],
                         'fit_prior': ['True', 'False']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='accuracy', verbose=0)

## **Grid Search Stochastic Gradient Descent**

In [34]:
gridsearch6 = GridSearchCV(SGDClassifier(), param_sgd, scoring='accuracy')
gridsearch6.fit(X_train, y_train)

GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=SGDClassifier(alpha=0.0001, average=False,
                                     class_weight=None, early_stopping=False,
                                     epsilon=0.1, eta0=0.0, fit_intercept=True,
                                     l1_ratio=0.15, learning_rate='optimal',
                                     loss='hinge', max_iter=1000,
                                     n_iter_no_change=5, n_jobs=None,
                                     penalty='l2', power_t=0.5,
                                     random_state=None, shuffle=True, tol=0.001,
                                     validation_fraction=0.1, verbose=0,
                                     warm_start=False),
             iid='warn', n_jobs=None,
             param_grid={'loss': ['hinge', 'log', 'modified_huber',
                                  'squared_hinge', 'perceptron', 'huber',
                                  'epsilon_insensiti

## **Grid Search XGBoost**

In [35]:
gridsearch7 = GridSearchCV(XGBClassifier(), param_xgb, scoring='accuracy')
gridsearch7.fit(X_train, y_train)

GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=XGBClassifier(base_score=0.5, booster='gbtree',
                                     colsample_bylevel=1, colsample_bynode=1,
                                     colsample_bytree=1, gamma=0,
                                     learning_rate=0.1, max_delta_step=0,
                                     max_depth=3, min_child_weight=1,
                                     missing=None, n_estimators=100, n_jobs=1,
                                     nthread=None, objective='binary:logistic',
                                     random_state=0, reg_alpha=0, reg_lambda=1,
                                     scale_pos_weight=1, seed=None, silent=None,
                                     subsample=1, verbosity=1),
             iid='warn', n_jobs=None,
             param_grid={'booster': ['gbtree', 'gblinear']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='acc

# **Hasil Grid Search**

In [36]:
print (gridsearch.best_params_) # RF
print(gridsearch1.best_params_) # LR
print(gridsearch2.best_params_) # GNB
print(gridsearch3.best_params_) # GB
print(gridsearch4.best_params_) # KNN
print(gridsearch5.best_params_) # MNB
print(gridsearch6.best_params_) # SGD
print(gridsearch7.best_params_) # XGB

{'criterion': 'entropy', 'n_estimators': 1000}
{'C': 0.001, 'fit_intercept': 'True', 'penalty': 'l1'}
{'var_smoothing': 1e-09}
{'learning_rate': 0.001, 'n_estimators': 10}
{'algorithm': 'ball_tree', 'n_neighbors': 5, 'weights': 'uniform'}
{'alpha': 1.0, 'fit_prior': 'True'}
{'loss': 'huber', 'penalty': 'l1'}
{'booster': 'gblinear'}


# **Implementasi Hyperparameter Berdasarkan Hasil Grid Search**

## **Random Forest**

In [37]:
rf = RandomForestClassifier(**gridsearch.best_params_).fit(X_train, y_train)
y_test_pred = rf.predict(X_test)
print(classification_report(y_test , y_test_pred))

              precision    recall  f1-score   support

   fulfilled       0.29      0.31      0.30        32
      noshow       0.87      0.86      0.86       168

    accuracy                           0.77       200
   macro avg       0.58      0.58      0.58       200
weighted avg       0.78      0.77      0.77       200



## **Logistic Regression**

In [38]:
lr = LogisticRegression(**gridsearch1.best_params_).fit(X_train, y_train)
y_test_pred = lr.predict(X_test)
print(classification_report(y_test , y_test_pred))

              precision    recall  f1-score   support

   fulfilled       0.00      0.00      0.00        32
      noshow       0.84      1.00      0.91       168

    accuracy                           0.84       200
   macro avg       0.42      0.50      0.46       200
weighted avg       0.71      0.84      0.77       200



## **Gaussian Naive Bayes**

In [39]:
gnb = GaussianNB(**gridsearch2.best_params_).fit(X_train, y_train)
y_test_pred = gnb.predict(X_test)
print(classification_report(y_test , y_test_pred))

              precision    recall  f1-score   support

   fulfilled       0.16      0.97      0.28        32
      noshow       0.86      0.04      0.07       168

    accuracy                           0.18       200
   macro avg       0.51      0.50      0.17       200
weighted avg       0.75      0.18      0.10       200



## **Gradient Boosting**

In [40]:
gb = GradientBoostingClassifier(**gridsearch3.best_params_).fit(X_train, y_train)
y_test_pred = gb.predict(X_test)
print(classification_report(y_test , y_test_pred))

              precision    recall  f1-score   support

   fulfilled       0.00      0.00      0.00        32
      noshow       0.84      1.00      0.91       168

    accuracy                           0.84       200
   macro avg       0.42      0.50      0.46       200
weighted avg       0.71      0.84      0.77       200



## **K-Nearest Neighbors**

In [41]:
knn = KNeighborsClassifier(**gridsearch4.best_params_).fit(X_train, y_train)
y_test_pred = knn.predict(X_test)
print(classification_report(y_test , y_test_pred))

              precision    recall  f1-score   support

   fulfilled       0.17      0.03      0.05        32
      noshow       0.84      0.97      0.90       168

    accuracy                           0.82       200
   macro avg       0.50      0.50      0.48       200
weighted avg       0.73      0.82      0.76       200



## **Multinomial Naive Bayes**

In [42]:
mnb = MultinomialNB(**gridsearch5.best_params_).fit(X_train, y_train)
y_test_pred = mnb.predict(X_test)
print(classification_report(y_test , y_test_pred))

              precision    recall  f1-score   support

   fulfilled       1.00      0.06      0.12        32
      noshow       0.85      1.00      0.92       168

    accuracy                           0.85       200
   macro avg       0.92      0.53      0.52       200
weighted avg       0.87      0.85      0.79       200



## **Stochastic Gradient Descent**

In [43]:
sgd = SGDClassifier(**gridsearch6.best_params_).fit(X_train, y_train)
y_test_pred = sgd.predict(X_test)
print(classification_report(y_test , y_test_pred))

              precision    recall  f1-score   support

   fulfilled       0.16      1.00      0.28        32
      noshow       1.00      0.02      0.05       168

    accuracy                           0.18       200
   macro avg       0.58      0.51      0.16       200
weighted avg       0.87      0.18      0.08       200



## **XGBoost**

In [44]:
xgb = XGBClassifier(**gridsearch7.best_params_).fit(X_train, y_train)
y_test_pred = xgb.predict(X_test)
print(classification_report(y_test , y_test_pred))

              precision    recall  f1-score   support

   fulfilled       0.00      0.00      0.00        32
      noshow       0.84      1.00      0.91       168

    accuracy                           0.84       200
   macro avg       0.42      0.50      0.46       200
weighted avg       0.71      0.84      0.77       200



# **Implementasi Random Search**

## **Random Forest**

In [45]:
randomsearch = RandomizedSearchCV(RandomForestClassifier(), param_rf, scoring='accuracy')
randomsearch.fit(X_train, y_train)

RandomizedSearchCV(cv='warn', error_score='raise-deprecating',
                   estimator=RandomForestClassifier(bootstrap=True,
                                                    class_weight=None,
                                                    criterion='gini',
                                                    max_depth=None,
                                                    max_features='auto',
                                                    max_leaf_nodes=None,
                                                    min_impurity_decrease=0.0,
                                                    min_impurity_split=None,
                                                    min_samples_leaf=1,
                                                    min_samples_split=2,
                                                    min_weight_fraction_leaf=0.0,
                                                    n_estimators='warn',
                                                    n_jobs

## **Logistic Regression**

In [46]:
randomsearch1 = RandomizedSearchCV(LogisticRegression(), param_lr, scoring='accuracy')
randomsearch1.fit(X_train, y_train)

RandomizedSearchCV(cv='warn', error_score='raise-deprecating',
                   estimator=LogisticRegression(C=1.0, class_weight=None,
                                                dual=False, fit_intercept=True,
                                                intercept_scaling=1,
                                                l1_ratio=None, max_iter=100,
                                                multi_class='warn', n_jobs=None,
                                                penalty='l2', random_state=None,
                                                solver='warn', tol=0.0001,
                                                verbose=0, warm_start=False),
                   iid='warn', n_iter=10, n_jobs=None,
                   param_distributions={'C': array([1.00000000e-03, 3.16227766e-02, 1.00000000e+00, 3.16227766e+01,
       1.00000000e+03]),
                                        'fit_intercept': ['True', 'False'],
                                        'penalty':

## **Gaussian Naive Bayes**

In [47]:
randomsearch2 = RandomizedSearchCV(GaussianNB(), param_gnb, scoring='accuracy')
randomsearch2.fit(X_train, y_train)

RandomizedSearchCV(cv='warn', error_score='raise-deprecating',
                   estimator=GaussianNB(priors=None, var_smoothing=1e-09),
                   iid='warn', n_iter=10, n_jobs=None,
                   param_distributions={'var_smoothing': [1e-09]},
                   pre_dispatch='2*n_jobs', random_state=None, refit=True,
                   return_train_score=False, scoring='accuracy', verbose=0)

## **Gradient Boosting**

In [48]:
randomsearch3 = RandomizedSearchCV(GradientBoostingClassifier(), param_gb, scoring='accuracy')
randomsearch3.fit(X_train, y_train)

RandomizedSearchCV(cv='warn', error_score='raise-deprecating',
                   estimator=GradientBoostingClassifier(criterion='friedman_mse',
                                                        init=None,
                                                        learning_rate=0.1,
                                                        loss='deviance',
                                                        max_depth=3,
                                                        max_features=None,
                                                        max_leaf_nodes=None,
                                                        min_impurity_decrease=0.0,
                                                        min_impurity_split=None,
                                                        min_samples_leaf=1,
                                                        min_samples_split=2,
                                                        min_weight_fraction_leaf=0.0,
                

## **K-Nearest Neighbors**

In [49]:
randomsearch4 = RandomizedSearchCV(KNeighborsClassifier(), param_knn, scoring='accuracy')
randomsearch4.fit(X_train, y_train)

RandomizedSearchCV(cv='warn', error_score='raise-deprecating',
                   estimator=KNeighborsClassifier(algorithm='auto',
                                                  leaf_size=30,
                                                  metric='minkowski',
                                                  metric_params=None,
                                                  n_jobs=None, n_neighbors=5,
                                                  p=2, weights='uniform'),
                   iid='warn', n_iter=10, n_jobs=None,
                   param_distributions={'algorithm': ['auto', 'ball_tree',
                                                      'kd_tree', 'brute'],
                                        'n_neighbors': [5],
                                        'weights': ['uniform', 'distance']},
                   pre_dispatch='2*n_jobs', random_state=None, refit=True,
                   return_train_score=False, scoring='accuracy', verbose=0)

## **Multinomial Naive Bayes**

In [50]:
randomsearch5 = RandomizedSearchCV(MultinomialNB(), param_mnb, scoring='accuracy')
randomsearch5.fit(X_train, y_train)

RandomizedSearchCV(cv='warn', error_score='raise-deprecating',
                   estimator=MultinomialNB(alpha=1.0, class_prior=None,
                                           fit_prior=True),
                   iid='warn', n_iter=10, n_jobs=None,
                   param_distributions={'alpha': [1.0, 0.01, 0.001],
                                        'fit_prior': ['True', 'False']},
                   pre_dispatch='2*n_jobs', random_state=None, refit=True,
                   return_train_score=False, scoring='accuracy', verbose=0)

## **Stochastic Stochastic Gradient Descent**

In [51]:
randomsearch6 = RandomizedSearchCV(SGDClassifier(), param_sgd, scoring='accuracy')
randomsearch6.fit(X_train, y_train)

RandomizedSearchCV(cv='warn', error_score='raise-deprecating',
                   estimator=SGDClassifier(alpha=0.0001, average=False,
                                           class_weight=None,
                                           early_stopping=False, epsilon=0.1,
                                           eta0=0.0, fit_intercept=True,
                                           l1_ratio=0.15,
                                           learning_rate='optimal',
                                           loss='hinge', max_iter=1000,
                                           n_iter_no_change=5, n_jobs=None,
                                           penalty='l2', power_t=0.5,
                                           random_state=None, shuffle=True,
                                           tol=0.0...
                                           verbose=0, warm_start=False),
                   iid='warn', n_iter=10, n_jobs=None,
                   param_distributions={'loss': ['

## **XGBoost**

In [52]:
randomsearch7 = RandomizedSearchCV(XGBClassifier(), param_xgb, scoring='accuracy')
randomsearch7.fit(X_train, y_train)

RandomizedSearchCV(cv='warn', error_score='raise-deprecating',
                   estimator=XGBClassifier(base_score=0.5, booster='gbtree',
                                           colsample_bylevel=1,
                                           colsample_bynode=1,
                                           colsample_bytree=1, gamma=0,
                                           learning_rate=0.1, max_delta_step=0,
                                           max_depth=3, min_child_weight=1,
                                           missing=None, n_estimators=100,
                                           n_jobs=1, nthread=None,
                                           objective='binary:logistic',
                                           random_state=0, reg_alpha=0,
                                           reg_lambda=1, scale_pos_weight=1,
                                           seed=None, silent=None, subsample=1,
                                           verbosity=1),
     

# **Hasil Random Search**

In [53]:
print(randomsearch.best_params_) # RF
print(randomsearch1.best_params_) # LR
print(randomsearch2.best_params_) # GNB
print(randomsearch3.best_params_) # GB
print(randomsearch4.best_params_) # KNN
print(randomsearch5.best_params_) # MNB
print(randomsearch6.best_params_) # SGD
print(randomsearch7.best_params_) # XGB

{'n_estimators': 100, 'criterion': 'entropy'}
{'penalty': 'l2', 'fit_intercept': 'True', 'C': 0.03162277660168379}
{'var_smoothing': 1e-09}
{'n_estimators': 10, 'learning_rate': 0.001}
{'weights': 'uniform', 'n_neighbors': 5, 'algorithm': 'ball_tree'}
{'fit_prior': 'True', 'alpha': 1.0}
{'penalty': 'elasticnet', 'loss': 'modified_huber'}
{'booster': 'gblinear'}


# **Implementasi Hyperparameter Berdasarkan Hasil Random Search**

## **Random Forest**

In [54]:
rf = RandomForestClassifier(**randomsearch.best_params_).fit(X_train, y_train)
y_test_pred = rf.predict(X_test)
print(classification_report(y_test , y_test_pred))

              precision    recall  f1-score   support

   fulfilled       0.27      0.28      0.28        32
      noshow       0.86      0.86      0.86       168

    accuracy                           0.77       200
   macro avg       0.57      0.57      0.57       200
weighted avg       0.77      0.77      0.77       200



## **Logistic Regression**

In [55]:
lr = LogisticRegression(**randomsearch1.best_params_).fit(X_train, y_train)
y_test_pred = lr.predict(X_test)
print(classification_report(y_test , y_test_pred))

              precision    recall  f1-score   support

   fulfilled       0.00      0.00      0.00        32
      noshow       0.84      1.00      0.91       168

    accuracy                           0.84       200
   macro avg       0.42      0.50      0.46       200
weighted avg       0.71      0.84      0.77       200



## **Gaussian Naive Bayes**

In [56]:
gnb = GaussianNB(**randomsearch2.best_params_).fit(X_train, y_train)
y_test_pred = gnb.predict(X_test)
print(classification_report(y_test , y_test_pred))

              precision    recall  f1-score   support

   fulfilled       0.16      0.97      0.28        32
      noshow       0.86      0.04      0.07       168

    accuracy                           0.18       200
   macro avg       0.51      0.50      0.17       200
weighted avg       0.75      0.18      0.10       200



## **Gradient Boosting**

In [57]:
gb = GradientBoostingClassifier(**randomsearch3.best_params_).fit(X_train, y_train)
y_test_pred = gb.predict(X_test)
print(classification_report(y_test , y_test_pred))

              precision    recall  f1-score   support

   fulfilled       0.00      0.00      0.00        32
      noshow       0.84      1.00      0.91       168

    accuracy                           0.84       200
   macro avg       0.42      0.50      0.46       200
weighted avg       0.71      0.84      0.77       200



## **K-Nearest Neighbors**

In [58]:
knn = KNeighborsClassifier(**randomsearch4.best_params_).fit(X_train, y_train)
y_test_pred = knn.predict(X_test)
print(classification_report(y_test , y_test_pred))

              precision    recall  f1-score   support

   fulfilled       0.17      0.03      0.05        32
      noshow       0.84      0.97      0.90       168

    accuracy                           0.82       200
   macro avg       0.50      0.50      0.48       200
weighted avg       0.73      0.82      0.76       200



## **Multinomial Naive Bayes**

In [59]:
mnb = MultinomialNB(**randomsearch5.best_params_).fit(X_train, y_train)
y_test_pred = mnb.predict(X_test)
print(classification_report(y_test , y_test_pred))

              precision    recall  f1-score   support

   fulfilled       1.00      0.06      0.12        32
      noshow       0.85      1.00      0.92       168

    accuracy                           0.85       200
   macro avg       0.92      0.53      0.52       200
weighted avg       0.87      0.85      0.79       200



## **Stochastic Gradient Descent**

In [60]:
sgd = SGDClassifier(**randomsearch6.best_params_).fit(X_train, y_train)
y_test_pred = sgd.predict(X_test)
print(classification_report(y_test , y_test_pred))

              precision    recall  f1-score   support

   fulfilled       0.00      0.00      0.00        32
      noshow       0.84      1.00      0.91       168

    accuracy                           0.84       200
   macro avg       0.42      0.50      0.46       200
weighted avg       0.71      0.84      0.77       200



## **XGBoost**

In [61]:
xgb = XGBClassifier(**randomsearch7.best_params_).fit(X_train, y_train)
y_test_pred = xgb.predict(X_test)
print(classification_report(y_test , y_test_pred))

              precision    recall  f1-score   support

   fulfilled       0.00      0.00      0.00        32
      noshow       0.84      1.00      0.91       168

    accuracy                           0.84       200
   macro avg       0.42      0.50      0.46       200
weighted avg       0.71      0.84      0.77       200

