In [1]:
import pickle
def loadfile(filename):
    """Deserialize and return the object stored in ``<filename>.pickle``.

    Args:
        filename: Path of the pickle file without its ``.pickle`` suffix;
            the suffix is appended before the file is opened.

    Returns:
        The object that ``pickle.load`` reads from the file.
    """
    pickle_path = f'{filename}.pickle'
    with open(pickle_path, 'rb') as handle:
        # NOTE(review): unpickling can execute arbitrary code -- only point
        # this at files you produced yourself or otherwise trust.
        return pickle.load(handle)

In [2]:
import numpy as np

# Load the four pickled splits in one pass; loadfile() appends '.pickle'
# to each name, and the paths are relative to the working directory.
X_train, X_test, y_train, y_test = (
    loadfile(split_name)
    for split_name in ('X_train', 'X_test', 'y_train', 'y_test')
)

In [7]:
# Sanity check: show the shape of every split, one per line, in the order
# X_train, y_train, X_test, y_test.
print(*(split.shape for split in (X_train, y_train, X_test, y_test)), sep='\n')

(76786, 9)
(76786,)
(19197, 9)
(19197,)


In [3]:
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier

# Baseline: random forest with default hyperparameters.
# random_state is pinned so that a fresh Restart & Run All rebuilds the same
# forest and therefore prints the same report; 42 is an arbitrary seed.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
predict = model.predict(X_test)
# Per-class precision / recall / F1 of the predictions on X_test vs. y_test.
print(classification_report(y_test, predict))

              precision    recall  f1-score   support

           0       0.83      0.66      0.73      1586
           1       0.97      0.99      0.98     17611

    accuracy                           0.96     19197
   macro avg       0.90      0.82      0.86     19197
weighted avg       0.96      0.96      0.96     19197



## Hyperparameter tuning

In [4]:
from sklearn.model_selection import GridSearchCV

# Exhaustive search over a small grid of forest hyperparameters:
# 3 x 3 x 3 = 27 candidates, each evaluated with 10-fold cross-validation.
# The base estimator is seeded (42 is an arbitrary seed) so that repeated
# runs of this cell score the candidates identically.
model = RandomForestClassifier(random_state=42)
param_grid = {
    'n_estimators': [60, 80, 100],
    'max_depth': [3, 4, 5],
    'min_samples_leaf': [1, 2, 3],
}

# NOTE(review): for single-label targets, micro-averaged F1 is the same number
# as plain accuracy, so on imbalanced classes this search mostly rewards the
# majority class. Consider 'f1_macro' (or a minority-class F1 scorer) if
# minority-class performance matters -- confirm with the analysis goal.
# n_jobs=-1 spreads the candidate fits over all available cores; it affects
# only run time, not the selected parameters.
grid_search = GridSearchCV(
    model,
    param_grid,
    cv=10,
    scoring='f1_micro',
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)
print(grid_search.best_params_)

{'max_depth': 5, 'min_samples_leaf': 2, 'n_estimators': 100}


In [5]:
# Retrain with the hyperparameters selected by the grid search above.
# They are read from grid_search.best_params_ rather than copied by hand, so
# this cell cannot drift from the search result when the search is re-run.
# random_state is pinned (42 is an arbitrary seed) for a reproducible report.
model = RandomForestClassifier(**grid_search.best_params_, random_state=42)
model.fit(X_train, y_train)
predict = model.predict(X_test)
# Per-class precision / recall / F1 of the tuned model on X_test vs. y_test.
print(classification_report(y_test, predict))

              precision    recall  f1-score   support

           0       0.84      0.57      0.68      1586
           1       0.96      0.99      0.98     17611

    accuracy                           0.96     19197
   macro avg       0.90      0.78      0.83     19197
weighted avg       0.95      0.96      0.95     19197

