In [1]:
import pickle
def loadfile(filename):
    """Load and return the object pickled in ``<filename>.pickle``.

    Args:
        filename: Path of the pickle file *without* the ``.pickle`` suffix;
            the suffix is appended before the file is opened.

    Returns:
        Whatever object ``pickle.load`` reconstructs from the file.

    Note:
        Unpickling can execute arbitrary code, so only call this on files
        that come from a trusted source.
    """
    pickle_path = f'{filename}.pickle'
    with open(pickle_path, mode='rb') as handle:
        return pickle.load(handle)

In [2]:
import numpy as np

# Load the four pickled objects (X_train.pickle, X_test.pickle,
# y_train.pickle, y_test.pickle) in that order and bind each one to the
# variable of the same name.
X_train, X_test, y_train, y_test = (
    loadfile(split_name)
    for split_name in ('X_train', 'X_test', 'y_train', 'y_test')
)

In [3]:
# Show the shape of every loaded object, one per line:
# training features/labels first, then test features/labels.
print(
    *(loaded.shape for loaded in (X_train, y_train, X_test, y_test)),
    sep='\n',
)

(76786, 9)
(76786,)
(19197, 9)
(19197,)


In [3]:
from sklearn.metrics import classification_report
from sklearn.ensemble import GradientBoostingClassifier

# Baseline: gradient boosting with default hyperparameters.
# random_state is pinned so that Restart Kernel -> Run All reproduces the
# same fitted model and therefore the same report below.
model = GradientBoostingClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out test set; the report lists per-class
# precision / recall / F1 plus the overall averages.
predict = model.predict(X_test)
print(classification_report(y_test, predict))

              precision    recall  f1-score   support

           0       0.81      0.65      0.72      1586
           1       0.97      0.99      0.98     17611

    accuracy                           0.96     19197
   macro avg       0.89      0.82      0.85     19197
weighted avg       0.96      0.96      0.96     19197



In [4]:
from sklearn.model_selection import GridSearchCV

# Exhaustive hyperparameter search for the gradient boosting classifier.
# random_state is pinned so every candidate is fitted reproducibly.
model = GradientBoostingClassifier(random_state=42)

# 2 * 3 * 3 * 3 = 54 candidate combinations.
param_grid = {
    'learning_rate': [0.01, 0.1],
    'n_estimators': [60, 80, 100],
    'max_depth': [3, 4, 5],
    'min_samples_split': [2, 3, 4],
}

# 54 candidates x 10 folds = 540 fits (plus the final refit), so the folds
# are evaluated in parallel on all available cores (n_jobs=-1); this does
# not change the search result.
# NOTE(review): for single-label classification micro-averaged F1 is equal
# to accuracy, so this search effectively optimizes accuracy. Given the
# class imbalance visible in the reports, 'f1_macro' may be closer to the
# intent -- TODO confirm before changing, since it can alter best_params_.
grid_search = GridSearchCV(
    model,
    param_grid,
    cv=10,
    scoring='f1_micro',
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)
print(grid_search.best_params_)

{'learning_rate': 0.1, 'max_depth': 5, 'min_samples_split': 4, 'n_estimators': 100}


In [5]:
# Refit with the hyperparameters selected by the grid search. They are read
# from grid_search.best_params_ rather than copied by hand from its printed
# output, so this cell cannot drift out of sync with the search result.
# random_state is pinned so the fitted model and report are reproducible.
model = GradientBoostingClassifier(random_state=42, **grid_search.best_params_)
model.fit(X_train, y_train)

# Evaluate the tuned model on the held-out test set.
predict = model.predict(X_test)
print(classification_report(y_test, predict))

              precision    recall  f1-score   support

           0       0.81      0.66      0.73      1586
           1       0.97      0.99      0.98     17611

    accuracy                           0.96     19197
   macro avg       0.89      0.82      0.85     19197
weighted avg       0.96      0.96      0.96     19197

