In [100]:
from configparser import ConfigParser
import psycopg2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from IPython.display import display
from sklearn.metrics import classification_report, recall_score, precision_score, accuracy_score
from sklearn.ensemble import GradientBoostingClassifier
from sklearn import metrics
import time
from imblearn.over_sampling import SMOTE

Import Data

In [101]:
x_train = pd.read_csv("x_train.csv")#read input data
y_train = pd.read_csv("y_train.csv")#read input data
x_test = pd.read_csv("x_test.csv")#read input data
y_test = pd.read_csv("y_test.csv")#read input data

In [102]:
x_train.drop(columns=x_train.columns[0], axis=1, inplace=True)
y_train.drop(columns=y_train.columns[0], axis=1, inplace=True)
x_test.drop(columns=x_test.columns[0], axis=1, inplace=True)
y_test.drop(columns=y_test.columns[0], axis=1, inplace=True)

Implement Gradient Boosting Model

In [103]:
tic = time.perf_counter()
gradientbooster = GradientBoostingClassifier(learning_rate=0.004, max_depth=3, max_features=10)
gradientbooster.fit(x_train, y_train.values.ravel())
toc = time.perf_counter()
print(f"Model Ran in {toc - tic:0.4f} seconds")
prediction = gradientbooster.predict(x_test)

Model Ran in 1.8233 seconds


Prediction Accuracy Recall

In [104]:
print("Accuracy:", metrics.accuracy_score(y_test, prediction))
print("Recall - Macro:", metrics.recall_score(y_test, prediction, average="macro"))
print("Precision - Macro:", metrics.precision_score(y_test, prediction, average="macro"))
print("Recall - Micro:", metrics.recall_score(y_test, prediction, average="micro"))
print("Precision - Micro:", metrics.precision_score(y_test, prediction, average="micro"))

Accuracy: 0.9616858237547893
Recall - Macro: 0.8241758241758241
Precision - Macro: 0.9862542955326461
Recall - Micro: 0.9616858237547893
Precision - Micro: 0.9616858237547893


Import Data with Reduced Features and SMOTE

In [105]:
x_train_reduced = pd.read_csv("x_train_reduced.csv")#read input data
y_train_reduced = pd.read_csv("y_train_reduced.csv")#read input data
x_test_reduced = pd.read_csv("x_test_reduced.csv")#read input data
y_test_reduced = pd.read_csv("y_test_reduced.csv")#read input data

x_train_reduced.drop(columns=x_train_reduced.columns[0], axis=1, inplace=True)
y_train_reduced.drop(columns=y_train_reduced.columns[0], axis=1, inplace=True)
x_test_reduced.drop(columns=x_test_reduced.columns[0], axis=1, inplace=True)
y_test_reduced.drop(columns=y_test_reduced.columns[0], axis=1, inplace=True)

sm_reduced = SMOTE(random_state=42)
x_res_reduced, y_res_reduced = sm_reduced.fit_resample(x_train_reduced, y_train_reduced)

Implement Model

In [110]:
tic = time.perf_counter()
gradientbooster_reduced = GradientBoostingClassifier(learning_rate=0.004, max_depth=3, max_features=10)
gradientbooster_reduced.fit(x_train_reduced, y_train_reduced.values.ravel())
toc = time.perf_counter()
print(f"Model Ran in {toc - tic:0.4f} seconds")
prediction_reduced = gradientbooster_reduced.predict(x_test_reduced)

Model Ran in 1.2001 seconds


Accuracy Precision Recall

In [107]:
print("Accuracy:", metrics.accuracy_score(y_test_reduced, prediction_reduced))
print("Recall - Macro:", metrics.recall_score(y_test_reduced, prediction_reduced, average="macro"))
print("Precision - Macro:", metrics.precision_score(y_test_reduced, prediction_reduced, average="macro"))
print("Recall - Micro:", metrics.recall_score(y_test_reduced, prediction_reduced, average="micro"))
print("Precision - Micro:", metrics.precision_score(y_test_reduced, prediction_reduced, average="micro"))

Accuracy: 0.9559386973180076
Recall - Macro: 0.8287912087912087
Precision - Macro: 0.9843537414965986
Recall - Micro: 0.9559386973180076
Precision - Micro: 0.9559386973180076
