# 1. Import libraries

In [1]:
import warnings
import numpy as np
import pandas as pd
from sklearn import tree
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from sklearn.metrics import roc_auc_score
import xgboost as xgb
from sklearn.model_selection import cross_val_score, GridSearchCV

In [3]:
warnings.filterwarnings("ignore")
%matplotlib inline
plt.style.use('seaborn-white')
plt.rcParams['figure.figsize'] = 15,5

---
# 2. Import dataset

In [4]:
# Read the dataset from the CSV file in the working directory; the trailing
# expression displays its (rows, columns) shape as the cell output.
df = pd.read_csv(filepath_or_buffer='clean.csv')
df.shape

(4293, 13)

In [5]:
# Split the frame into the feature matrix (every column except the label)
# and the label vector taken from the 'Classification' column.
x = df.drop(columns=['Classification'])
y = df['Classification']

---
# 3. GB Model - 1st iteration

In [6]:
# Coarse hyper-parameter grid for the first search: three candidate values
# per parameter, 27 combinations in total.
parameters = dict(
    max_depth=[5, 50, 100],
    subsample=[0.25, 0.50, 0.75],
    n_estimators=[5, 50, 100],
)

In [7]:
# Exhaustive search over `parameters`, ranking candidates by macro-averaged F1
# under 4-fold cross-validation and using all available cores.
model = xgb.XGBClassifier(objective='multi:softprob', random_state=42)
grid_search = GridSearchCV(
    estimator=model,
    param_grid=parameters,
    scoring="f1_macro",
    cv=4,
    n_jobs=-1,
)
# fit() returns the fitted search object itself, so no re-assignment is needed;
# the trailing semicolon keeps the cell from echoing its repr.
grid_search.fit(x, y);



In [8]:
# Report the best mean cross-validated score and the parameters that achieved
# it, one per line.
print(grid_search.best_score_, grid_search.best_params_, sep="\n")

0.46218901164963727
{'max_depth': 50, 'n_estimators': 100, 'subsample': 0.5}


---
# 4. GB Model - 2nd iteration

In [9]:
# Refined grid for the second search: subsample is pinned to 0.5 while tree
# depth and the number of boosting rounds are varied (9 combinations).
parameters = dict(
    max_depth=[25, 50, 75],
    subsample=[0.5],
    n_estimators=[100, 200, 500],
)

In [10]:
# Same search set-up as the first iteration, now run over the refined
# `parameters` grid: macro-F1 scoring, 4-fold cross-validation, all cores.
model = xgb.XGBClassifier(objective='multi:softprob', random_state=42)
grid_search = GridSearchCV(
    estimator=model,
    param_grid=parameters,
    scoring="f1_macro",
    cv=4,
    n_jobs=-1,
)
# fit() returns the fitted search object itself, so no re-assignment is needed;
# the trailing semicolon keeps the cell from echoing its repr.
grid_search.fit(x, y);



In [11]:
# Report the best mean cross-validated score and the parameters that achieved
# it, one per line.
print(grid_search.best_score_, grid_search.best_params_, sep="\n")

0.46409237612166876
{'max_depth': 25, 'n_estimators': 100, 'subsample': 0.5}


---
# 5. Final model - cross-validated macro F1

In [12]:
# Rebuild the classifier with fixed hyper-parameters and score it with 4-fold
# cross-validation on the full dataset.
model = xgb.XGBClassifier(
    objective='multi:softprob',
    n_estimators=100,
    max_depth=25,
    subsample=0.5,
    random_state=42,
)
# NOTE: despite its name, `accuracies` holds the per-fold macro-F1 scores
# (scoring="f1_macro"), not accuracy values.
accuracies = cross_val_score(
    estimator=model,
    X=x,
    y=y,
    scoring="f1_macro",
    cv=4,
    n_jobs=-1,
)

In [13]:
# Summarise the per-fold scores as mean +/- two standard deviations.
mean_f1 = accuracies.mean()
two_sigma = accuracies.std() * 2
print("F1 Score: %0.3f (+/- %0.3f)"  % (mean_f1, two_sigma))

F1 Score: 0.464 (+/- 0.087)
