In [None]:
import pandas as pd
import numpy as np
from xgboost import XGBClassifier, cv
import time
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import accuracy_score, f1_score, precision_score
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold, GridSearchCV

In [None]:
# Download the Company Bankruptcy Prediction CSV from GitHub into a DataFrame.
# Requires network access; the URL is kept in one place so it is easy to swap.
DATA_URL = "https://raw.githubusercontent.com/benvictoria17/MachineLearning/master/dataset/Company%20Bankruptcy%20Prediction/data.csv"

print("Data loading....")
df = pd.read_csv(DATA_URL)
print("Data load complete")

In [None]:
# Preview the first five rows to sanity-check the load.
df.head(n=5)

In [None]:
# Print the column names, dtypes and non-null counts of the loaded frame.
df.info()

In [None]:
# Baseline: one stratified 70/30 hold-out split evaluated with an XGBoost classifier.
# The first column of `df` is used as the target; every remaining column is a feature.
X_df = df.iloc[:, 1:]
y_df = df.iloc[:, 0]

# `stratify=y_df` keeps the class ratio identical in the train and test parts;
# `random_state` makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X_df, y_df, test_size=0.3, random_state=123, stratify=y_df
)

xgb = XGBClassifier(n_estimators=20, learning_rate=0.1, max_depth=50, use_label_encoder=False)
xgb.fit(x_train, y_train)
pred = xgb.predict(x_test)

# scikit-learn metrics expect (y_true, y_pred). Accuracy and F1 are unaffected by
# the order, but precision is not: with the arguments swapped the value reported
# as "precision" is actually recall. Ground truth therefore goes first.
Acc_score = accuracy_score(y_test, pred)
F1_score = f1_score(y_test, pred)
Precision_score = precision_score(y_test, pred)

print("Acc score : {:.2f}".format(Acc_score))
print("F1 score : {:.2f}".format(F1_score))
print("Precision score : {:.2f}".format(Precision_score))

In [None]:
# 4-fold stratified cross-validation of an XGBoost classifier trained with the
# hinge objective; accuracy, F1 and precision are printed for every fold.
#
# NOTE(review): the name `cv` shadows the `cv` function imported from xgboost in
# the import cell. It is kept here so any later cell relying on it still works.
cv = StratifiedKFold(n_splits=4, shuffle=True, random_state=123)
xgb = XGBClassifier(
    n_estimators=20,
    learning_rate=0.1,
    max_depth=50,
    use_label_encoder=False,
    objective="binary:hinge",
)

for train_idx, test_idx in cv.split(X_df, y_df):
    # split() yields positional indices, so both features and target must be
    # selected by position (.iloc). Plain `y_df[idx]` is a label lookup and only
    # works by accident while the index is the default RangeIndex.
    x_train, x_test = X_df.iloc[train_idx, :], X_df.iloc[test_idx, :]
    y_train, y_test = y_df.iloc[train_idx], y_df.iloc[test_idx]

    xgb.fit(x_train, y_train)
    pred = xgb.predict(x_test)

    # Metrics take (y_true, y_pred); swapping them reports recall as precision.
    Acc_score = accuracy_score(y_test, pred)
    F1_score = f1_score(y_test, pred)
    Precision_score = precision_score(y_test, pred)

    print("Acc score : {:.2f}".format(Acc_score))
    print("F1 score : {:.2f}".format(F1_score))
    print("Precision score : {:.2f}".format(Precision_score))

In [None]:
# Hyper-parameter search: exhaustive grid over tree depth and number of boosting
# rounds, scored by ROC AUC with 5-fold cross-validation on the full data set.
xgb = XGBClassifier()

# ROC AUC ranks samples by a continuous score. The `binary:hinge` objective only
# emits hard 0/1 predictions, which collapses the ROC curve to a single
# threshold, so the probabilistic `binary:logistic` objective is used here to
# give the scorer real probabilities to rank.
xgb_param_grid = {
    'max_depth': [10, 20],
    'n_estimators': [10, 20],
    'learning_rate': [0.1],
    'use_label_encoder': [False],
    'objective': ["binary:logistic"],
}

hr_grid = GridSearchCV(
    estimator=xgb,
    param_grid=xgb_param_grid,
    scoring='roc_auc',
    n_jobs=8,
    cv=5,
    refit=True,               # refit the best configuration on all of X_df / y_df
    return_train_score=True,  # keep train scores in cv_results_ for comparison
)

hr_grid.fit(X_df, y_df)

In [None]:
# Tabulate the grid-search results: one row per parameter combination.
cv_results = hr_grid.cv_results_
hr_grid_df = pd.DataFrame(cv_results)
hr_grid_df

In [None]:
# Show only the top-ranked parameter combination(s) from the grid search.
hr_grid_df.loc[hr_grid_df['rank_test_score'].eq(1)]