# Extreme Gradient Boosting

In [16]:
import pandas as pd
import warnings
warnings.filterwarnings(action='ignore')

from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split

# Import metrics that would allow us to see how accurate the predictions are
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import f1_score


# Load data (path is relative to the notebook's working directory)
data = pd.read_csv('Data/11-diabetes.csv')

# Split data into X and y by column position:
# the first 8 columns are the features, the 9th column is the target
X = data.iloc[:, 0:8]
y = data.iloc[:, 8]

# Split data into train and test sets; `seed` makes the split repeatable
seed = 100
test_size = 0.25
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=seed)

# Fit model. The classifier is seeded with the same `seed` as the split so the
# fit stays reproducible on Restart & Run All even if stochastic options
# (e.g. row/column subsampling) are enabled later.
model = XGBClassifier(random_state=seed)
model.fit(X_train, y_train)

# Make predictions for test data
y_pred = model.predict(X_test)
# Element-wise rounded copy of y_pred; not read by the metrics below,
# which all use y_pred directly.
predictions = [round(value) for value in y_pred]

# Evaluate: confusion matrix and per-class report first, then headline metrics
print('CM:')
print(confusion_matrix(y_test, y_pred))
print('Classification Report:')
print(classification_report(y_test, y_pred))

print('Evaluation Metrics:')
print('Accuracy Score: {}'.format(round(accuracy_score(y_test, y_pred), 4)))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))

CM:
[[104  23]
 [ 29  36]]
Classification Report:
              precision    recall  f1-score   support

           0       0.78      0.82      0.80       127
           1       0.61      0.55      0.58        65

   micro avg       0.73      0.73      0.73       192
   macro avg       0.70      0.69      0.69       192
weighted avg       0.72      0.73      0.73       192

Evaluation Metrics:
Accuracy Score: 0.7292
Precision: 0.6101694915254238
Recall: 0.5538461538461539
F1 Score: 0.5806451612903227


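### Scaled

Same model, trained on standardized features. Tree-based boosters split on feature thresholds, so a monotonic rescaling of each feature is not expected to change the predictions; the metrics below match the unscaled run.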

In [17]:
from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler
# Alternative scalers; uncomment one (and comment out StandardScaler) to compare:
# scaler = MinMaxScaler()
# scaler = RobustScaler()
scaler = StandardScaler()

# Transform the variables to be on the same scale.
# The scaler is fitted on the training split only and then applied to the test split.
# The results are stored under new names so X_train / X_test keep the unscaled data:
# re-running this cell (or switching scaler) always starts from the same input.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit model on the scaled features, seeded with the same `seed` as the train/test split
model = XGBClassifier(random_state=seed)
model.fit(X_train_scaled, y_train)

# Make predictions for the (scaled) test data
y_pred = model.predict(X_test_scaled)
# Element-wise rounded copy of y_pred; not read by the metrics below,
# which all use y_pred directly.
predictions = [round(value) for value in y_pred]

# Evaluate: confusion matrix and per-class report first, then headline metrics
print('CM:')
print(confusion_matrix(y_test, y_pred))
print('Classification Report:')
print(classification_report(y_test, y_pred))

print('Evaluation Metrics:')
print('Accuracy Score: {}'.format(round(accuracy_score(y_test, y_pred), 4)))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))

CM:
[[104  23]
 [ 29  36]]
Classification Report:
              precision    recall  f1-score   support

           0       0.78      0.82      0.80       127
           1       0.61      0.55      0.58        65

   micro avg       0.73      0.73      0.73       192
   macro avg       0.70      0.69      0.69       192
weighted avg       0.72      0.73      0.73       192

Evaluation Metrics:
Accuracy Score: 0.7292
Precision: 0.6101694915254238
Recall: 0.5538461538461539
F1 Score: 0.5806451612903227
