In [1]:
import numpy as np
import xgboost as xgb
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.datasets import dump_svmlight_file
import pandas as pd
from sklearn.metrics import precision_score

# Read the diabetes dataset from a CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="diabetes.csv")

# Print the frame summary (columns, dtypes, non-null counts) as a quick sanity check.
df.info()





Numpy array precision: 0.7252155172413793
Svm file precision: 0.7252155172413793


In [None]:
X = df.drop("Class variable", axis=1)
y = df["Class variable"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

dump_svmlight_file(X_train, y_train, 'dtrain.svm', zero_based=True)
dump_svmlight_file(X_test, y_test, 'dtest.svm', zero_based=True)
dtrain_svm = xgb.DMatrix('dtrain.svm')
dtest_svm = xgb.DMatrix(X_test, label=y_test)

param = {
    'max_depth': 3,
    'eta': 0.3,
    'objective': 'multi:softprob',
    'num_class': 2
}
num_round = 20


In [None]:
bst = xgb.train(param, dtrain, num_round)
preds = bst.predict(dtest)

best_preds = np.asarray([np.argmax(line) for line in preds])
print("Numpy array precision:", precision_score(y_test, best_preds, average='macro'))

bst_svm = xgb.train(param, dtrain_svm, num_round)
preds = bst.predict(dtest_svm)

best_preds_svm = [np.argmax(line) for line in preds]
print("Svm file precision:", precision_score(y_test, best_preds_svm, average='macro'))

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
# confusion_matrix was used below without ever being imported.
from sklearn.metrics import confusion_matrix


def plot_confusion_matrix(y_true, y_pred, title):
    """Compute a confusion matrix, draw it as an annotated heatmap, and return it.

    Parameters
    ----------
    y_true : array-like
        Ground-truth class labels.
    y_pred : array-like
        Predicted class labels, aligned with ``y_true``.
    title : str
        Title shown above the heatmap.

    Returns
    -------
    The matrix produced by ``confusion_matrix(y_true, y_pred)``.
    """
    matrix = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(8, 6))
    # fmt='d' renders the cell annotations as integer counts.
    sns.heatmap(matrix, annot=True, cmap='Blues', fmt='d')
    plt.title(title)
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.show()
    return matrix


# One heatmap per pipeline; the matrices stay available under their original names.
confusion_mat = plot_confusion_matrix(y_test, best_preds, 'Confusion Matrix')
confusion_mat_svm = plot_confusion_matrix(y_test, best_preds_svm, 'Confusion Matrix (SVM)')


In [2]:
import sklearn.externals
import joblib

In [3]:
# Write a text dump of each trained booster to its own file.
for booster, dump_path in ((bst, 'dump.raw.txt'), (bst_svm, 'dump_svm.raw.txt')):
    booster.dump_model(dump_path)

In [4]:
# Serialise both boosters to compressed joblib files. The second call is left
# as the cell's final expression so the written filename list is displayed.
joblib.dump(value=bst, filename='bst_model.pkl', compress=True)
joblib.dump(value=bst_svm, filename='bst_svm_model.pkl', compress=True)

['bst_svm_model.pkl']