In [1]:
from micromlgen import port

import pandas as pd
import xgboost as xgb
from xgboost import plot_tree
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import uniform, randint

from sklearn.model_selection import RandomizedSearchCV
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import confusion_matrix

In [2]:
# Load the cStick dataset; the path is relative to the notebook's working directory.
csv_path = 'cStick.csv'
data = pd.read_csv(csv_path)

In [3]:
# Show the loaded frame as the cell output; the rendering recorded below elides
# the middle rows, so only the first and last few records are visible.
data

Unnamed: 0,Distance,Pressure,HRV,Sugar level,SpO2,Accelerometer,Decision
0,25.540,1.0,101.396,61.080,87.770,1.0,1
1,2.595,2.0,110.190,20.207,65.190,1.0,2
2,68.067,0.0,87.412,79.345,99.345,0.0,0
3,13.090,1.0,92.266,36.180,81.545,1.0,1
4,69.430,0.0,89.480,80.000,99.990,0.0,0
...,...,...,...,...,...,...,...
2034,5.655,2.0,116.310,162.242,71.310,1.0,2
2035,9.660,2.0,124.320,177.995,79.320,1.0,2
2036,15.220,1.0,93.828,40.440,82.610,1.0,1
2037,9.120,2.0,123.240,175.871,78.240,1.0,2


In [18]:
# Separate the features from the label column.
# The label is looked up as 'Decision ' -- the trailing space is part of the key,
# so it is kept in one place to avoid a mismatch between the two lookups.
target_column = 'Decision '
y = data[target_column]
X = data.drop(columns=target_column)

In [31]:
# Summary statistics of the feature columns for the rows labelled 2.
is_label_2 = y == 2
X.loc[is_label_2].describe()

Unnamed: 0,Distance,Pressure,HRV,Sugar level,SpO2,Accelerometer
count,667.0,667.0,667.0,667.0,667.0,667.0
mean,4.995,2.0,114.99,93.574544,69.99,1.0
std,2.890359,0.0,5.780718,75.099341,5.780718,0.0
min,0.0,2.0,105.0,10.0,60.0,1.0
25%,2.4975,2.0,109.995,19.8235,64.995,1.0
50%,4.995,2.0,114.99,29.647,69.99,1.0
75%,7.4925,2.0,119.985,169.4695,74.985,1.0
max,9.99,2.0,124.98,179.293,79.98,1.0


In [5]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
# NOTE(review): the split is not stratified -- consider `stratify=y` if per-class
# proportions in train/test need to match; confirm before changing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [6]:
# Hyper-parameter search space for the randomized search.
# scipy's uniform(loc, scale) draws from [loc, loc + scale]; randint(low, high)
# draws integers from low up to, but not including, high.
# The "default" remarks are the original author's notes; the actual XGBClassifier
# defaults depend on the installed xgboost version -- TODO confirm.
params = dict(
    colsample_bytree=uniform(0.7, 0.3),  # 0.7 .. 1.0
    gamma=uniform(0, 0.5),  # 0.0 .. 0.5
    learning_rate=uniform(0.03, 0.3),  # 0.03 .. 0.33 (noted default: 0.1)
    max_depth=randint(2, 6),  # 2 .. 5 (noted default: 3)
    n_estimators=randint(100, 150),  # 100 .. 149 (noted default: 100)
    subsample=uniform(0.6, 0.4),  # 0.6 .. 1.0
)

# Despite its name, `best_model` is the (not yet fitted) search object:
# 100 sampled candidates, each scored with 3-fold cross-validation on all cores.
best_model = RandomizedSearchCV(
    xgb.XGBClassifier(),
    param_distributions=params,
    n_iter=100,
    cv=3,
    verbose=1,
    n_jobs=-1,
    random_state=42,
)

In [7]:
# Run the randomized search on the training split and show the winning
# hyper-parameters as the cell output (fit() returns the search object itself).
best_model.fit(X_train, y_train).best_params_

Fitting 3 folds for each of 100 candidates, totalling 300 fits


{'colsample_bytree': 0.8123620356542087,
 'gamma': 0.4753571532049581,
 'learning_rate': 0.24959818254342153,
 'max_depth': 2,
 'n_estimators': 120,
 'subsample': 0.6624074561769746}

In [8]:
# Train a standalone classifier with the hyper-parameters selected by the search.
# `params` is rebound here from the search distributions to the winning values;
# the name is kept so later cells that read `params` keep working.
# NOTE(review): with RandomizedSearchCV's default refit=True, an estimator trained
# with these same parameters on X_train should already be available as
# `best_model.best_estimator_` -- confirm whether this retraining is still needed.
params = best_model.best_params_

model = xgb.XGBClassifier(**params)
# The former bare `model.get_params()` call was dropped: its return value was
# discarded (only a cell's last expression is displayed), so it had no effect.
model.fit(X_train, y_train)

In [9]:
# Score the retrained classifier on the held-out split.
y_pred = model.predict(X_test)

# Per-class precision / recall / F1, reported with three decimals.
report_text = metrics.classification_report(y_test, y_pred, digits=3)
# Overall accuracy expressed as a percentage rounded to two decimals.
accuracy_pct = round(metrics.accuracy_score(y_test, y_pred) * 100, 2)

print(report_text)
print(f'Model accuracy: {accuracy_pct}%')
# NOTE(review): in every row displayed earlier in this notebook, `Pressure` equals
# the `Decision` label (and Pressure is constant at 2.0 for the label-2 subset), so
# a perfect score may simply reflect a feature that encodes the target -- confirm
# before relying on this result.


              precision    recall  f1-score   support

           0      1.000     1.000     1.000       214
           1      1.000     1.000     1.000       195
           2      1.000     1.000     1.000       203

    accuracy                          1.000       612
   macro avg      1.000     1.000     1.000       612
weighted avg      1.000     1.000     1.000       612

Model accuracy: 100.0%
