In [1]:
# Import Library
import pandas as pd
import glob
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import precision_recall_fscore_support
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import random
from sklearn.model_selection import cross_validate

In [2]:
import os

# Set the CUDA device-ordering and device-visibility environment variables
# for this process in one call (only device "1" is listed as visible).
os.environ.update(
    CUDA_DEVICE_ORDER="PCI_BUS_ID",
    CUDA_VISIBLE_DEVICES="1",
)

# 📂DATA : 3 Classes

In [37]:
# Held-out fold id: rows whose "fold" column equals this value become the
# validation split, all other rows are used for training. The value is also
# embedded in the saved model's directory and file name.
fold = 3

In [38]:
# Load the fold-annotated feature table.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
path = "/home/kannika/code/Rheology2023/Rheology_Blood/DataBlood_Viscosity_TrainML_3Fold_split3class.csv"
data_feature = pd.read_csv(path)
print(data_feature.shape)
print("-" * 100)
print(f"All Fold : {set(data_feature['fold'])}")

# Boolean masks: the held-out fold is the validation set, every other fold trains.
train_mask = data_feature["fold"] != fold
valid_mask = data_feature["fold"] == fold
feature_train = data_feature.loc[train_mask].reset_index(drop=True)
feature_test = data_feature.loc[valid_mask].reset_index(drop=True)

# Report which folds and how many rows ended up in each split.
print(f"Train Set : Fold ==> {set(feature_train['fold'])}")
print("Train = ", feature_train.shape)
print(f"Valiadtion Set : Fold ==> {set(feature_test['fold'])}")
print("Validation = ", feature_test.shape)

# Preview the first training rows.
feature_train.head()

(51, 9)
----------------------------------------------------------------------------------------------------
All Fold : {1, 2, 3}
Train Set : Fold ==> {1, 2}
Train =  (34, 9)
Valiadtion Set : Fold ==> {3}
Validation =  (17, 9)


Unnamed: 0.1,Unnamed: 0,Code,classes,subclass,fold,classes_binary,MCV,MCH,Hb
0,0,HN35,HN,No_Splenectomy,1,1.0,74.3,23.4,7.1
1,1,HN38,HN,No_Splenectomy,1,1.0,72.9,22.6,8.0
2,2,HN43,HN,No_Splenectomy,1,1.0,57.4,17.7,4.4
3,3,NBL14,NBL,Normal,1,0.0,86.8,28.1,13.2
4,4,NBL23,NBL,Normal,1,0.0,66.5,20.3,13.0


In [39]:
# Predictors are the MCV, MCH and Hb columns; the target is the "subclass" label.
feature_columns = ['MCV', 'MCH', 'Hb']
X_train = feature_train[feature_columns]
y_train = feature_train["subclass"]

# Shapes of both pieces, then the first target label as a spot check.
for part in (X_train, y_train):
    print(part.shape)
print(y_train[0])

(34, 3)
(34,)
No_Splenectomy


In [40]:
# Number of distinct target labels in the training split, then the labels themselves.
train_labels = set(y_train)
print(len(train_labels))
print(train_labels)

3
{'No_Splenectomy', 'Splenectomy', 'Normal'}


## 🩸 Parameter: XGBoost

In [41]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report,confusion_matrix
import warnings
warnings.filterwarnings('ignore')

In [42]:
from xgboost import XGBClassifier

# Candidate values for each hyper-parameter explored by the search.
learning_rates = [0.01, 0.1]
max_depths = [5, 10]
gammas = [0, 0.5, 0.8]
param_grid = dict(gamma=gammas, max_depth=max_depths, learning_rate=learning_rates)

# Base estimator with a fixed seed and tree_method='gpu_hist'.
xgboost = XGBClassifier(tree_method='gpu_hist', random_state=1)

# Search every combination in the grid with cv=10 on the training split,
# using all available workers (n_jobs=-1).
grid_search = GridSearchCV(estimator=xgboost, param_grid=param_grid, n_jobs=-1, cv=10)
grid_search.fit(X_train, y_train)

# fit() hands back the search object itself, so this name is the fitted GridSearchCV.
XGboostModel = grid_search



In [43]:
# Tabulate rank, mean/std score and parameters of every candidate, best rank first.
summary_columns = ['rank_test_score', 'mean_test_score', 'std_test_score', 'params']
best_xgboostdf = (
    pd.DataFrame(XGboostModel.cv_results_)[summary_columns]
    .sort_values(by='rank_test_score', ascending=True)
    .reset_index(drop=True)
)
best_xgboostdf.head()

Unnamed: 0,rank_test_score,mean_test_score,std_test_score,params
0,1,0.766667,0.222985,"{'gamma': 0, 'learning_rate': 0.01, 'max_depth..."
1,1,0.766667,0.222985,"{'gamma': 0, 'learning_rate': 0.01, 'max_depth..."
2,1,0.766667,0.222985,"{'gamma': 0.5, 'learning_rate': 0.01, 'max_dep..."
3,1,0.766667,0.222985,"{'gamma': 0.5, 'learning_rate': 0.01, 'max_dep..."
4,1,0.766667,0.222985,"{'gamma': 0.5, 'learning_rate': 0.1, 'max_dept..."


In [44]:
cv_results = XGboostModel.cv_results_

# First candidate in the grid: its parameter set and its mean test score.
print("**examine the first result", "\n")
print(cv_results['params'][0])
print(cv_results['mean_test_score'][0])

# Mean test score of every candidate, in grid order.
print("\n", "**print the array of mean scores only", "\n")
grid_mean_scores = cv_results['mean_test_score']
print(grid_mean_scores)

# Winner of the search: best score, best parameters, best estimator.
print("\n", "**examine the best model", "\n")
for best_attr in ("best_score_", "best_params_", "best_estimator_"):
    print(getattr(XGboostModel, best_attr))

**examine the first result 

{'gamma': 0, 'learning_rate': 0.01, 'max_depth': 5}
0.7666666666666666

 **print the array of mean scores only 

[0.76666667 0.76666667 0.70833333 0.70833333 0.76666667 0.76666667
 0.76666667 0.76666667 0.76666667 0.76666667 0.76666667 0.76666667]

 **examine the best model 

0.7666666666666666
{'gamma': 0, 'learning_rate': 0.01, 'max_depth': 5}
XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=0, importance_type=None,
              interaction_constraints='', learning_rate=0.01, max_delta_step=0,
              max_depth=5, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=24,
              num_parallel_tree=1, objective='multi:softprob', predictor='auto',
              random_state=1, reg_alpha=0, reg_lambda=1, scale_pos_weight=None,
              subsample=1, tree_method='gpu_hist

In [45]:
# Print the tuned hyper-parameters and the best cross-validation score.
# The label names XGBoost because that is the estimator the grid search tuned
# (the previous "Decision Tree" wording was a leftover from another model).
print("Tuned XGBoost Parameters: {}".format(XGboostModel.best_params_))
print("Best score is {}".format(XGboostModel.best_score_))

Tuned Decision Tree Parameters: {'gamma': 0, 'learning_rate': 0.01, 'max_depth': 5}
Best score is 0.7666666666666666


In [46]:
# Pull each tuned hyper-parameter out of the search result into its own variable.
best_params_ = XGboostModel.best_params_
gamma_, learning_rate_, max_depth_ = map(
    best_params_.__getitem__,
    ('gamma', 'learning_rate', 'max_depth'),
)

> ## 🚀 Fit Model and setting parameters

In [47]:
# Build a fresh classifier from the tuned values and train it on the whole training split.
XGBOOST = XGBClassifier(
    gamma=gamma_,
    learning_rate=learning_rate_,
    max_depth=max_depth_,
    tree_method='gpu_hist',
    random_state=1,
)
modelXGBOOST = XGBOOST.fit(X_train, y_train)

# Display the fitted estimator.
modelXGBOOST



XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=0, importance_type=None,
              interaction_constraints='', learning_rate=0.01, max_delta_step=0,
              max_depth=5, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=24,
              num_parallel_tree=1, objective='multi:softprob', predictor='auto',
              random_state=1, reg_alpha=0, reg_lambda=1, scale_pos_weight=None,
              subsample=1, tree_method='gpu_hist', validate_parameters=1,
              verbosity=None)

In [48]:
# Sanity check: print the full parameter dictionary of the fitted model.
fitted_params = modelXGBOOST.get_params()
print(fitted_params)

{'objective': 'multi:softprob', 'use_label_encoder': True, 'base_score': 0.5, 'booster': 'gbtree', 'colsample_bylevel': 1, 'colsample_bynode': 1, 'colsample_bytree': 1, 'enable_categorical': False, 'gamma': 0, 'gpu_id': 0, 'importance_type': None, 'interaction_constraints': '', 'learning_rate': 0.01, 'max_delta_step': 0, 'max_depth': 5, 'min_child_weight': 1, 'missing': nan, 'monotone_constraints': '()', 'n_estimators': 100, 'n_jobs': 24, 'num_parallel_tree': 1, 'predictor': 'auto', 'random_state': 1, 'reg_alpha': 0, 'reg_lambda': 1, 'scale_pos_weight': None, 'subsample': 1, 'tree_method': 'gpu_hist', 'validate_parameters': 1, 'verbosity': None}


> ## #️⃣ Save model

In [49]:
# Class count used to build the output directory and the saved model's file name.
# NOTE(review): presumably meant to match the number of distinct labels in y_train — confirm.
numclass = 3

In [50]:
#Save Model
import os
import pickle
import imageio

# Destination directory, keyed by class count and held-out fold.
# NOTE(review): absolute, machine-specific path — consider a configurable output directory.
save_pathimg = f'/media/tohn/HDD/rheology2023/ML_Model/Blood_Viscosity/_{numclass}Classes/fold{fold}'
# Create the directory tree if it does not exist yet.
os.makedirs(save_pathimg, exist_ok=True)
filename = f"XGboost_Model_Blood{numclass}Class_fold{fold}.pkl"
Model2Save = f"{save_pathimg}/{filename}"

# Serialize the fitted classifier with pickle.
# NOTE(review): a pickle may not load under a different library version — confirm
# this is acceptable for how the model will be consumed.
with open(Model2Save, 'wb') as file:
    pickle.dump(modelXGBOOST, file)

# Report success only after the file has actually been written, so a failed
# dump can no longer be preceded by a "Done" message.
print(f"[INFO]: Done!! Save Model as : {Model2Save}")

[INFO]: Done!! Save Model as : /media/tohn/HDD/rheology2023/ML_Model/Blood_Viscosity/_3Classes/fold3/XGboost_Model_Blood3Class_fold3.pkl
