In [1]:
# Import Library
import pandas as pd
import glob
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import precision_recall_fscore_support
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import random
from sklearn import svm
from sklearn.model_selection import cross_validate

from imblearn.over_sampling import SMOTE

# 📂DATA : 2 Classes

In [2]:
fold = 3

In [3]:
path = "/home/kannika/code/Rheology2023/Rheology_Blood/DataBlood_Viscosity_TrainML_3Fold_EMclass.csv"
data_feature = pd.read_csv(path)
print(data_feature.shape)
print("-"*100)
print(f"All Fold : {set(data_feature.fold)}")
## Split Train data Set
feature_train = data_feature[data_feature["fold"]!=fold].reset_index(drop=True)
print(f"Train Set : Fold ==> {set(feature_train.fold)}")
print("Train = ", feature_train.shape)
## Split Valid data Set
feature_test = data_feature[data_feature["fold"]==fold].reset_index(drop=True)
print(f"Valiadtion Set : Fold ==> {set(feature_test.fold)}")
print("Validation = ", feature_test.shape)
## Print DataFrame
feature_train.head()

(33, 11)
----------------------------------------------------------------------------------------------------
All Fold : {1, 2, 3}
Train Set : Fold ==> {1, 2}
Train =  (22, 11)
Valiadtion Set : Fold ==> {3}
Validation =  (11, 11)


Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Code,classes,subclass,classes_binary,MCV,MCH,Hb,typeBEvsBM,fold
0,0,42,HN44,HN,Splenectomy,1.0,61.7,19.7,7.0,E,1
1,1,17,HN21,HN,No_Splenectomy,1.0,64.8,19.7,6.5,E,1
2,2,8,HN13,HN,Splenectomy,1.0,72.6,23.4,5.8,E,1
3,3,43,HN29,HN,No_Splenectomy,1.0,64.2,19.1,6.6,E,1
4,4,50,HN39,HN,Splenectomy,1.0,76.4,22.5,6.4,E,1


In [4]:
X_train = feature_train[['MCV','MCH','Hb']]
y_train = feature_train["typeBEvsBM"]
print(X_train.shape)
print(y_train.shape)
print(y_train[0])

(22, 3)
(22,)
E


In [5]:
print(len(list(set(y_train))))
print(set(y_train))

2
{'M', 'E'}


In [6]:
print("Before OverSampling, counts of label 'BE': {}".format(sum(y_train=='E')))
print("Before OverSampling, counts of label 'BM': {} \n".format(sum(y_train=='M')))

Before OverSampling, counts of label 'BE': 16
Before OverSampling, counts of label 'BM': 6 



# 💡Dealing with Class Imbalance with SMOTE

In [7]:
X_train_smote, y_train_smote = SMOTE().fit_resample(X_train, y_train.ravel())
# sm = SMOTE(random_state=100)
# X_train_smote, y_train_smote = sm.fit_sample(X_train, y_train.ravel())

print('After OverSampling, the shape of train_X: {}'.format(X_train_smote.shape))
print('After OverSampling, the shape of train_y: {} \n'.format(y_train_smote.shape))

print("After OverSampling, counts of label 'BE': {}".format(sum(y_train_smote=='E')))
print("After OverSampling, counts of label 'BM': {}".format(sum(y_train_smote=='M')))

After OverSampling, the shape of train_X: (32, 3)
After OverSampling, the shape of train_y: (32,) 

After OverSampling, counts of label 'BE': 16
After OverSampling, counts of label 'BM': 16


## 🩸 Parameter:  SVM

In [8]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report,confusion_matrix
import warnings
warnings.filterwarnings('ignore')

In [9]:
Cs = [0.001, 0.01, 0.1, 1, 10]
gammas = [0.001, 0.01, 0.1, 1]
kernels = ['linear', 'rbf']

## Find optimiz paremeter 
from sklearn import svm
svm = svm.SVC(random_state = 1)
param_grid = {'C': Cs, 'gamma' : gammas, 'kernel' : kernels}
grid_search = GridSearchCV(svm, param_grid, cv=10, verbose=1, n_jobs=-1) ##cv=10 == 10-fold validation 
SVM = grid_search.fit(X_train_smote, y_train_smote.ravel())

Fitting 10 folds for each of 40 candidates, totalling 400 fits


In [10]:
# view the results as a pandas DataFrame
best_svm_df = pd.DataFrame(SVM.cv_results_)[['rank_test_score', 'mean_test_score', 'std_test_score', 'params']]
best_svm_df = best_svm_df.sort_values(by='rank_test_score', ascending=True)
best_svm_df = best_svm_df.reset_index(drop=True)
best_svm_df.head()

Unnamed: 0,rank_test_score,mean_test_score,std_test_score,params
0,1,0.975,0.075,"{'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}"
1,2,0.883333,0.145297,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
2,2,0.883333,0.145297,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
3,4,0.841667,0.160078,"{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}"
4,5,0.833333,0.223607,"{'C': 10, 'gamma': 0.01, 'kernel': 'rbf'}"


In [11]:
# examine the first result
print("**examine the first result","\n")

print(SVM.cv_results_['params'][0])
print(SVM.cv_results_['mean_test_score'][0])

# print the array of mean scores only
print("\n","**print the array of mean scores only","\n")

grid_mean_scores = SVM.cv_results_['mean_test_score']
print(grid_mean_scores)

# examine the best model
print("\n","**examine the best model","\n")

print(SVM.best_score_)
print(SVM.best_params_)
print(SVM.best_estimator_)

**examine the first result 

{'C': 0.001, 'gamma': 0.001, 'kernel': 'linear'}
0.5833333333333333

 **print the array of mean scores only 

[0.58333333 0.41666667 0.58333333 0.41666667 0.58333333 0.44166667
 0.58333333 0.36666667 0.60833333 0.41666667 0.60833333 0.41666667
 0.60833333 0.44166667 0.60833333 0.36666667 0.675      0.41666667
 0.675      0.41666667 0.675      0.44166667 0.675      0.36666667
 0.675      0.55       0.675      0.575      0.675      0.84166667
 0.675      0.88333333 0.70833333 0.61666667 0.70833333 0.83333333
 0.70833333 0.975      0.70833333 0.88333333]

 **examine the best model 

0.975
{'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}
SVC(C=10, gamma=0.1, random_state=1)


In [12]:
#Print the tured parameters and score
print("Tuned Decision Tree Parameters: {}".format(SVM.best_params_))
print("Best score is {}".format(SVM.best_score_))

Tuned Decision Tree Parameters: {'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}
Best score is 0.975


In [13]:
##print(bestF)
best_params_ = SVM.best_params_
C_ = best_params_['C']  ##<--- best_params = Cs
gamma_ = best_params_['gamma'] ##<--- best_params = gammas
kernel_ = best_params_['kernel'] ##<--- best_params = kernels

> ## 🚀 Fit Model and setting parameters

In [15]:
from sklearn import svm

SVM = svm.SVC(random_state = 1, kernel=kernel_, C=C_, gamma=gamma_, probability=True ) #เปลี่ยนตาม parameter
modelSVM = SVM.fit(X_train_smote, y_train_smote.ravel()) # train SVM model
modelSVM

SVC(C=10, gamma=0.1, probability=True, random_state=1)

In [16]:
### Testing Check Parameter
params = modelSVM.get_params()
print(params)

{'C': 10, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': -1, 'probability': True, 'random_state': 1, 'shrinking': True, 'tol': 0.001, 'verbose': False}


> ## #️⃣ Save model

In [17]:
nameclass = "typeBEvsBM_SmoteClasses"

In [18]:
#Save Model
import os
import pickle
import imageio

save_pathimg = f'/media/tohn/HDD/rheology2023/ML_Model/Blood_Viscosity/{nameclass}/fold{fold}'
##**Mkdir Directory 
os.makedirs(save_pathimg, exist_ok=True)     
filename = f"SVM_Model_{nameclass}_fold{fold}.pkl"
Model2Save =  f"{save_pathimg}/{filename}"
print(f"[INFO]: Save Nodel as : [{Model2Save}]")

with open(Model2Save, 'wb') as file:
    pickle.dump(modelSVM, file)

[INFO]: Save Nodel as : [/media/tohn/HDD/rheology2023/ML_Model/Blood_Viscosity/typeBEvsBM_SmoteClasses/fold3/SVM_Model_typeBEvsBM_SmoteClasses_fold3.pkl]
