In [19]:
import pandas as pd
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import precision_recall_fscore_support
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# 📂DATA : 2 Classes

In [64]:
# Fold number held out as the validation set; rows from every other fold form the training set.
fold = 6

In [65]:
# Load the pre-folded feature table and report its overall shape and fold ids.
path = "/home/kannika/code/Rheology2023/Rheology_Blood/DataBlood_Viscosity_TrainML_6Fold_split2class.csv"
data_feature = pd.read_csv(path)
print(data_feature.shape)
print("-"*100)
print(f"All Fold : {set(data_feature.fold)}")

# One boolean mask drives both splits: True marks rows of the held-out fold.
held_out_mask = data_feature["fold"] == fold

# Training set: every row outside the held-out fold (index renumbered from 0).
feature_train = data_feature[~held_out_mask].reset_index(drop=True)
print(f"Train Set : Fold ==> {set(feature_train.fold)}")
print("Train = ", feature_train.shape)

# Validation set: only the rows of the held-out fold (index renumbered from 0).
feature_test = data_feature[held_out_mask].reset_index(drop=True)
print(f"Valiadtion Set : Fold ==> {set(feature_test.fold)}")
print("Validation = ", feature_test.shape)

# Preview the first training rows as the cell output.
feature_train.head()

(51, 9)
----------------------------------------------------------------------------------------------------
All Fold : {1, 2, 3, 4, 5, 6}
Train Set : Fold ==> {1, 2, 3, 4, 5}
Train =  (43, 9)
Valiadtion Set : Fold ==> {6}
Validation =  (8, 9)


Unnamed: 0.1,Unnamed: 0,Code,classes,subclass,fold,classes_binary,MCV,MCH,Hb
0,0,HN35,HN,No_Splenectomy,1,1.0,74.3,23.4,7.1
1,1,HN43,HN,No_Splenectomy,1,1.0,57.4,17.7,4.4
2,2,HN38,HN,No_Splenectomy,1,1.0,72.9,22.6,8.0
3,3,NBL14,NBL,Normal,1,0.0,86.8,28.1,13.2
4,4,NBL15,NBL,Normal,1,0.0,88.2,27.9,13.9


In [66]:
# Feature matrix: the three blood-count columns; target: the string class label.
X_train = feature_train[['MCV', 'MCH', 'Hb']]
y_train = feature_train["classes"]

# Show both shapes and the first label, one per line.
print(X_train.shape, y_train.shape, y_train[0], sep="\n")

(43, 3)
(43,)
HN


In [67]:
# Check the target: count the distinct class labels, then list them.
y_train = feature_train["classes"]
unique_classes = set(y_train)
print(len(unique_classes))
print(unique_classes)

2
{'HN', 'NBL'}


## 🩸 Hyperparameter tuning: Random Forest

In [68]:
# Additional scikit-learn imports (none of these names are used in the cells visible here).
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report,confusion_matrix
import warnings
# Installs a blanket "ignore" filter: no warning of any category is shown from this point on.
# NOTE(review): this may also hide scikit-learn warnings raised during the grid search below —
# consider narrowing the filter to a specific category.
warnings.filterwarnings('ignore')

In [69]:
# Candidate values for each hyper-parameter (1 x 5 x 5 = 25 combinations).
n_estimators = [400]
max_depth = [5, 8, 15, 25, 30]
min_samples_split = [2, 5, 10, 15, 100]

# Base estimator; the fixed seed keeps every fit repeatable.
forest = RandomForestClassifier(random_state=1)

# Search grid keyed by the estimator's parameter names.
hyperF = dict(
    n_estimators=n_estimators,
    max_depth=max_depth,
    min_samples_split=min_samples_split,
)

# Exhaustive search with 10-fold cross-validation on the training set, using all CPU cores.
gridF = GridSearchCV(estimator=forest, param_grid=hyperF, cv=10, verbose=1, n_jobs=-1)
bestF = gridF.fit(X_train, y_train)

Fitting 10 folds for each of 25 candidates, totalling 250 fits


In [70]:
# Rank every grid candidate by mean CV score (best first) and show the top rows.
bestF_df = (
    pd.DataFrame(bestF.cv_results_)[['mean_test_score', 'std_test_score', 'params']]
    .sort_values(by='mean_test_score', ascending=False)
    .reset_index(drop=True)
)
bestF_df.head()

Unnamed: 0,mean_test_score,std_test_score,params
0,0.95,0.1,"{'max_depth': 5, 'min_samples_split': 2, 'n_es..."
1,0.95,0.1,"{'max_depth': 5, 'min_samples_split': 5, 'n_es..."
2,0.95,0.1,"{'max_depth': 30, 'min_samples_split': 15, 'n_..."
3,0.95,0.1,"{'max_depth': 30, 'min_samples_split': 10, 'n_..."
4,0.95,0.1,"{'max_depth': 30, 'min_samples_split': 5, 'n_e..."


In [71]:
# 1) First candidate in the grid: its parameter set and its mean CV score.
print("**examine the first result","\n")
print(bestF.cv_results_['params'][0], bestF.cv_results_['mean_test_score'][0], sep="\n")

# 2) Mean CV score of every candidate, in grid order.
print("\n","**print the array of mean scores only","\n")
grid_mean_scores = bestF.cv_results_['mean_test_score']
print(grid_mean_scores)

# 3) Winning candidate: best score, its parameters, and the refitted estimator.
print("\n","**examine the best model","\n")
print(bestF.best_score_, bestF.best_params_, bestF.best_estimator_, sep="\n")

**examine the first result 

{'max_depth': 5, 'min_samples_split': 2, 'n_estimators': 400}
0.95

 **print the array of mean scores only 

[0.95  0.95  0.95  0.95  0.705 0.95  0.95  0.95  0.95  0.705 0.95  0.95
 0.95  0.95  0.705 0.95  0.95  0.95  0.95  0.705 0.95  0.95  0.95  0.95
 0.705]

 **examine the best model 

0.95
{'max_depth': 5, 'min_samples_split': 2, 'n_estimators': 400}
RandomForestClassifier(max_depth=5, n_estimators=400, random_state=1)


In [72]:
# Print the tuned parameters and the best mean cross-validated score.
# The estimator being tuned is a RandomForestClassifier, so the label says
# "Random Forest" (it previously read "Decision Tree", which was misleading).
print(f"Tuned Random Forest Parameters: {bestF.best_params_}")
print(f"Best score is {bestF.best_score_}")

Tuned Decision Tree Parameters: {'max_depth': 5, 'min_samples_split': 2, 'n_estimators': 400}
Best score is 0.95


In [73]:
# Unpack the winning hyper-parameters from the grid search into plain variables.
depth, samples_split, estimators = (
    bestF.best_params_[key]
    for key in ('max_depth', 'min_samples_split', 'n_estimators')
)

> ## 🚀 Fit the model with the tuned parameters

In [74]:
# Build a forest with the tuned hyper-parameters (same seed as the search) and
# train it on the full training set.
forestOpt = RandomForestClassifier(
    n_estimators=estimators,
    max_depth=depth,
    min_samples_split=samples_split,
    random_state=1,
)
modelOpt = forestOpt.fit(X_train, y_train)

In [75]:
### Sanity check: print every parameter of the fitted model so the tuned values can be verified
params = modelOpt.get_params()
print(params)

{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 5, 'max_features': 'auto', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 400, 'n_jobs': None, 'oob_score': False, 'random_state': 1, 'verbose': 0, 'warm_start': False}


> ## #️⃣ Save model

In [76]:
# Number of target classes; interpolated into the output directory and file name of the saved model.
numclass = 2

In [77]:
# Persist the fitted model to disk as a pickle file.

import pickle
import imageio

# Destination: one directory per class-count / fold combination, plus a matching file name.
save_pathimg = f'/media/tohn/HDD/rheology2023/ML_Model/Blood_Viscosity/_{numclass}Classes/fold{fold}'
filename = f"RFModel_Blood{numclass}Class_fold{fold}.pkl"
Model2Save =  f"{save_pathimg}/{filename}"

# Create the directory tree if needed (no error when it already exists).
os.makedirs(save_pathimg, exist_ok=True)
print(f"[INFO]: Save Nodel as : [{Model2Save}]")

# Binary write; the context manager closes the file even if pickling fails.
with open(Model2Save, 'wb') as file:
    pickle.dump(modelOpt, file)

[INFO]: Save Nodel as : [/media/tohn/HDD/rheology2023/ML_Model/Blood_Viscosity/_2Classes/fold6/RFModel_Blood2Class_fold6.pkl]
