In [1]:
import pandas as pd
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import precision_recall_fscore_support
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# 📂DATA : 3 Classes

In [2]:
# Fold number held out as the validation set; rows belonging to every other
# fold are used for training (see the split on the "fold" column below).
fold = 1

In [3]:
# Load the pre-folded feature table, hold out `fold` for validation and train
# on the rows of every other fold.
path = "/home/kannika/code/Rheology2023/Rheology_Blood/DataBlood_Viscosity_TrainML_3Fold_split3class.csv"
data_feature = pd.read_csv(path)
print(data_feature.shape)
print("-"*100)
all_folds = set(data_feature["fold"])
print(f"All Fold : {all_folds}")
## Fail fast on a fold value that does not partition the data: a mistyped fold
## would otherwise yield an empty validation set and train on every row.
is_validation = data_feature["fold"] == fold
if not is_validation.any() or is_validation.all():
    raise ValueError(
        f"fold={fold!r} does not split the data into non-empty train/validation "
        f"sets; available folds: {all_folds}"
    )
## Split Train data Set
feature_train = data_feature[~is_validation].reset_index(drop=True)
print(f"Train Set : Fold ==> {set(feature_train.fold)}")
print("Train = ", feature_train.shape)
## Split Valid data Set
feature_test = data_feature[is_validation].reset_index(drop=True)
print(f"Valiadtion Set : Fold ==> {set(feature_test.fold)}")
print("Validation = ", feature_test.shape)
## Print DataFrame
feature_train.head()

(51, 9)
----------------------------------------------------------------------------------------------------
All Fold : {1, 2, 3}
Train Set : Fold ==> {2, 3}
Train =  (35, 9)
Valiadtion Set : Fold ==> {1}
Validation =  (16, 9)


Unnamed: 0.1,Unnamed: 0,Code,classes,subclass,fold,classes_binary,MCV,MCH,Hb
0,16,HN22,HN,No_Splenectomy,2,1.0,65.6,20.2,7.5
1,17,HN21,HN,No_Splenectomy,2,1.0,64.8,19.7,6.5
2,18,HN20,HN,No_Splenectomy,2,1.0,69.5,20.5,6.1
3,19,NBL16,NBL,Normal,2,0.0,79.6,24.9,13.2
4,20,NBL21,NBL,Normal,2,0.0,74.9,23.9,14.5


In [9]:
# Build the training inputs from the MCV, MCH and Hb columns and take the
# "subclass" column as the target.
feature_columns = ['MCV', 'MCH', 'Hb']
X_train = feature_train[feature_columns]
y_train = feature_train["subclass"]  # target column
# Sanity check: both shapes, then the first target label.
print(X_train.shape, y_train.shape, sep="\n")
print(y_train[0])

(35, 3)
(35,)
No_Splenectomy


In [10]:
# Count and list the distinct target labels present in the training rows.
y_train = feature_train["subclass"]  # target column
train_labels = set(y_train)
print(len(train_labels))
print(train_labels)

3
{'Normal', 'Splenectomy', 'No_Splenectomy'}


## 🩸 Hyperparameter search: Random Forest

In [11]:
# Additional scikit-learn evaluation helpers.
# NOTE(review): none of these three names is used in the cells visible here —
# confirm they are needed further down, and consider moving the imports to the
# first cell with the others.
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report,confusion_matrix
import warnings
# Installs a blanket "ignore" filter: every warning raised in this process
# after this cell runs is hidden, which can mask real problems during model
# selection. Consider restricting it to a specific category or module.
warnings.filterwarnings('ignore')

In [12]:
# Candidate values for the exhaustive search: 1 x 5 x 5 = 25 combinations.
n_estimators = [400]
max_depth = [5, 8, 15, 25, 30]
min_samples_split = [2, 5, 10, 15, 100]

hyperF = dict(
    n_estimators=n_estimators,
    max_depth=max_depth,
    min_samples_split=min_samples_split,
)

# Fixed seed so every candidate forest is reproducible.
forest = RandomForestClassifier(random_state=1)

# 10-fold cross-validated grid search using all available cores.
gridF = GridSearchCV(
    estimator=forest,
    param_grid=hyperF,
    cv=10,
    verbose=1,
    n_jobs=-1,
)
bestF = gridF.fit(X_train, y_train)

Fitting 10 folds for each of 25 candidates, totalling 250 fits


In [13]:
# Tabulate the grid-search results, best mean cross-validation score first.
bestF_df = (
    pd.DataFrame(bestF.cv_results_)
    .loc[:, ['mean_test_score', 'std_test_score', 'params']]
    .sort_values(by='mean_test_score', ascending=False)
    .reset_index(drop=True)
)
bestF_df.head()

Unnamed: 0,mean_test_score,std_test_score,params
0,0.766667,0.311359,"{'max_depth': 15, 'min_samples_split': 10, 'n_..."
1,0.766667,0.311359,"{'max_depth': 5, 'min_samples_split': 10, 'n_e..."
2,0.766667,0.311359,"{'max_depth': 30, 'min_samples_split': 10, 'n_..."
3,0.766667,0.311359,"{'max_depth': 25, 'min_samples_split': 10, 'n_..."
4,0.766667,0.311359,"{'max_depth': 8, 'min_samples_split': 10, 'n_e..."


In [14]:
# Inspect the grid-search outcome in three steps: the first candidate, the mean
# score of every candidate, and finally the selected (best) model.
cv_results = bestF.cv_results_

# 1) first candidate of the grid
print("**examine the first result","\n")
first_params = cv_results['params'][0]
first_score = cv_results['mean_test_score'][0]
print(first_params)
print(first_score)

# 2) mean test score of every candidate, in grid order
print("\n","**print the array of mean scores only","\n")
grid_mean_scores = cv_results['mean_test_score']
print(grid_mean_scores)

# 3) best score, its parameters and the corresponding estimator
print("\n","**examine the best model","\n")
print(bestF.best_score_, bestF.best_params_, bestF.best_estimator_, sep="\n")

**examine the first result 

{'max_depth': 5, 'min_samples_split': 2, 'n_estimators': 400}
0.7416666666666666

 **print the array of mean scores only 

[0.74166667 0.71666667 0.76666667 0.63333333 0.29166667 0.74166667
 0.71666667 0.76666667 0.63333333 0.29166667 0.74166667 0.71666667
 0.76666667 0.63333333 0.29166667 0.74166667 0.71666667 0.76666667
 0.63333333 0.29166667 0.74166667 0.71666667 0.76666667 0.63333333
 0.29166667]

 **examine the best model 

0.7666666666666666
{'max_depth': 5, 'min_samples_split': 10, 'n_estimators': 400}
RandomForestClassifier(max_depth=5, min_samples_split=10, n_estimators=400,
                       random_state=1)


In [15]:
# Print the tuned parameters and the best mean cross-validation score.
# The searched estimator is a RandomForestClassifier, so the label says so.
print("Tuned Random Forest Parameters: {}".format(bestF.best_params_))
print("Best score is {}".format(bestF.best_score_))

Tuned Decision Tree Parameters: {'max_depth': 5, 'min_samples_split': 10, 'n_estimators': 400}
Best score is 0.7666666666666666


In [17]:
# Unpack the winning grid-search settings into plain variables for the final fit.
best_params = bestF.best_params_
depth, samples_split, estimators = (
    best_params[key]
    for key in ('max_depth', 'min_samples_split', 'n_estimators')
)

> ## 🚀 Fit the model with the tuned parameters

In [18]:
# Train a fresh forest on the training rows using the tuned hyper-parameters
# (same seed as the one used during the search).
forestOpt = RandomForestClassifier(
    n_estimators=estimators,
    max_depth=depth,
    min_samples_split=samples_split,
    random_state=1,
)
forestOpt.fit(X_train, y_train)
modelOpt = forestOpt  # fit() returns the estimator itself
modelOpt

RandomForestClassifier(max_depth=5, min_samples_split=10, n_estimators=400,
                       random_state=1)

In [19]:
### Sanity check: dump every hyper-parameter of the fitted forest (tuned values
### plus the remaining scikit-learn settings) to confirm what was applied.
params = modelOpt.get_params()
print(params)

{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 5, 'max_features': 'auto', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 1, 'min_samples_split': 10, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 400, 'n_jobs': None, 'oob_score': False, 'random_state': 1, 'verbose': 0, 'warm_start': False}


> ## #️⃣ Save model

In [20]:
# Number of target classes; used below to name the output directory and the
# saved model file.
numclass = 3

In [21]:
# Persist the fitted forest as a pickle, in a directory keyed by the number of
# classes and the held-out fold.
import pickle
import imageio

save_pathimg = f'/media/tohn/HDD/rheology2023/ML_Model/Blood_Viscosity/_{numclass}Classes/fold{fold}'
# Create the target directory (and any missing parents) if it does not exist yet.
os.makedirs(save_pathimg, exist_ok=True)

filename = f"RFModel_Blood{numclass}Class_fold{fold}.pkl"
Model2Save = "/".join((save_pathimg, filename))

with open(Model2Save, mode='wb') as model_file:
    pickle.dump(modelOpt, model_file)