In [1]:
import os
import pandas as pd 
import numpy as np 
import scipy as scp
import sklearn

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn import metrics 
from sklearn.metrics import confusion_matrix

import statsmodels.api as sm
import matplotlib.pyplot as plt

#from sklearn.grid_search import GridSearchCV
from sklearn.model_selection import GridSearchCV

In [2]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"  
os.environ["CUDA_VISIBLE_DEVICES"]="1"

# 📂DATA : 2 Classes

In [3]:
fold = 1

In [4]:
path = "/home/kannika/code/Rheology2023/Rheology_Blood/DataBlood_Viscosity_TrainML_6Fold_split2class.csv"
data_feature = pd.read_csv(path)
print(data_feature.shape)
print("-"*100)
print(f"All Fold : {set(data_feature.fold)}")
## Split Train data Set
feature_train = data_feature[data_feature["fold"]!=fold].reset_index(drop=True)
print(f"Train Set : Fold ==> {set(feature_train.fold)}")
print("Train = ", feature_train.shape)
## Split Valid data Set
feature_test = data_feature[data_feature["fold"]==fold].reset_index(drop=True)
print(f"Valiadtion Set : Fold ==> {set(feature_test.fold)}")
print("Validation = ", feature_test.shape)
## Print DataFrame
feature_train.head()

(51, 9)
----------------------------------------------------------------------------------------------------
All Fold : {1, 2, 3, 4, 5, 6}
Train Set : Fold ==> {2, 3, 4, 5, 6}
Train =  (42, 9)
Valiadtion Set : Fold ==> {1}
Validation =  (9, 9)


Unnamed: 0.1,Unnamed: 0,Code,classes,subclass,fold,classes_binary,MCV,MCH,Hb
0,9,HN34,HN,No_Splenectomy,2,1.0,57.6,18.3,9.3
1,10,HN33,HN,No_Splenectomy,2,1.0,77.1,25.3,8.5
2,11,HN14,HN,No_Splenectomy,2,1.0,78.8,25.2,5.7
3,12,NBL19,NBL,Normal,4,0.0,89.5,27.8,14.3
4,13,HN19,HN,Splenectomy,2,1.0,81.3,25.9,8.9


In [5]:
X_train = feature_train[['MCV','MCH','Hb']]
y_train = feature_train["classes"]
print(X_train.shape)
print(y_train.shape)
print(y_train[0])

(42, 3)
(42,)
HN


In [6]:
y_train = feature_train["classes"]
print(len(list(set(y_train))))
print(set(y_train))

2
{'HN', 'NBL'}


## 🩸 Parameter:  Logistic Regression

In [7]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report,confusion_matrix
import warnings
warnings.filterwarnings('ignore')

In [8]:
LR = LogisticRegression(random_state = 1)

LRparam_grid = {
    'C' : [0.001, 0.01, 0.1, 1],
    'penalty': ['l1', 'l2'],
    'max_iter': list(range(100,500,100)),
    'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']}

LR_search = GridSearchCV(LR, param_grid=LRparam_grid, refit = True, verbose = 1, cv=10, n_jobs = -1)
# fitting the model for grid search 
best_LR = LR_search.fit(X_train, y_train)

Fitting 10 folds for each of 160 candidates, totalling 1600 fits


In [10]:
# view the results as a pandas DataFrame
best_LRdf = pd.DataFrame(best_LR.cv_results_)[['rank_test_score', 'mean_test_score', 'std_test_score', 'params']]
best_LRdf = best_LRdf.sort_values(by='rank_test_score', ascending=True)
best_LRdf = best_LRdf.reset_index(drop=True)
best_LRdf.head()

Unnamed: 0,rank_test_score,mean_test_score,std_test_score,params
0,1,1.0,0.0,"{'C': 1, 'max_iter': 300, 'penalty': 'l2', 'so..."
1,1,1.0,0.0,"{'C': 1, 'max_iter': 200, 'penalty': 'l2', 'so..."
2,1,1.0,0.0,"{'C': 1, 'max_iter': 400, 'penalty': 'l2', 'so..."
3,1,1.0,0.0,"{'C': 1, 'max_iter': 400, 'penalty': 'l2', 'so..."
4,1,1.0,0.0,"{'C': 1, 'max_iter': 100, 'penalty': 'l2', 'so..."


In [11]:
# examine the first result
print("**examine the first result","\n")

print(best_LR.cv_results_['params'][0])
print(best_LR.cv_results_['mean_test_score'][0])

# print the array of mean scores only
print("\n","**print the array of mean scores only","\n")

grid_mean_scores = best_LR.cv_results_['mean_test_score']
print(grid_mean_scores)

# examine the best model
print("\n","**examine the best model","\n")

print(best_LR.best_score_)
print(best_LR.best_params_)
print(best_LR.best_estimator_)

**examine the first result 

{'C': 0.001, 'max_iter': 100, 'penalty': 'l1', 'solver': 'newton-cg'}
nan

 **print the array of mean scores only 

[  nan   nan 0.695   nan 0.695 0.815 0.815 0.695 0.695 0.695   nan   nan
 0.695   nan 0.695 0.815 0.815 0.695 0.695 0.695   nan   nan 0.695   nan
 0.695 0.815 0.815 0.695 0.695 0.695   nan   nan 0.695   nan 0.695 0.815
 0.815 0.695 0.695 0.695   nan   nan 0.695   nan 0.695 0.925 0.925 0.785
 0.785 0.785   nan   nan 0.695   nan 0.695 0.925 0.925 0.785 0.785 0.785
   nan   nan 0.695   nan 0.695 0.925 0.925 0.785 0.785 0.785   nan   nan
 0.695   nan 0.695 0.925 0.925 0.785 0.785 0.785   nan   nan 0.93    nan
 0.83  0.95  0.95  0.905 0.855 0.855   nan   nan 0.93    nan 0.855 0.95
 0.95  0.905 0.905 0.855   nan   nan 0.93    nan 0.88  0.95  0.95  0.905
 0.905 0.905   nan   nan 0.93    nan 0.905 0.95  0.95  0.905 0.905 0.905
   nan   nan 0.905   nan 0.855 1.    1.    0.905 0.905 0.855   nan   nan
 0.905   nan 0.905 1.    1.    0.905 0.905 0.905   na

In [12]:
#Print the tured parameters and score
print("Tuned Decision Tree Parameters: {}".format(best_LR.best_params_))
print("Best score is {}".format(best_LR.best_score_))

Tuned Decision Tree Parameters: {'C': 1, 'max_iter': 100, 'penalty': 'l2', 'solver': 'newton-cg'}
Best score is 1.0


In [13]:
best_params_ =  best_LR.best_params_
C_ = best_params_['C'] 
max_iter_ = best_params_['max_iter'] 
penalty_ = best_params_['penalty']   
solver_ = best_params_['solver']

> ## 🚀 Fit Model and setting parameters

In [14]:
## **- train Fit Model XGBOOST
Logistic = LogisticRegression(C=C_, max_iter=max_iter_, penalty=penalty_, solver=solver_)
LogisticModel = Logistic.fit(X_train, y_train) # train XGBOOST model
LogisticModel

LogisticRegression(C=1, solver='newton-cg')

In [15]:
### Testing Check Parameter
print(LogisticModel.get_params())

{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 100, 'multi_class': 'auto', 'n_jobs': None, 'penalty': 'l2', 'random_state': None, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}


> ## #️⃣ Save model

In [16]:
numclass = 2

In [18]:
#Save Model
import os
import pickle
import imageio

save_pathimg = f'/media/tohn/HDD/rheology2023/ML_Model/Blood_Viscosity/_{numclass}Classes/fold{fold}'
##**Mkdir Directory 
os.makedirs(save_pathimg, exist_ok=True)     
filename = f"Logis_Model_Blood{numclass}Class_fold{fold}.pkl"
Model2Save =  f"{save_pathimg}/{filename}"
print(f"[INFO]: Done!! Save Model as : {Model2Save}")

with open(Model2Save, 'wb') as file:
    pickle.dump(LogisticModel, file)

[INFO]: Done!! Save Model as : /media/tohn/HDD/rheology2023/ML_Model/Blood_Viscosity/_2Classes/fold1/Logis_Model_Blood2Class_fold1.pkl
