In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from numpy import mean
from numpy import std
from sklearn import svm
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

%matplotlib inline
import matplotlib.pyplot as plt
import joblib

In [2]:
# read csv using pandas
train_df = pd.read_csv('./train.csv')
valid_df = pd.read_csv('./valid.csv')
test_df = pd.read_csv('./test.csv')

In [3]:
df_combine = [train_df, valid_df]
train_df = pd.concat(df_combine)


In [4]:
# extract specified feature data

first_order_feat = \
['original_firstorder_10Percentile','original_firstorder_90Percentile', 'original_firstorder_Energy',
'original_firstorder_Entropy', 'original_firstorder_InterquartileRange','original_firstorder_Kurtosis',
'original_firstorder_Maximum','original_firstorder_MeanAbsoluteDeviation','original_firstorder_Mean',
'original_firstorder_Median', 'original_firstorder_Minimum','original_firstorder_Range',
'original_firstorder_RobustMeanAbsoluteDeviation','original_firstorder_RootMeanSquared',
'original_firstorder_Skewness','original_firstorder_TotalEnergy','original_firstorder_Uniformity',
'original_firstorder_Variance']
 
glcm_feat= \
['original_glcm_Autocorrelation', 'original_glcm_ClusterProminence', 
'original_glcm_ClusterShade','original_glcm_ClusterTendency','original_glcm_Contrast', 'original_glcm_Correlation',
'original_glcm_DifferenceAverage','original_glcm_DifferenceEntropy','original_glcm_DifferenceVariance',
'original_glcm_Id','original_glcm_Idm','original_glcm_Idmn','original_glcm_Idn','original_glcm_Imc1',
'original_glcm_Imc2', 'original_glcm_InverseVariance', 'original_glcm_JointAverage', 'original_glcm_JointEnergy',
'original_glcm_JointEntropy', 'original_glcm_MCC', 'original_glcm_MaximumProbability', 'original_glcm_SumAverage',
'original_glcm_SumEntropy', 'original_glcm_SumSquares']
                      
gldm_feat= \
['original_gldm_DependenceEntropy', 'original_gldm_DependenceNonUniformity',
'original_gldm_DependenceNonUniformityNormalized', 'original_gldm_DependenceVariance', 'original_gldm_GrayLevelNonUniformity',
'original_gldm_GrayLevelVariance', 'original_gldm_HighGrayLevelEmphasis','original_gldm_LargeDependenceEmphasis',
'original_gldm_LargeDependenceHighGrayLevelEmphasis','original_gldm_LowGrayLevelEmphasis', 'original_gldm_SmallDependenceEmphasis',
'original_gldm_SmallDependenceHighGrayLevelEmphasis', 'original_gldm_SmallDependenceLowGrayLevelEmphasis']

glrlm_feat= \
['original_glrlm_GrayLevelNonUniformity','original_glrlm_GrayLevelNonUniformityNormalized','original_glrlm_GrayLevelVariance',
'original_glrlm_HighGrayLevelRunEmphasis', 'original_glrlm_LongRunEmphasis', 'original_glrlm_LongRunHighGrayLevelEmphasis',
'original_glrlm_LongRunLowGrayLevelEmphasis', 'original_glrlm_LongRunLowGrayLevelEmphasis', 'original_glrlm_LowGrayLevelRunEmphasis',
'original_glrlm_RunEntropy', 'original_glrlm_RunLengthNonUniformity', 'original_glrlm_RunLengthNonUniformityNormalized',
'original_glrlm_RunPercentage', 'original_glrlm_RunVariance', 'original_glrlm_ShortRunEmphasis',
'original_glrlm_ShortRunHighGrayLevelEmphasis','original_glrlm_ShortRunLowGrayLevelEmphasis']
            
glszm_feat= \
['original_glszm_GrayLevelNonUniformity','original_glszm_GrayLevelNonUniformityNormalized', 'original_glszm_GrayLevelVariance','original_glszm_HighGrayLevelZoneEmphasis',
'original_glszm_LargeAreaEmphasis','original_glszm_LargeAreaHighGrayLevelEmphasis','original_glszm_LargeAreaLowGrayLevelEmphasis',
'original_glszm_LowGrayLevelZoneEmphasis','original_glszm_SizeZoneNonUniformity', 'original_glszm_SizeZoneNonUniformityNormalized',
'original_glszm_SmallAreaEmphasis','original_glszm_SmallAreaHighGrayLevelEmphasis','original_glszm_SmallAreaLowGrayLevelEmphasis',
'original_glszm_ZoneEntropy','original_glszm_ZonePercentage','original_glszm_ZoneVariance']
 
ngtdm_feat= \
 ['original_ngtdm_Busyness','original_ngtdm_Coarseness','original_ngtdm_Complexity',
  'original_ngtdm_Contrast','original_ngtdm_Strength']

sfta_feat=\
 ['sfta1','sfta2','sfta3','sfta4','sfta5','sfta6','sfta7','sfta8','sfta9','sfta10','sfta11','sfta12','sfta13','sfta14','sfta15','sfta16','sfta17','sfta18']


# combine all features
feature_col = first_order_feat + glcm_feat + gldm_feat + glrlm_feat + glszm_feat + ngtdm_feat + sfta_feat
 
print(f'Number of features : {len(feature_col)}')

class_col = ['class']
train_X = train_df.loc[:,feature_col].values
train_Y = (train_df.loc[:,class_col].values).reshape(-1)

test_X = test_df.loc[:,feature_col].values
test_Y = (test_df.loc[:,class_col].values).reshape(-1)


Number of features : 111


In [5]:
# normalizing the data

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

sc = StandardScaler()
train_X = sc.fit_transform(train_X)
test_X = sc.fit_transform(test_X)


In [6]:
#pca
pca = PCA(n_components=70)# adjust yourself
pca.fit(train_X)
train_X = pca.transform(train_X)
test_X = pca.transform(test_X)

print(test_X.shape)

(1166, 70)


In [7]:
#logistic Reqgression
param_grid = {'max_iter':[2000,3000,4000],'solver':['newton-cg','lbfgs'],'penalty':['l1','l2'],
              'C': [0.01,0.1, 1, 10, 100]}


model = LogisticRegression()
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv = 15)
grid.fit(train_X, train_Y)

best_grid = grid.best_params_
print('LogisticRegression:',best_grid)

model = LogisticRegression(C = best_grid['C'], max_iter = best_grid['max_iter'],class_weight='balanced', 
                           multi_class='multinomial', solver = best_grid['solver'], random_state=42)
model.fit(train_X,train_Y)
print('Model Train Score: %.3f, ' %model.score(train_X, train_Y))
joblib.dump(model, 'logistic_regression.pkl')

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_sel

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_sel

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_sel

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_sel

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_sel

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_sel

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_sel

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_sel

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_sel

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_sel

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_sel

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_sel

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_sel

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_sel

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "/media/jhanvi/my_drive/anaconda/media/jhanvi/my_drive/anaconda/envs/python38/lib/python3.8/site-packages/sklearn/model_sel

 0.80903018 0.80903018        nan        nan 0.80903018 0.80903018
        nan        nan 0.84722055 0.84700619        nan        nan
 0.84722055 0.84700619        nan        nan 0.84722055 0.84700619
        nan        nan 0.86524496 0.86524496        nan        nan
 0.86524496 0.86524496        nan        nan 0.86524496 0.86524496
        nan        nan 0.86974795 0.86996231        nan        nan
 0.86974795 0.86996231        nan        nan 0.86974795 0.86996231
        nan        nan 0.86931715 0.8691021         nan        nan
 0.86931715 0.8691021         nan        nan 0.86931715 0.8691021 ]


LogisticRegression: {'C': 10, 'max_iter': 2000, 'penalty': 'l2', 'solver': 'lbfgs'}
Model Train Score: 0.881, 


['logistic_regression.pkl']

In [9]:
param_grid = {'C': [0.01,0.1, 1, 10], 
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf']} 

model = svm.SVC()

grid = GridSearchCV(estimator=model, param_grid=param_grid, cv = 15)
grid.fit(train_X, train_Y)

best_grid = grid.best_params_
print('SVM Grid:',best_grid)

model = svm.SVC(C=best_grid['C'],kernel = best_grid['kernel'],
                  gamma = best_grid['gamma'],random_state=42)

model.fit(train_X,train_Y)
print('Model Train Score: %.3f, ' %model.score(train_X, train_Y))
joblib.dump(model, 'svm_grid.pkl')

SVM Grid: {'C': 10, 'gamma': 0.01, 'kernel': 'rbf'}
Model Train Score: 0.939, 


['svm_grid.pkl']

In [10]:
#RandomForestClassifier

param_grid = {'max_depth': [15,20,30],'n_estimators': [200,300,400]}


model = RandomForestClassifier()
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv = 15)
grid.fit(train_X, train_Y)

best_grid = grid.best_params_
print('RandomForestClassifier:',best_grid)
rfc=RandomForestClassifier(random_state=0, n_estimators= best_grid['n_estimators'], 
                            max_depth=best_grid['max_depth'])

rfc.fit(train_X,train_Y)

print('Model Train Score: %.3f, ' %rfc.score(train_X, train_Y))
joblib.dump(rfc, 'rfc.pkl')



RandomForestClassifier: {'max_depth': 20, 'n_estimators': 400}
Model Train Score: 1.000, 


['rfc.pkl']

In [11]:
# SGDClassifier

param_grid = {'max_iter': [4000,2000, 3000],"penalty":["l1","l2"],"alpha":[0.001,0.0001,0.01]}

model = SGDClassifier()
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv = 15)
grid.fit(train_X, train_Y)

best_grid = grid.best_params_
print('SGDClassifier:',best_grid)
sgd = SGDClassifier(random_state=42,max_iter=best_grid['max_iter'],penalty=best_grid['penalty'],
                    alpha=best_grid['alpha'])

sgd.fit(train_X,train_Y)
print('Model Train Score: %.3f, ' %sgd.score(train_X, train_Y))

joblib.dump(sgd, 'sgd.pkl')


SGDClassifier: {'alpha': 0.0001, 'max_iter': 2000, 'penalty': 'l1'}
Model Train Score: 0.870, 


['sgd.pkl']

In [12]:
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

target_names = ['COVID-19', 'Non-COVID', 'Normal']

logistic_regression = joblib.load('logistic_regression.pkl')
# svm_bagging = joblib.load('svm_bagging.pkl')
rfc = joblib.load('rfc.pkl')
sgd = joblib.load('sgd.pkl')
svm_grid = joblib.load('svm_grid.pkl')


yhat_lr = logistic_regression.predict(test_X)
lr_report = classification_report(test_Y, yhat_lr, target_names=target_names)
lr_accuracy = accuracy_score(test_Y, yhat_lr)
print("Logistic Regression Test Set Prediction:")
print(lr_report)
print(f"Test Accuracy :{lr_accuracy} ")

yhat_rfc = rfc.predict(test_X)
rfc_report = classification_report(test_Y, yhat_rfc, target_names=target_names)
rfc_accuracy = accuracy_score(test_Y, yhat_rfc)
print("Random Forest Classfication Test Set Prediction:")
print(rfc_report)
print(f"Test Accuracy :{rfc_accuracy} ")

yhat_sgd = sgd.predict(test_X)
sgd_report = classification_report(test_Y, yhat_sgd, target_names=target_names)
sgd_accuracy = accuracy_score(test_Y, yhat_sgd)
print("Sochastic Gradient Descent Test Prediction:")
print(sgd_report)
print(f"Test Accuracy :{sgd_accuracy} ")


yhat_svm_grid = svm_grid.predict(test_X)
svm_grid_report = classification_report(test_Y, yhat_svm_grid, target_names=target_names)
svm_grid_accuracy = accuracy_score(test_Y, yhat_svm_grid)
print("Support Vector machine Test Prediction:")
print(svm_grid_report)
print(f"Test Accuracy :{svm_grid_accuracy} ")



Logistic Regression Test Set Prediction:
              precision    recall  f1-score   support

    COVID-19       0.98      0.94      0.96       583
   Non-COVID       0.80      0.87      0.84       292
      Normal       0.86      0.85      0.85       291

    accuracy                           0.90      1166
   macro avg       0.88      0.89      0.88      1166
weighted avg       0.90      0.90      0.90      1166

Test Accuracy :0.9005145797598628 
Random Forest Classfication Test Set Prediction:
              precision    recall  f1-score   support

    COVID-19       0.83      0.98      0.90       583
   Non-COVID       0.87      0.71      0.78       292
      Normal       0.88      0.74      0.81       291

    accuracy                           0.85      1166
   macro avg       0.86      0.81      0.83      1166
weighted avg       0.86      0.85      0.85      1166

Test Accuracy :0.8524871355060034 
Sochastic Gradient Descent Test Prediction:
              precision    recall 