# Final classification with Logistic Regression
### This notebook trains and tests a Logistic Regression classifier, first without fusion and then with fusion

## Without fusion

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn import metrics

In [2]:
# Load the workbook (its first sheet, pandas' default) into a DataFrame.
df = pd.read_excel(io="cnn.xlsx")

In [4]:
# Keep a handle on the DataFrame's column labels, then display them as the
# cell output.
col_names = df.columns

col_names

Index([       1,        2,        3,        4,        5,        6,        7,
              8,        9,       10,       11,       12,       13,       14,
             15,       16,       17,       18,       19,       20,       21,
             22,       23,       24,       25,       26,       27,       28,
             29,       30,       31,       32,       33,       34,       35,
             36,       37,       38,       39,       40,       41,       42,
             43,       44,       45,       46,       47,       48,       49,
             50, 'target'],
      dtype='object')

In [5]:
# Feature matrix: every column except the label column.
X = df.drop(columns=['target'])

# Label vector: the 'target' column on its own.
y = df.loc[:, 'target']

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; random_state pins the shuffle so the
# split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [7]:
# Display the (rows, columns) shape of the training and test feature sets.
tuple(part.shape for part in (X_train, X_test))

((438, 50), (189, 50))

In [8]:
# Remember the feature column labels; the later cells use `cols` to rebuild
# DataFrames from the scaler's output.
cols = X_train.columns

In [9]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same fitted
# transformation to both splits so no test-set statistics are used.
scaler = StandardScaler().fit(X_train)

X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [10]:
# Rebuild a DataFrame from the scaled values. `cols` is passed directly:
# wrapping it in a list (`[cols]`) makes pandas build a one-level MultiIndex of
# tuple labels instead of restoring the original column labels.
X_train = pd.DataFrame(X_train, columns=cols)

In [11]:
# Rebuild a DataFrame from the scaled values. `cols` is passed directly:
# wrapping it in a list (`[cols]`) makes pandas build a one-level MultiIndex of
# tuple labels instead of restoring the original column labels.
X_test = pd.DataFrame(X_test, columns=cols)

In [18]:
from sklearn.linear_model import LogisticRegression

# fit() returns the estimator itself, so construction and training can be
# chained into a single expression.
logreg = LogisticRegression(random_state=0, solver='liblinear').fit(X_train, y_train)

# Predicted labels for the held-out rows.
y_pred = logreg.predict(X_test)

In [19]:
# Mean accuracy on each split, printed to four decimal places.
print(f'Training set score: {logreg.score(X_train, y_train):.4f}')

print(f'Test set score: {logreg.score(X_test, y_test):.4f}')

Training set score: 0.9087
Test set score: 0.8995


In [20]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, y_pred)

print('Confusion matrix\n\n', cm)

print('\nTrue Positives(TP) = ', cm[0,0])

print('\nTrue Negatives(TN) = ', cm[1,1])

print('\nFalse Positives(FP) = ', cm[0,1])

print('\nFalse Negatives(FN) = ', cm[1,0])

Confusion matrix

 [[84  9]
 [10 86]]

True Positives(TP) =  84

True Negatives(TN) =  86

False Positives(FP) =  9

False Negatives(FN) =  10


In [21]:
from sklearn.metrics import classification_report

# Build the per-class precision / recall / F1 table for the test predictions,
# then print it.
report_text = classification_report(y_test, y_pred)
print(report_text)

              precision    recall  f1-score   support

           0       0.89      0.90      0.90        93
           1       0.91      0.90      0.90        96

    accuracy                           0.90       189
   macro avg       0.90      0.90      0.90       189
weighted avg       0.90      0.90      0.90       189



In [22]:
TP = cm[0,0]
TN = cm[1,1]
FP = cm[0,1]
FN = cm[1,0]

classification_accuracy = (TP + TN) / float(TP + TN + FP + FN)

print('Classification accuracy : {0:0.4f}'.format(classification_accuracy))

precision = TP / float(TP + FP)

print('Precision : {0:0.4f}'.format(precision))

recall = TP / float(TP + FN)

print('Recall or Sensitivity : {0:0.4f}'.format(recall))

specificity = TN / (TN + FP)

print('Specificity : {0:0.4f}'.format(specificity))

Classification accuracy : 0.8995
Precision : 0.9032
Recall or Sensitivity : 0.8936
Specificity : 0.9053


## With fusion

In [23]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [24]:
# Load the fused-feature workbook. An integer sheet_name is a zero-based
# position, so this reads the third sheet.
df = pd.read_excel(io="cnn_abcd.xlsx", sheet_name=2)

In [26]:
# Keep a handle on the DataFrame's column labels, then display them as the
# cell output.
col_names = df.columns

col_names

Index([       1,        2,        3,        4,        5,        6,        7,
              8,        9,       10,       11,       12,       13,       14,
             15,       16,       17,       18,       19,       20,       21,
             22,       23,       24,       25,       26,       27,       28,
             29,       30,       31,       32,       33,       34,       35,
             36,       37,       38,       39,       40,       41,       42,
             43,       44,       45,       46,       47,       48,       49,
             50,       51,       52,       53,       54, 'target'],
      dtype='object')

In [27]:
# Feature matrix: every column except the label column.
X = df.drop(columns=['target'])

# Label vector: the 'target' column on its own.
y = df.loc[:, 'target']

In [28]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; random_state pins the shuffle so the
# split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [29]:
# Display the (rows, columns) shape of the training and test feature sets.
tuple(part.shape for part in (X_train, X_test))

((432, 54), (186, 54))

In [30]:
# Remember the feature column labels; the later cells use `cols` to rebuild
# DataFrames from the scaler's output.
cols = X_train.columns

In [31]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same fitted
# transformation to both splits so no test-set statistics are used.
scaler = StandardScaler().fit(X_train)

X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [32]:
# Rebuild a DataFrame from the scaled values. `cols` is passed directly:
# wrapping it in a list (`[cols]`) makes pandas build a one-level MultiIndex of
# tuple labels instead of restoring the original column labels.
X_train = pd.DataFrame(X_train, columns=cols)

In [33]:
# Rebuild a DataFrame from the scaled values. `cols` is passed directly:
# wrapping it in a list (`[cols]`) makes pandas build a one-level MultiIndex of
# tuple labels instead of restoring the original column labels.
X_test = pd.DataFrame(X_test, columns=cols)

In [35]:
from sklearn.linear_model import LogisticRegression

# fit() returns the estimator itself, so construction and training can be
# chained into a single expression.
logreg = LogisticRegression(random_state=0, solver='liblinear').fit(X_train, y_train)

# Predicted labels for the held-out rows.
y_pred = logreg.predict(X_test)

In [36]:
# Mean accuracy on each split, printed to four decimal places.
print(f'Training set score: {logreg.score(X_train, y_train):.4f}')

print(f'Test set score: {logreg.score(X_test, y_test):.4f}')

Training set score: 0.9097
Test set score: 0.9301


In [37]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, y_pred)

print('Confusion matrix\n\n', cm)

print('\nTrue Positives(TP) = ', cm[0,0])

print('\nTrue Negatives(TN) = ', cm[1,1])

print('\nFalse Positives(FP) = ', cm[0,1])

print('\nFalse Negatives(FN) = ', cm[1,0])

Confusion matrix

 [[90  7]
 [ 6 83]]

True Positives(TP) =  90

True Negatives(TN) =  83

False Positives(FP) =  7

False Negatives(FN) =  6


In [38]:
from sklearn.metrics import classification_report

# Build the per-class precision / recall / F1 table for the test predictions,
# then print it.
report_text = classification_report(y_test, y_pred)
print(report_text)

              precision    recall  f1-score   support

           0       0.94      0.93      0.93        97
           1       0.92      0.93      0.93        89

    accuracy                           0.93       186
   macro avg       0.93      0.93      0.93       186
weighted avg       0.93      0.93      0.93       186



In [39]:
TP = cm[0,0]
TN = cm[1,1]
FP = cm[0,1]
FN = cm[1,0]

classification_accuracy = (TP + TN) / float(TP + TN + FP + FN)

print('Classification accuracy : {0:0.4f}'.format(classification_accuracy))

precision = TP / float(TP + FP)

print('Precision : {0:0.4f}'.format(precision))

recall = TP / float(TP + FN)

print('Recall or Sensitivity : {0:0.4f}'.format(recall))

specificity = TN / (TN + FP)

print('Specificity : {0:0.4f}'.format(specificity))

Classification accuracy : 0.9301
Precision : 0.9278
Recall or Sensitivity : 0.9375
Specificity : 0.9222
