In [None]:
# Mount Google Drive inside the Colab runtime; later cells read files from this mount.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
# Load the sample array and its labels from the mounted Google Drive.
from pathlib import Path

import numpy as np

# Single place to edit if the dataset is moved; np.load accepts pathlib.Path objects.
DATA_DIR = Path('/content/drive/MyDrive')

data = np.load(DATA_DIR / 'data.npy')
labels = np.load(DATA_DIR / 'labels.npy')

# Fail early, with a readable message, if the two files do not describe the same samples.
assert len(data) == len(labels), (
    f"data has {len(data)} samples but labels has {len(labels)} entries"
)

In [None]:
# Seed every random number generator in play so that results are reproducible.
import random

import tensorflow as tf

# One seed shared by all generators.
seed = 42

# Python's built-in RNG. NOTE(review): some Keras versions draw default
# initializer seeds from it, so leaving it unseeded can make weight
# initialisation differ between runs -- confirm for the installed version.
random.seed(seed)

# NumPy's global RNG.
np.random.seed(seed)

# TensorFlow's global seed.
tf.random.set_seed(seed)

# Preprocessing Steps



## Splitting Data

In [None]:
from sklearn.model_selection import train_test_split

# Step 1: hold out 20% of all samples as the test set, stratified by label.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    stratify=labels,
    random_state=42,
)

# Step 2: carve the validation set out of the remaining 80%.
# 0.25 of that remainder is 0.2 of the full dataset, i.e. a 60/20/20 split overall.
X_train, X_validation, y_train, y_validation = train_test_split(
    X_train,
    y_train,
    test_size=0.25,
    stratify=y_train,
    random_state=42,
)

# Report the size of each subset.
print(f"{X_train.shape[0]} train samples")
print(f"{X_validation.shape[0]} validation samples")
print(f"{X_test.shape[0]} test samples")

3834 train samples
1279 validation samples
1279 test samples


## Normalize Data

In [None]:
# Turn the class labels into integer ids.
from sklearn.preprocessing import LabelEncoder

# Learn the label -> integer mapping on the training labels and encode them in one step.
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)

# Apply the already-fitted mapping to the validation and test labels.
y_validation, y_test = map(encoder.transform, (y_validation, y_test))

In [None]:
# One-hot encode the integer labels of every split.
from tensorflow.keras.utils import to_categorical

# Pin the one-hot width to the number of classes the encoder has seen.
# Without num_classes, to_categorical infers the width from the largest label
# present in each call, so a split lacking the last class would get a narrower matrix.
num_classes = len(encoder.classes_)

y_train_cat = to_categorical(y_train, num_classes=num_classes)
y_validation_cat = to_categorical(y_validation, num_classes=num_classes)
y_test_cat = to_categorical(y_test, num_classes=num_classes)

In [None]:
# Cast each split to float32 and divide by 255 in one step.
# Assumes the raw values lie in 0-255 so the result lands in 0-1 -- TODO confirm.
# Each name is overwritten from itself, so running this cell twice divides by 255 twice.
X_train = X_train.astype('float32') / 255.0
X_validation = X_validation.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

Model 5
>The final layer's activation is changed from softmax to sigmoid, the batch size is increased from 32 to 128, and the optimizer is changed from Adam to Adamax. I also tried SGD and Adagrad, but they gave lower accuracy.

>https://link.springer.com/chapter/10.1007/3-540-59497-3_175 --> sigmoid function

>https://ieeexplore.ieee.org/abstract/document/8862686?casa_token=S7mxfzQoHosAAAAA:NlwNcU0HnJWufA2BqE2moDKfEwrSUdaupvkzUTe_yWFLvx2AwqMWZVlDtGIiXVFqV0-h4E5RieXUFac --> adamax


In [None]:
from tensorflow.keras import Sequential, layers

# Model 5: two conv-conv-pool stages feeding a small dense classifier head.
# Layers are listed in the order they are stacked.
model5 = Sequential([
    # Stage 1: two 3x3 convolutions, then 2x2 max pooling.
    layers.Conv2D(64, 3, activation="relu", input_shape=(156, 156, 3), padding='same'),
    layers.Conv2D(32, 3, activation="relu", padding='same'),
    layers.MaxPooling2D(pool_size=(2, 2), padding='same'),
    # Stage 2: same layout as stage 1.
    layers.Conv2D(64, 3, activation="relu", padding='same'),
    layers.Conv2D(32, 3, activation="relu", padding='same'),
    layers.MaxPooling2D(pool_size=(2, 2), padding='same'),
    # Classifier head.
    layers.Flatten(),
    layers.Dense(32, activation="relu"),
    layers.Dense(32, activation="relu"),
    # Four output units; this activation was softmax in the previous variant.
    # NOTE(review): sigmoid scores are independent per class and need not sum to 1;
    # confirm that pairing them with categorical_crossentropy is intended.
    layers.Dense(4, activation="sigmoid"),
])

# The optimizer was adam in the previous variant.
model5.compile(
    optimizer="adamax",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# The batch size was 32 in the previous variant.
history5 = model5.fit(
    X_train,
    y_train_cat,
    validation_data=(X_validation, y_validation_cat),
    batch_size=128,
    epochs=10,
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
from sklearn.metrics import classification_report

# Per-class scores predicted by model5 for the validation set.
y_pred = model5.predict(X_validation)

# Compare the highest-scoring class per sample against the true class.
# zero_division=0 reports 0.0 for a class that is never predicted instead of
# emitting an undefined-metric warning for each affected metric.
print(classification_report(
    y_validation_cat.argmax(axis=1),
    y_pred.argmax(axis=1),
    target_names=np.unique(labels),
    zero_division=0,
))


                        precision    recall  f1-score   support

   Bacterial Pneumonia       0.77      0.87      0.82       563
              COVID-19       0.00      0.00      0.00        26
No Pneumonia (healthy)       0.94      0.83      0.88       321
       Viral Pneumonia       0.65      0.63      0.64       369

              accuracy                           0.77      1279
             macro avg       0.59      0.58      0.59      1279
          weighted avg       0.76      0.77      0.77      1279



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Retrain model5 (0.78 acc) on train_set + val_set.

# Build the combined set under new names instead of overwriting X_train / y_train /
# y_train_cat. Overwriting made this cell non-idempotent (each re-run appended the
# validation samples again) and silently changed what the earlier training cell
# would train on if it were re-run afterwards.
X_train_full = np.concatenate((X_train, X_validation), axis=0)
y_train_full = np.concatenate((y_train, y_validation), axis=0)

# One-hot encode with the same width as the test targets so the shapes always agree.
y_train_full_cat = to_categorical(y_train_full, num_classes=y_test_cat.shape[1])

# model5 is not re-created in this cell, so this fit continues from the weights
# learned in the earlier training cell.
# NOTE(review): the test set is passed as validation_data, so test metrics are
# monitored during training; confirm no model selection is based on them.
history5_new = model5.fit(
    X_train_full,
    y_train_full_cat,
    validation_data=(X_test, y_test_cat),
    batch_size=200,
    epochs=10,
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Predict with the retrained model 5 and summarise its test-set performance.
from sklearn.metrics import classification_report

# Per-class scores for every test sample.
y_pred = model5.predict(X_test)

# Reduce one-hot targets and score vectors to class indices, then build the report.
test_report = classification_report(
    np.argmax(y_test_cat, axis=1),
    np.argmax(y_pred, axis=1),
    target_names=np.unique(labels),
)
print(test_report)

                        precision    recall  f1-score   support

   Bacterial Pneumonia       0.83      0.76      0.79       564
              COVID-19       0.00      0.00      0.00        25
No Pneumonia (healthy)       0.91      0.89      0.90       321
       Viral Pneumonia       0.60      0.73      0.66       369

              accuracy                           0.77      1279
             macro avg       0.59      0.59      0.59      1279
          weighted avg       0.77      0.77      0.77      1279



In [None]:
# Per-class sensitivity, specificity and F1 on the test set (one-vs-rest).
import pandas as pd
from sklearn.metrics import precision_recall_fscore_support

y_before = model5.predict(X_test)     # per-class scores
y_pred = y_before.argmax(axis=1)      # predicted class index per sample
y_true = y_test_cat.argmax(axis=1)    # true class index per sample
class_names = np.unique(labels)
n_classes = len(class_names)

res = []
for class_idx, name in enumerate(class_names):
    # Binary one-vs-rest view: 1 = "is this class", 0 = "is any other class".
    is_class_true = (y_true == class_idx).astype(int)
    is_class_pred = (y_pred == class_idx).astype(int)

    # With average=None the result arrays follow `labels`, so index 0 is the
    # negative ("rest") class and index 1 is the positive class. Pinning
    # labels=[0, 1] keeps both entries present even if one never occurs, and
    # zero_division=0 avoids warnings for classes that are never predicted.
    prec, recall, fscore, _ = precision_recall_fscore_support(
        is_class_true,
        is_class_pred,
        labels=[0, 1],
        average=None,
        beta=1,
        zero_division=0,
    )

    # sensitivity = recall of the positive class (true positive rate)
    # specificity = recall of the negative class (true negative rate)
    # f1score     = F1 of the positive class
    # The previous version read these from the opposite indices, which swapped
    # sensitivity with specificity and reported the F1 of the "rest" class.
    res.append([name, recall[1], recall[0], fscore[1]])

pd.DataFrame(res, columns=['class', 'sensitivity', 'specificity', 'f1score'])



Unnamed: 0,class,sensitivity,specificity,f1score
0,Bacterial Pneumonia,0.878322,0.758865,0.849222
1,COVID-19,0.99681,0.0,0.988533
2,No Pneumonia (healthy),0.969729,0.890966,0.966701
3,Viral Pneumonia,0.805495,0.726287,0.840596
