Basics of Deep Learning assignment - Test Notebook

---
Written by:

- Matan Ofri
- Itamar Kirsch


In [None]:
import keras as ker
import numpy as np
import os
import random
from google.colab import drive
from keras.models import load_model, Sequential
from keras.callbacks import EarlyStopping , ModelCheckpoint, ReduceLROnPlateau
from keras.optimizers import Adam, RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.layers import Conv2D, MaxPooling2D, SeparableConv2D, Flatten, BatchNormalization, Dropout, Dense
from keras import initializers, models
from keras.initializers import glorot_normal
import matplotlib.pyplot as plt
import pickle

# Question number 1 - Binary and Multi-Class Classification

---

In [None]:
# Root folder on the mounted shared drive. Dataset and model locations in the
# cells below are built by plain string concatenation (PATH+'...'), so the
# trailing '/' is required.
PATH='/content/drive/Shareddrives/DeepLearning/Basics/'

🛑**Please note - you must run this cell:**

In [None]:
# Mount Google Drive so the shared dataset folders are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

# Test-set roots: chest_xray_2 is used for the binary task, chest_xray_3 for the multi-class task.
test_binary = PATH+'Kaggle Data/chest_xray_2/test'
test_multiclass = PATH+'Kaggle Data/chest_xray_3/test'

# Both folders must exist. Logical `and` states the intent directly and
# short-circuits, unlike the bitwise `&` operator.
if os.path.exists(test_binary) and os.path.exists(test_multiclass):
  print("Data is loaded")
else:
  print("There is a problem loading data")

Parameters for a binary model

In [None]:
# Image size and batch size configured for the binary model.
# NOTE: the multi-class parameter cell further down rebinds these same three
# names, so whichever of the two cells ran last decides their value.
img_height, img_width = 256, 256
batch_size = 20

Parameters for a multi-class model

In [None]:
# Image size and batch size configured for the multi-class model.
# NOTE: this rebinds the names set by the binary parameter cell above.
img_height = img_width = 96
batch_size = 20

In [None]:
# Each model is evaluated at its own resolution (values taken from the
# "Parameters for a binary model" / "Parameters for a multi-class model" cells).
# They are pinned here because img_height / img_width are shared globals that
# hold only ONE of the two sizes at any time; using them for both generators
# would feed one of the models images of the wrong size.
BINARY_TARGET_SIZE = (256, 256)
MULTICLASS_TARGET_SIZE = (96, 96)

# Only rescaling is applied at test time: pixel values are mapped to [0, 1].
test_datagen = ImageDataGenerator(rescale=1./255)

# Multi-class test set: default colour mode, one-hot ('categorical') labels.
test_set_multi = test_datagen.flow_from_directory(
    test_multiclass,
    target_size=MULTICLASS_TARGET_SIZE,
    batch_size=batch_size,  # honour the configured batch size instead of the library default
    class_mode='categorical',
    shuffle=True,
)

# Binary test set: single-channel images, scalar 0/1 labels.
test_set_bin = test_datagen.flow_from_directory(
    test_binary,
    target_size=BINARY_TARGET_SIZE,
    batch_size=batch_size,
    class_mode='binary',
    color_mode='grayscale',
    shuffle=True,
)

Running the binary model on the test set:

In [None]:
# Input shape of the binary network: 256x256 (the size given in the
# "Parameters for a binary model" cell) with a single grayscale channel.
# It is pinned here instead of read from img_height / img_width because other
# cells rebind those globals. With a 256x256 input, the four stride-2
# convolutions and four 2x2 poolings reduce the feature map to 1x1, so Flatten
# yields 64 values; a 96x96 input would shrink to 1x1 before the last pooling
# layer, which then could not be applied.
BINARY_INPUT_SHAPE = (256, 256, 1)

model_bin = ker.Sequential()
# Block 1: 32 filters (the only layer with an explicit Glorot-normal initializer)
model_bin.add(Conv2D(32, 3, activation='relu', strides=2, padding='same', name='Conv2D_input',
                     input_shape=BINARY_INPUT_SHAPE, kernel_initializer=glorot_normal()))
model_bin.add(MaxPooling2D(2, name='MaxPooling2D_input'))
# Block 2: 64 filters
model_bin.add(Conv2D(64, 3, activation='relu', strides=2, padding='same', name='Conv2D_1'))
model_bin.add(MaxPooling2D(2, name='MaxPooling2D_1'))
# Block 3: 128 filters
model_bin.add(Conv2D(128, 3, activation='relu', strides=2, padding='same', name='Conv2D_2'))
model_bin.add(MaxPooling2D(2, name='MaxPooling2D_2'))
# Block 4: back down to 64 filters
model_bin.add(Conv2D(64, 3, activation='relu', strides=2, padding='same', name='Conv2D_3'))
model_bin.add(MaxPooling2D(2, name='MaxPooling2D_3'))

# Classifier head
model_bin.add(Flatten(name='Flatten'))
model_bin.add(Dense(64, activation='relu', name='Dense_1'))
# Binary classification, so we use 1 unit and 'sigmoid' activation:
model_bin.add(Dense(1, activation='sigmoid', name='Dense_output'))
model_bin.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
# Load the stored binary weights into model_bin, then evaluate on the binary
# test generator; the evaluate() result is the cell's displayed output.
binary_weights_file = PATH+'Models/BinaryWeights.h5'
model_bin.load_weights(binary_weights_file)
model_bin.evaluate(test_set_bin)

Running the multi-class model on the test set:

In [None]:
# Input shape of the multi-class network: 96x96 (the size given in the
# "Parameters for a multi-class model" cell) with 3 colour channels.
# It is pinned here instead of read from img_height / img_width because other
# cells in this notebook rebind those globals to 256 and 300.
MULTICLASS_INPUT_SHAPE = (96, 96, 3)

model_multi = Sequential()
# Block 1: 32 filters
model_multi.add(Conv2D(32, (3,3), activation='relu', name='Conv2D_input', input_shape=MULTICLASS_INPUT_SHAPE))
model_multi.add(MaxPooling2D((2,2), name='MaxPooling2D_input'))

# Block 2: 64 filters + dropout
model_multi.add(Conv2D(64, (3,3), activation='relu', name='Conv2D_1'))
model_multi.add(MaxPooling2D((2,2), name='MaxPooling2D_1'))
model_multi.add(Dropout(0.2, name='Dropout_1'))

# Block 3: 128 filters + dropout
model_multi.add(Conv2D(128, (3,3), activation='relu', name='Conv2D_2'))
model_multi.add(MaxPooling2D((2,2), name='MaxPooling2D_2'))
model_multi.add(Dropout(0.2, name='Dropout_2'))

# Block 4: 256 filters
model_multi.add(Conv2D(256, (3,3), activation='relu', name='Conv2D_3'))
model_multi.add(MaxPooling2D((2,2), name='MaxPooling2D_3'))

# Classifier head.
# NOTE(review): the layer names 'Flaten' and 'Dropuot_flatten' are misspelled
# but deliberately left as-is; the saved weights file may reference layer
# names, so renaming could break loading - confirm before changing.
model_multi.add(Flatten(name='Flaten'))
model_multi.add(Dropout(0.2, name='Dropuot_flatten'))
model_multi.add(Dense(256, activation='relu', name='Dense_flatten'))
# Three output classes, so 3 units with 'softmax' activation.
model_multi.add(Dense(3, activation='softmax', name='Dense_output'))
model_multi.compile(optimizer=RMSprop(learning_rate=0.001, rho=0.90, epsilon=1e-08),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# Load the stored multi-class weights into model_multi, then evaluate on the
# multi-class test generator; the evaluate() result is the cell's output.
multiclass_weights_file = PATH+'Models/MutiWeights.h5'
model_multi.load_weights(multiclass_weights_file)
model_multi.evaluate(test_set_multi)

# Question number 2 - KNN with Embedding Vector

---

1. Load Test Set

🛑**Please note - you must run this cell:**

In [None]:
# Make sure Drive is mounted (the recorded output below shows Colab reporting
# "already mounted" when it was attached earlier in the session).
drive.mount('/content/drive')

# The KNN experiment uses the three-class test folder.
test_KNN = PATH + 'Kaggle Data/chest_xray_3/test'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Image size and batch size for the KNN / embedding experiment.
img_height = 300
img_width = 300
batch_size = 20

# Only rescaling is applied: pixel values are mapped to [0, 1].
datagen = ImageDataGenerator(rescale=1./255)
test_set_knn = datagen.flow_from_directory(
    test_KNN,
    target_size=(img_height, img_width),
    batch_size=batch_size,  # honour the configured batch size instead of the library default
    class_mode='categorical',
    color_mode='grayscale',
    # Keep the directory order: the accuracy cell compares the i-th embedding
    # with test_set_knn.classes[i], which only lines up without shuffling.
    shuffle=False,
)

2. Load KNN Model

🛑**Please note - you must run this cell:**

In [None]:
# Load the fitted KNN classifier.
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only load this file from a trusted location.
# The context manager closes the file handle once the object has been read.
with open(PATH+'Models/knnpickle_file', 'rb') as knn_file:
  KNN_model = pickle.load(knn_file)

# Load the full Keras network, then rebuild it without its last layer so that
# predict() returns the output of the second-to-last layer (the embedding).
model = load_model(PATH+'Models/KNN.keras')
embedding_model = Sequential()
for layer in model.layers[:-1]:
  embedding_model.add(layer)
embedding_model.compile()

3. Run the KNN model on the entire Test Set and compute its accuracy

In [None]:
# Embed every test image; the generator was created with shuffle=False, so row
# x of test_embeddings is compared against test_set_knn.classes[x].
test_embeddings = embedding_model.predict(test_set_knn)

acc = 0
# The range must cover every index. Stopping at size-1 would leave the final
# sample unscored while the denominator below still counts it.
for x in range(test_set_knn.classes.size):
  # The classifier expects a 2-D array, hence the reshape to a single row.
  predicted_class = KNN_model.predict(test_embeddings[x].reshape(1,-1))
  if predicted_class == test_set_knn.classes[x]:
    acc = acc + 1
print ("The accuarcy for the KNN Model is: ",acc/test_set_knn.classes.size)

4. Load a random image from the Test Set and test it on the KNN model

In [None]:
# Pick a random class folder, then a random .jpeg file inside it.
class_names = ['NORMAL', 'BACTERIA', 'VIRUS']
selected_class = random.choice(class_names)
class_dir = os.path.join(test_KNN, selected_class)
image_files = [
    os.path.join(class_dir, file_name)
    for file_name in os.listdir(class_dir)
    if file_name.endswith('.jpeg')
]
selected_image_path = random.choice(image_files)

# Load the image resized to (img_height, img_width), convert it to 8-bit
# grayscale, add a leading batch axis and scale the pixel values by 1/255.
img = image.load_img(selected_image_path, target_size=(img_height, img_width))
gray_img = img.convert('L')
gray_array = np.expand_dims(image.img_to_array(gray_img), axis=0) / 255.0

# Embed the image, then let the KNN classifier label the embedding.
embedding_image = embedding_model.predict(gray_array)
predicted_class = KNN_model.predict(embedding_image)[0]

# Show the image, annotated with the true and the predicted class.
plt.imshow(img)
# Translate the predicted index back to its folder name via the generator's mapping.
matching_names = [name for name, index in test_set_knn.class_indices.items() if index == predicted_class]
class_name = matching_names[0]
plt.text(10, 20, f'Actual Class: {selected_class}', color='blue', fontsize=12, fontweight='bold')
# Green when the prediction matches the true class, red otherwise.
detectColor = 'green' if class_name == selected_class else 'red'
plt.text(10, 40, f'Predicted Class: {class_name}', color=detectColor, fontsize=12, fontweight='bold')
plt.show()

# Question number 3 - Identifying anomalies in X-ray images

---

1. Load test set data

🛑**Please note - you must run this cell:**

In [None]:
# Make sure Drive is mounted before resolving the dataset folders.
drive.mount('/content/drive')

# test_dir: three-class test folder, used for the single random-image check.
# test_anom: two-class test folder, used for the evaluation over the whole test set.
test_dir = PATH+'Kaggle Data/chest_xray_3/test'
test_anom = PATH+'Kaggle Data/chest_xray_2/test'

# Both folders are read by later cells, so both must exist.
if os.path.exists(test_dir) and os.path.exists(test_anom):
  print("Data is loaded")
else:
  print("There is a problem loading data")

In [None]:
# Settings for the anomaly-detection section.
# img_height / img_width set the size images are resized to before they are
# passed to the autoencoder in the cells below.
img_height = img_width = 256
# NOTE(review): batch_size and latent_dim are not referenced by any later cell
# visible in this notebook - presumably carried over from training; confirm.
batch_size, latent_dim = 32, 128

2. Load Autoencoder model

🛑**Please note - you must run this cell:**

In [None]:
# Make sure Drive is mounted, then load the saved autoencoder model from it.
drive.mount('/content/drive')
autoencoder_file = PATH+'Models/autoencoder.keras'
autoencoder = ker.models.load_model(autoencoder_file)

In [None]:
# Read the stored threshold: the file is expected to contain one number, which
# the cells below compare against each image's mean absolute reconstruction error.
with open(PATH+'Models/threshold.txt', 'r') as threshold_file:
    raw_threshold = threshold_file.read()
threshold = float(raw_threshold)

3. Anomaly Detection on Chest X-ray Images with Autoencoder - Test on a random image


In [None]:
# Pick a random class folder, then a random .jpeg file inside it.
class_names = ['NORMAL', 'BACTERIA', 'VIRUS']
selected_class = random.choice(class_names)
class_dir = os.path.join(test_dir, selected_class)
image_files = [os.path.join(class_dir, file) for file in os.listdir(class_dir) if file.endswith('.jpeg')]

# Randomly select an image file from the list
selected_image_path = random.choice(image_files)

# Load the image resized to (img_height, img_width), convert it to 8-bit
# grayscale, add a leading batch axis and scale the pixel values by 1/255.
img = image.load_img(selected_image_path, target_size=(img_height, img_width))
gray_img = img.convert('L')
gray_array = image.img_to_array(gray_img)
gray_array = np.expand_dims(gray_array, axis=0)
gray_array = gray_array / 255.0

# Reconstruct the image and score it by its mean absolute reconstruction error.
reconstructed_image = autoencoder.predict(gray_array)
difference = np.abs(reconstructed_image - gray_array)  # computed once, reused for the plot
test_threshold = np.mean(difference)
th = test_threshold  # np.mean already returns a scalar; a second mean adds nothing
print('test=',th,' loss=',threshold)

# A score above the stored threshold is reported as an anomaly.
if th > threshold:
  detection='Anomaly detected!'
  anomcolor='red'
else:
  detection='Anomaly not detected'
  anomcolor='blue'
print(detection)

# Draw the input image and the error map side by side in one figure, so the
# second panel is not left alone in a half-empty figure.
fig, (ax_image, ax_diff) = plt.subplots(1, 2, figsize=(12, 6))
ax_image.imshow(img)
ax_image.text(10, 20, f'True Class: {selected_class}', color='red', fontsize=12, fontweight='bold')
ax_image.text(10, 40, detection, color=anomcolor, fontsize=12, fontweight='bold')

# Per-pixel absolute error of the first (only) batch item, drawn with the 'magma' colormap.
ax_diff.imshow(difference[0], cmap='magma')
ax_diff.set_title('Difference Image')
ax_diff.axis('off')

plt.show()

4. Anomaly Detection on Chest X-ray Images with Autoencoder - Test on the entire test set

In [None]:
def _count_flagged(class_dir):
  """Score every .jpeg in class_dir with the autoencoder.

  Each image is resized to (img_height, img_width), converted to grayscale,
  scaled by 1/255 and reconstructed; its score is the mean absolute
  reconstruction error. One score line is printed per image.

  Returns:
    (total, flagged): number of images scored, and how many of them scored
    strictly above `threshold`.
  """
  image_paths = [os.path.join(class_dir, file) for file in os.listdir(class_dir) if file.endswith('.jpeg')]
  flagged = 0
  for image_path in image_paths:
    img = image.load_img(image_path, target_size=(img_height, img_width))
    gray_array = image.img_to_array(img.convert('L'))
    gray_array = np.expand_dims(gray_array, axis=0)
    gray_array = gray_array / 255.0
    reconstructed_image = autoencoder.predict(gray_array)
    test_threshold = np.mean(np.abs(reconstructed_image - gray_array))
    print('Sample Score:',test_threshold,'Threshold:',threshold)
    if test_threshold > threshold:
      flagged += 1
  return len(image_paths), flagged


# Convention used by these counters: NORMAL is the "positive" class.
#   NORMAL image not flagged    -> TP      NORMAL image flagged        -> FN
#   PNEUMONIA image flagged     -> TN      PNEUMONIA image not flagged -> FP
P, normal_flagged = _count_flagged(os.path.join(test_anom, 'NORMAL'))
FN = normal_flagged
TP = P - normal_flagged

N, pneumonia_flagged = _count_flagged(os.path.join(test_anom, 'PNEUMONIA'))
TN = pneumonia_flagged
FP = N - pneumonia_flagged

print('DONE')
total = N + P
if total == 0:
  # Fail with a clear message instead of a bare ZeroDivisionError.
  raise ValueError(f'No .jpeg images found under {test_anom}')
print('ACC is:',(TP+TN)/total)