<h1> BME548 Final Project Code </h1>
Emma Nisbet, Mengde Liu, and Talya Jeter

In [1]:
from google.colab import drive
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np

import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import datasets, layers, models, losses, Model
from tensorflow.keras.utils import image_dataset_from_directory
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from tensorflow.keras.utils import plot_model
tf.keras.backend.set_image_data_format("channels_last")
import cv2
import tqdm

In [2]:
# Mount Google Drive (the dataset images are saved there) and make the
# shared project folder the working directory.
drive_mount_point = '/content/drive/'
project_folder = '/content/drive/MyDrive/BME 548L Group'
drive.mount(drive_mount_point)
os.chdir(project_folder)

Mounted at /content/drive/


In [4]:
# List the contents of the current working directory.
!ls

 archive				   General_aim.pdf
 checkpoint				  'Ghosting Simulation.ipynb'
 entire_model.png			  'Project Proposal.gdoc'
 final_model_weights.data-00000-of-00001   ResNet50_Modeling.ipynb
 final_model_weights.h5			   ResNet50_Modeling_testing_physical_layer.ipynb
 final_model_weights.index		   ResNet50_Modeling_validate.ipynb
'Final Paper.gdoc'			   resnet50_model.png
'Final Presentation.gslides'		  'Training Layer.ipynb'


# Data Loading

In [5]:
# Locations of the training and testing splits on the mounted Drive.
train_data_dir, test_data_dir = (
    r'/content/drive/MyDrive/BME 548L Group/archive/Training',
    r'/content/drive/MyDrive/BME 548L Group/archive/Testing',
)

In [None]:
def load_images_from_folder(folder):
  '''
  Load every .jpg image found in the class sub-folders of `folder`.

  Each immediate sub-directory of `folder` is treated as one class and its
  name is used as the label of the images it contains. Every image is
  converted to grayscale, resized (not cropped) to 128 x 128 and divided by
  255.0.

  The process working directory is never changed, so relative paths used
  elsewhere keep resolving against the same directory after this call.

  Args:
    folder: path of the parent directory holding one sub-folder per class.

  Returns:
    (images, labels): two numpy arrays of equal length; `images` holds the
    processed images and `labels` the matching sub-folder names.
  '''
  images = []
  labels = []

  # Sorted so the sample order is the same on every run / filesystem
  for folder_name in sorted(os.listdir(folder)):
    folder_path = os.path.join(folder, folder_name)

    # Only directories are classes; stray files in the parent are ignored
    if not os.path.isdir(folder_path):
      continue

    for filename in tqdm.tqdm(sorted(os.listdir(folder_path))):
      if not filename.endswith('.jpg'):
        continue

      image_path = os.path.join(folder_path, filename)
      img = cv2.imread(image_path)
      # cv2.imread returns None (no exception) when a file cannot be decoded
      if img is None:
        print("skipping unreadable image: " + image_path)
        continue

      # cv2.imread delivers channels in BGR order, so convert from BGR
      img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
      # Resize to the network input size and scale by 1/255
      img = cv2.resize(img, (128, 128)) / 255.0

      images.append(img)
      # The folder name is the label
      labels.append(folder_name)

  return np.array(images), np.array(labels)


print("load training...")
X_train, y_train_names = load_images_from_folder(train_data_dir)

print("load testing...")
X_test, y_test_names = load_images_from_folder(test_data_dir)


# Turn y labels into one hot encoding ex: notumor = [0, 0, 1, 0]
# The position of a name in `classes` is its integer class index.
classes = ['glioma', 'meningioma', 'notumor', 'pituitary']

# Integer class indices, kept as plain lists (y_test_new is reused for the
# confusion matrix). The raw folder names stay in y_*_names, so re-running
# this cell does not try to re-encode already encoded labels.
y_train_new = [classes.index(name) for name in y_train_names]
y_test_new = [classes.index(name) for name in y_test_names]

# num_classes is pinned so both splits always get len(classes) columns,
# even if the highest-index class happens to be missing from one of them.
y_train = tf.keras.utils.to_categorical(y_train_new, num_classes=len(classes))
y_test = tf.keras.utils.to_categorical(y_test_new, num_classes=len(classes))




In [None]:
# Sanity check of the array shapes: images first, then one-hot labels
for split_array in (X_train, X_test, y_train, y_test):
    print(split_array.shape)

In [None]:
# Show one training image together with its one-hot label
example_index = 10
plt.imshow(X_train[example_index], cmap='gray')
print("label = ", y_train[example_index])

# Data Preprocessing

In [10]:
def ghost_kernel(kx, ky, direction):
  '''
  Build the (kx, ky) multiplicative mask used by the ghosting simulation.

  The mask is 1 everywhere except on a periodic set of square blocks, where
  it is lowered by 0.75 (i.e. to 0.25):
    "vertical"   - 4x4 blocks, every 4th block-row, across all columns
    "horizontal" - 4x4 blocks, every 4th block-column, across all rows
    "diagonal"   - 2x2 blocks, where both the block-row and the block-column
                   index are even
  Any other value of `direction` yields an all-ones mask.
  '''
  # direction -> (block size, block period, attenuate by row?, by column?)
  patterns = {
      "vertical": (4, 4, True, False),
      "horizontal": (4, 4, False, True),
      "diagonal": (2, 2, True, True),
  }
  attenuation = 0.75

  mask = np.ones((kx, ky))
  if direction not in patterns:
    return mask

  block, period, by_row, by_col = patterns[direction]
  # True for every row / column that lies in an attenuated block
  row_hit = (np.arange(kx) // block) % period == 0
  col_hit = (np.arange(ky) // block) % period == 0

  if by_row and by_col:
    mask[np.ix_(row_hit, col_hit)] -= attenuation
  elif by_row:
    mask[row_hit, :] -= attenuation
  else:
    mask[:, col_hit] -= attenuation
  return mask

def apply_ghosting(img, direction):
  '''
  Simulate ghosting on a 2-D image by attenuating parts of its k-space.

  The image is Fourier transformed, multiplied element-wise by the mask
  returned by ghost_kernel for `direction`, and transformed back. The
  magnitude of the result is returned (same shape as `img`).
  '''
  # Forward transform, with the zero-frequency component moved to the center
  centered_spectrum = np.fft.fftshift(np.fft.fft2(img))

  # The mask has the same shape as the spectrum
  rows, cols = centered_spectrum.shape
  masked_spectrum = centered_spectrum * ghost_kernel(rows, cols, direction)

  # The spectrum is not shifted back before the inverse transform: a circular
  # shift in k-space only changes the phase of the result, and only the
  # magnitude is kept here.
  return np.abs(np.fft.ifft2(masked_spectrum))

In [None]:
def ghost_dataset(data):
  '''
  Apply the ghosting simulation to a dataset of 2-D images.

  Every image of shape (H, W) becomes an (H, W, 4) array whose channels are:
    0 - the original image
    1 - the vertical ghost
    2 - the horizontal ghost
    3 - the diagonal ghost
  The channel buffer is sized from each image, so the function is not tied
  to 128 x 128 inputs.

  Args:
    data: iterable of 2-D images (e.g. an array of shape (num_images, H, W)).

  Returns:
    numpy array of shape (num_images, H, W, 4).
  '''
  directions = ("vertical", "horizontal", "diagonal")
  images = []
  for image in tqdm.tqdm(data):
    image = np.asarray(image)
    # One extra channel in front of the ghosted ones for the original image
    stacked = np.zeros(image.shape + (len(directions) + 1,))
    stacked[..., 0] = image
    for channel, direction in enumerate(directions, start=1):
      stacked[..., channel] = apply_ghosting(image, direction)
    images.append(stacked)
  return np.array(images)


# Apply the ghosting simulation to the training and testing sets; each result
# stacks the original image with its ghosted versions (see ghost_dataset)
training_ghosted = ghost_dataset(X_train)
testing_ghosted = ghost_dataset(X_test)

In [None]:
# Show the four channels of the first ghosted training sample in a 2x2 grid
print("label = ", y_train[0])
titles = ["original", "vertical", "horizontal", "diagonal"]
plt.figure(figsize=(10,10))
for channel, title in enumerate(titles):
    plt.subplot(2, 2, channel + 1)
    plt.imshow(training_ghosted[0][..., channel], cmap='gray')
    plt.title(title)
    plt.axis('off')

In [None]:
# Visualize the new training image: one 4-channel array holding the original
# image and its three ghosted versions.
# NOTE(review): imshow presumably interprets a 4-channel array as RGBA, so the
# last (diagonal ghost) channel would act as transparency here - confirm.
plt.imshow(training_ghosted[0])

# Physical Layer - Ghosting

In [27]:
class Ghosting(tf.keras.layers.Layer):
    '''
    Physical layer: learnable weighted sum of the input channels.

    Each channel of the input is multiplied by its own scalar weight
    (initialized to 1), the weighted channels are summed into a single
    channel, and that channel is repeated 3 times so the output can be fed
    to a network expecting 3-channel images.

    Args:
        is_train: whether the channel weights are trainable.
        **kwargs: standard Keras Layer arguments (name, dtype, ...).
    '''

    def __init__(self, is_train=False, **kwargs):
        # Forward the standard Layer kwargs so the layer can be named and
        # rebuilt from its config.
        super(Ghosting, self).__init__(**kwargs)
        self.is_train = is_train

    def build(self, input_shape):
        # One weight per input channel, shape (1, channels), all ones
        self.initializer = tf.keras.initializers.Ones()
        self.img_weights = self.add_weight(shape=(1, int(input_shape[-1])),
                                           initializer=self.initializer,
                                           trainable=self.is_train,
                                           name='mask')

    def call(self, inputs):
        # Multiply every channel by its weight (broadcast over the last axis)
        out = inputs * self.img_weights
        # Sum over the channels, keeping a single trailing channel
        out = tf.reduce_sum(out, axis=-1, keepdims=True)
        # Repeat to 3 channels to accommodate the ResNet model
        out = tf.repeat(out, 3, axis=-1)
        return out

    def get_config(self):
        # Include the constructor argument so the layer can be serialized
        # and re-created with the same setting.
        config = super(Ghosting, self).get_config()
        config.update({"is_train": self.is_train})
        return config

# ResNet50 Model - Transfer learning with ImageNet weights

In [28]:
# Model input shape (height, width, channels) and number of output classes
input_size = (128, 128, 4)
num_classes = 4

In [29]:
# ResNet-50 backbone for transfer learning: ImageNet weights, no top
# (classification) layers, 128x128x3 inputs
base_model = tf.keras.applications.ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(128, 128, 3),
)
# Freeze every backbone layer so its pre-trained weights are not updated
for backbone_layer in base_model.layers:
  backbone_layer.trainable = False

In [None]:
# Visualize the base model's layers (with tensor shapes); the diagram is
# written to resnet50_model.png
plot_model(base_model, to_file = "resnet50_model.png", show_shapes = True)

In [32]:
# Full pipeline, in order: input -> physical (ghosting) layer -> frozen
# ResNet-50 -> flatten + softmax classifier
pipeline_layers = [
    tf.keras.layers.Input(input_size),                # 4-channel input image
    Ghosting(is_train=True),                          # trainable physical layer
    base_model,                                       # transfer learning
    layers.Flatten(),                                 # classification head
    layers.Dense(num_classes, activation='softmax'),
]
model = tf.keras.models.Sequential(pipeline_layers)

In [33]:
# Compile with the Adam optimizer (learning rate 0.001) and categorical
# cross-entropy loss; accuracy is reported as the metric
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    loss=losses.categorical_crossentropy,
    optimizer=opt,
    metrics=['accuracy'],
)

In [None]:
# Print a summary of the assembled model
model.summary()

In [None]:
# Draw the full model (with tensor shapes); the diagram is written to
# entire_model.png
plot_model(model, show_shapes=True, to_file="entire_model.png" )

In [None]:
# Weights of the first model layer (the Ghosting layer) before training
ghost_layer = model.layers[0]
print(ghost_layer.get_weights())

In [None]:
# Output of the Ghosting layer before training: a sub-model that shares the
# full model's input and stops at its first layer
ghost_layer_output = model.layers[0].output
subModel = Model(inputs=model.input, outputs=ghost_layer_output)
predictions = subModel.predict(training_ghosted)

In [None]:
# Plot the Ghosting-layer output for the first training sample
first_output = predictions[0]
plt.imshow(first_output, cmap = 'gray')
plt.axis('off')
plt.title('Output Image from Ghosting Layer without Training')

In [None]:
# Train the model for 5 epochs with mini-batches of 16 samples
history = model.fit(
    training_ghosted,
    y_train,
    batch_size=16,
    epochs=5,
)

In [None]:
# Test set metrics: the compiled loss and accuracy on the ghosted test set
model.evaluate(testing_ghosted,y_test)

In [None]:
# Output of the Ghosting layer after training: a sub-model that shares the
# full model's input and stops at its first layer
ghost_layer_output = model.layers[0].output
subModel = Model(inputs=model.input, outputs=ghost_layer_output)
predictions = subModel.predict(training_ghosted)

In [None]:
# Plot the Ghosting-layer output for the first training sample
first_output = predictions[0]
plt.imshow(first_output, cmap = 'gray')
plt.axis('off')
plt.title('Output Image from Ghosting Layer after Training')

In [None]:
# Weights of the first model layer (the Ghosting layer) after training
ghost_layer = model.layers[0]
print(ghost_layer.get_weights())

# Confusion Matrix

In [None]:
# True labels (integer class indices) and model predictions (arg-max over
# the predicted class scores of each test sample)
true_labels = y_test_new
class_scores = model.predict(testing_ghosted)
preds = np.argmax(class_scores, axis = 1)

In [None]:
# Visualize the confusion matrix.
# `labels` pins one row/column per entry of `classes` (in that order), so the
# matrix always matches `display_labels`, even if a class never occurs in
# the true labels or the predictions.
cm = confusion_matrix(true_labels, preds, labels=list(range(len(classes))))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=classes)
disp.plot()

In [None]:
# Generate visualizations of training loss and accuracy vs. epochs
loss = history.history['loss']
acc = history.history['accuracy']
# 1-based epoch numbers taken from the recorded history, so the plots stay
# valid if the number of training epochs changes
epoch_axis = range(1, len(loss) + 1)

plt.figure(figsize=(10, 4))

# Left panel: training loss
plt.subplot(1, 2, 1)
plt.plot(epoch_axis, loss, label='Training Loss', color = 'orange')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training Loss')
plt.legend()

# Right panel: training accuracy
plt.subplot(1, 2, 2)
plt.plot(epoch_axis, acc, label='Training Accuracy', color = 'green')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Training Accuracy')
plt.legend()

In [None]:
# Save model weights to reuse later, if needed.
# The path is relative, so the file is written to the current working directory.
model.save_weights('final_model_weights.h5')

# Referenced Code
https://www.kaggle.com/code/abdullahsaida011/brain-tumor-mri-using-efficientnet

https://www.kaggle.com/code/tusharnarkhede/brain-tumor-classification

https://medium.com/swlh/resnet-with-tensorflow-transfer-learning-13ff0773cf0c

# Previous Results



**Original Train and Testing (Baseline Results):** <br>
Epoch 1/5
 loss: 0.4518 - accuracy: 0.8692 - val_loss: 0.3713 - val_accuracy: 0.8664 <br>
Epoch 2/5
loss: 0.0392 - accuracy: 0.9876 - val_loss: 0.2172 - val_accuracy: 0.9237 <br>
Epoch 3/5
loss: 0.0071 - accuracy: 0.9992 - val_loss: 0.2319 - val_accuracy: 0.9160 <br>
Epoch 4/5
 loss: 0.0026 - accuracy: 1.0000 - val_loss: 0.2159 - val_accuracy: 0.9237<br>
Epoch 5/5
 loss: 0.0018 - accuracy: 1.0000 - val_loss: 0.2130 - val_accuracy: 0.9237 <br>
TEST SET: 41/41 [==============================] - 5s 126ms/step - loss: 0.2024 - accuracy: **0.9375**
[0.20240727066993713, 0.9374523162841797]<br>
Confusion Matrix: array([[271,  28,   0,   1],
       [ 10, 290,   4,   2],
       [  0,   1, 404,   0],
       [  4,  32,   0, 264]])