In [1]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os 
import pandas as pd
from PIL import Image
import seaborn as sns 
import matplotlib.pyplot as plt
import plotly.express as px
import plotly

from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten,Input,ZeroPadding2D,BatchNormalization,Activation
from tensorflow.keras.metrics import BinaryAccuracy, Precision, Recall
from tensorflow.keras.layers.experimental import preprocessing


from sklearn.metrics import classification_report

In [2]:
# Root folder of the dataset; the r prefix makes this a raw string literal.
folder_path = r"C:/Users/Darin Tsui/Documents/COGS 118b/Proj"
# File names found in the no-tumor ('/no/') and tumor ('/yes/') sub-folders.
No_Tumor_Img, Tumor_Img = (
    os.listdir(folder_path + class_dir) for class_dir in ('/no/', '/yes/')
)

In [3]:
# Maps each class sub-folder name to its integer label.
Classes = dict(no=0, yes=1)

In [4]:
# Load every image as a 128x128 grayscale array (X) and record its label (y).
X = []
y = []
for class_name, label in Classes.items():
    # Reuse the dataset root defined earlier instead of repeating the literal path.
    class_dir = folder_path + '/' + class_name
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and every seeded split derived from it) stable between runs.
    for file_name in sorted(os.listdir(class_dir)):
        img_path = class_dir + '/' + file_name
        # Flag 0 (cv2.IMREAD_GRAYSCALE) already yields a single-channel image, so
        # the former GRAY2RGB -> BGR2GRAY round trip was redundant and is dropped.
        img = cv2.imread(img_path, 0)
        if img is None:
            # cv2.imread returns None (it does not raise) for files it cannot
            # decode, e.g. stray non-image files; skip them instead of crashing.
            print("Skipping unreadable file:", img_path)
            continue
        # resize image
        img = cv2.resize(img, (128, 128), interpolation=cv2.INTER_CUBIC)
        # Pixel values are intentionally left in the 0-255 range here; no
        # normalization is applied at load time.
        X.append(img)
        y.append(label)

In [5]:
# Partition the loaded images by label: 0 goes to the "no" list, anything else to "yes".
no_image_pixels = [image for image, label in zip(X, y) if label == 0]
yes_image_pixels = [image for image, label in zip(X, y) if label != 0]

In [6]:
# Convert each class's image list to a 2-D array with one flattened image per row.
no_image_pixels_array = np.asarray(no_image_pixels)
yes_image_pixels_array = np.asarray(yes_image_pixels)
# Derive the row count from the data and let NumPy infer the pixel count (-1),
# rather than hard-coding 98 / 155 images of 16384 pixels, so the cell keeps
# working if the dataset size or the image resolution changes.
no_image_pixels_array = no_image_pixels_array.reshape(len(no_image_pixels), -1)
yes_image_pixels_array = yes_image_pixels_array.reshape(len(yes_image_pixels), -1)

In [7]:
# Keep 25% of each class for PCA (test_size=0.75 holds out the rest).
# Every part is transposed afterwards, so each image becomes one column.
no_parts = train_test_split(no_image_pixels_array, test_size=0.75, random_state=69)
X_no_train, X_no_test = (part.T for part in no_parts)
print(X_no_train.shape)
print(X_no_test.shape)

yes_parts = train_test_split(yes_image_pixels_array, test_size=0.75, random_state=69)
X_yes_train, X_yes_test = (part.T for part in yes_parts)
print(X_yes_train.shape)
print(X_yes_test.shape)

(16384, 24)
(16384, 74)
(16384, 38)
(16384, 117)


In [8]:
# Standardization function
def standardize(X):
    """Standardize ``X`` to zero mean and unit variance along axis 0.

    Parameters
    ----------
    X : array-like
        Input data; statistics are computed along axis 0 (per column for a
        2-D input).

    Returns
    -------
    X_bar : numpy.ndarray
        ``(X - mu) / std``, where a standard deviation of exactly 0 is replaced
        by 1 so constant columns map to 0 instead of dividing by zero.
    mu : numpy.ndarray
        Mean along axis 0.
    std : numpy.ndarray
        Standard deviation along axis 0 (unmodified, zeros included).
    """
    X = np.asarray(X, dtype=float)

    # Calculate the mean and standard deviation of each column
    mu = np.mean(X, axis=0)
    std = np.std(X, axis=0)

    # Handle zero standard deviation case; np.where also works when std is a
    # scalar (1-D input), where boolean item assignment would fail.
    std_filled = np.where(std == 0, 1.0, std)

    # Subtract the mean exactly once; the previous version subtracted it twice,
    # which left the result shifted by -mu/std instead of centered.
    X_bar = (X - mu) / std_filled

    return X_bar, mu, std


In [110]:
# Function to calculate eigenvectors and eigenvalues
def eig(S):
    """Eigen-decompose a symmetric matrix, sorted by decreasing eigenvalue.

    Parameters
    ----------
    S : array-like, shape (n, n)
        Real symmetric (or complex Hermitian) matrix, e.g. a covariance matrix.

    Returns
    -------
    (eig_vec, eig_val) : tuple of numpy.ndarray
        ``eig_vec[:, i]`` is the unit eigenvector that belongs to
        ``eig_val[i]``; eigenvalues are in decreasing order.
    """
    # np.linalg.eigh returns (eigenvalues, eigenvectors) in that order; the
    # previous unpacking had the two swapped.
    eig_val, eig_vec = np.linalg.eigh(S)
    # sorting eigenvalues in decreasing order
    # np.argsort returns indices of positions
    sorted_eig = np.argsort(-eig_val)
    eig_val = eig_val[sorted_eig]
    # Eigenvectors are stored as columns, so reorder columns rather than rows.
    eig_vec = eig_vec[:, sorted_eig]

    return (eig_vec, eig_val)
    

In [10]:
# Function to calculate projection matrix for reconstruction
def projection(A):
    """Return the product of ``A`` with its own transpose, ``A @ A.T``.

    NOTE(review): this product is an orthogonal projector onto the column
    space of ``A`` only if the columns of ``A`` are orthonormal; the function
    itself does not check that.
    """
    return A @ A.T

In [178]:
# Implement PCA
def PCA(X, mu, num_pcs):
    """Reconstruct ``X`` from its leading ``num_pcs`` principal components.

    Parameters
    ----------
    X : array-like, shape (num_features, num_samples)
        Data matrix with one sample per column.
    mu : None, scalar or array-like
        Offset subtracted from ``X`` before the decomposition and added back
        after the reconstruction. A scalar, a length-``num_features`` vector or
        a ``(num_features, 1)`` column is accepted; ``None`` uses the
        per-feature mean of ``X``.
    num_pcs : int
        Number of principal components to keep. Values larger than
        ``min(num_features, num_samples)`` keep every available component.

    Returns
    -------
    numpy.ndarray, shape (num_features, num_samples)
        Projection of the centered data onto the leading ``num_pcs`` principal
        directions, shifted back by the offset.

    Raises
    ------
    ValueError
        If ``X`` is not a non-empty 2-D array or ``num_pcs`` is negative.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim != 2 or X.size == 0:
        raise ValueError("X must be a non-empty 2-D array of shape (num_features, num_samples)")
    if num_pcs < 0:
        raise ValueError("num_pcs must be non-negative")

    # Resolve the centering offset into something that broadcasts against X.
    if mu is None:
        center = X.mean(axis=1, keepdims=True)
    else:
        center = np.asarray(mu, dtype=float)
        if center.ndim == 1:
            # A flat per-feature vector must become a column to line up with rows.
            center = center.reshape(-1, 1)
    Z = X - center

    # The left singular vectors of the centered data are the eigenvectors of its
    # covariance matrix Z Z^T / N, already orthonormal and ordered by decreasing
    # variance. The economy-size SVD never forms the
    # (num_features x num_features) covariance and needs no manual
    # normalization, so near-zero components cannot cause a division by zero.
    U, _, _ = np.linalg.svd(Z, full_matrices=False)

    # Select specific eigenvectors
    U = U[:, :num_pcs]

    # Find principal components (coordinates of every sample in the kept basis)
    principal_components = U.T @ Z

    # Reconstruct
    zero_mean_reconstruction = U @ principal_components
    reconstruction = zero_mean_reconstruction + center
    return reconstruction

In [181]:
# Scalar mean pixel intensity of each class, passed to PCA as its `mu` argument.
# ndarray.mean() with no axis already returns a single scalar, so one call suffices.
no_pixel_mu = no_image_pixels_array.mean()
# Fixed copy-paste slip: the tumor-class mean was previously computed from the
# no-tumor array.
yes_pixel_mu = yes_image_pixels_array.mean()
no_reconstruction = PCA(X_no_train, no_pixel_mu, 5)
yes_reconstruction = PCA(X_yes_train, yes_pixel_mu, 5)

In [None]:
# NOTE(review): bare reference to the `projection` function. Evaluating it only
# displays the function object; nothing is called or assigned here.
projection

In [None]:
# CNN Model

# Binary classifier for 128x128 single-channel images: two conv -> conv -> pool
# stages, then a flatten and a single sigmoid output unit.
model = keras.Sequential([
    # Stage 1
    layers.Conv2D(64, 3, activation='relu', input_shape=(128, 128, 1)),
    layers.Conv2D(filters=32, kernel_size=(4, 4), activation='relu'),
    layers.MaxPool2D(pool_size=(2, 2), padding='valid'),
    # Stage 2
    layers.Conv2D(filters=32, kernel_size=(4, 4), activation='relu'),
    layers.Conv2D(filters=16, kernel_size=3, activation='relu'),
    layers.MaxPool2D(pool_size=(2, 2)),
    # Classifier head
    layers.Flatten(),
    layers.Dense(units=1, activation='sigmoid'),
])

model.summary()

In [None]:
# Training setup: Adam optimizer, binary cross-entropy loss, accuracy as the metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# X_train / y_train / X_test / y_test were never defined anywhere in this
# notebook, so this cell raised NameError on a fresh kernel. Build them here from
# the images (X) and labels (y) loaded earlier.
# Add the trailing channel axis required by input_shape=(128, 128, 1) and scale
# the 0-255 pixel values to [0, 1].
X_cnn = np.asarray(X, dtype="float32")[..., np.newaxis] / 255.0
y_cnn = np.asarray(y, dtype="float32")
# Hold out a stratified 20% for validation so both classes keep their ratio;
# the seed matches the one used for the PCA splits.
# NOTE(review): the 80/20 ratio is a reviewer default, adjust as needed.
X_train, X_test, y_train, y_test = train_test_split(
    X_cnn, y_cnn, test_size=0.2, stratify=y_cnn, random_state=69
)
model.fit(x=X_train, y=y_train, epochs=10, validation_data=(X_test, y_test))