In [45]:
import numpy as np 
from tqdm import tqdm
import os
import cv2
import numpy as np

In [46]:
REBUILD_DATA = True # set to true to one once, then back to false unless you want to change something in your training data.

In [47]:
class TumorImages:
    IMG_SIZE = 50
    YES = "Resources/brain_tumor_dataset/yes"
    NO = "Resources/brain_tumor_dataset/no"
    LABELS = {NO: 0, YES: 1}
    training_data = []

    yescount = 0
    nocount = 0

    def make_training_data(self):
        for label in self.LABELS:
            print(label)
            for f in tqdm(os.listdir(label)):
                if "jpg" in f.lower() or "jpeg" in f.lower():
                    try:
                        path = os.path.join(label, f)
                        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
                        img = cv2.resize(img, (self.IMG_SIZE, self.IMG_SIZE))
                        self.training_data.append([np.array(img), np.eye(2)[self.LABELS[label]]])
                        
                        if label == self.YES:
                            self.yescount += 1
                        elif label == self.NO:
                            self.nocount += 1

                    except Exception as e:
                        pass

        np.random.shuffle(self.training_data)
        
        # Separate features and labels
        X = np.array([i[0] for i in self.training_data])
        y = np.array([i[1] for i in self.training_data])

        np.save("X.npy", X)
        np.save("y.npy", y)
        print('Tumor:', self.yescount)
        print('No Tumor:', self.nocount)

In [48]:
if REBUILD_DATA:
    tumors = TumorImages()
    tumors.make_training_data()

X = np.load("X.npy", allow_pickle=True)
y = np.load("y.npy", allow_pickle=True)

Resources/brain_tumor_dataset/no


100%|██████████| 98/98 [00:00<00:00, 1318.49it/s]


Resources/brain_tumor_dataset/yes


100%|██████████| 155/155 [00:00<00:00, 1419.58it/s]

Tumor: 154
No Tumor: 97





In [49]:
type(X)

numpy.ndarray

In [50]:
from sklearn.model_selection import train_test_split

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Normalize the images
X_train = X_train / 255.0
X_test = X_test / 255.0

In [51]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers.legacy import Adam #Works better with M3 Macs
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

X_train_rgb = np.stack([X_train]*3, axis=-1)
X_test_rgb = np.stack([X_test]*3, axis=-1)

# Load the VGG16 model without the top layers
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(50, 50, 3))

# Freeze all the layers except the last few
for layer in base_model.layers[:-4]:
    layer.trainable = False

# Add custom layers
x = Flatten()(base_model.output)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
output = Dense(2, activation='softmax')(x)

# Define the model
model = Model(inputs=base_model.input, outputs=output)

# Compile the model
model.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])


In [52]:
# Train the model
from tensorflow.keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

history = model.fit(X_train_rgb, y_train, epochs=50, batch_size=32, validation_split=0.2, callbacks=[early_stopping])

Epoch 1/50


2024-06-27 16:21:52.597991: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2024-06-27 16:21:55.837370: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50


In [53]:
# Evaluate on the test set
y_pred = model.predict(X_test_rgb)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

# Calculate metrics
accuracy = accuracy_score(y_true, y_pred_classes)
precision = precision_score(y_true, y_pred_classes)
recall = recall_score(y_true, y_pred_classes)
f1 = f1_score(y_true, y_pred_classes)

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")

Accuracy: 0.89
Precision: 0.88
Recall: 0.95
F1-score: 0.92


2024-06-27 16:22:15.732423: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


In [54]:
# Seeing if I can improve the model to minimize false negatives

# Modify custom layers on top
x = Flatten()(base_model.output)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
output = Dense(2, activation='softmax')(x)

# Define the modified model
model = Model(inputs=base_model.input, outputs=output)

# Compile the model with the same optimizer and loss function
model.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])

In [55]:
class_weight = {0: 1.5, 1: 1.0}

# Train the model with class weights
history = model.fit(X_train_rgb, y_train, epochs=50, batch_size=32, validation_split=0.2, callbacks=[early_stopping], class_weight=class_weight)


Epoch 1/50


2024-06-27 16:28:01.722675: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50


2024-06-27 16:28:02.241206: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50


In [56]:
# Train the modified model
history = model.fit(X_train_rgb, y_train, epochs=50, batch_size=32, validation_split=0.2, callbacks=[early_stopping], class_weight=class_weight)

# Evaluate on the test set
y_pred = model.predict(X_test_rgb)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

# Calculate metrics
accuracy = accuracy_score(y_true, y_pred_classes)
precision = precision_score(y_true, y_pred_classes)
recall = recall_score(y_true, y_pred_classes)
f1 = f1_score(y_true, y_pred_classes)

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Accuracy: 0.89
Precision: 0.90
Recall: 0.93
F1-score: 0.91


2024-06-27 16:28:12.703830: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


In [58]:
# Dang, we did worse with our recall. Well, I think I'll leave it there for now, having done enough damage for the moment. 