In [None]:
import os
import numpy as np
import pickle
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from keras.applications.vgg16 import VGG16
from keras.models import Sequential
from keras.layers import Dense, Flatten, Dropout, BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.regularizers import l2
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc, precision_recall_curve, average_precision_score

In [None]:
# Mount Google Drive at /content/drive. Later cells read the pickled datasets
# from, and write the trained model to, paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Switch the working directory to the Drive folder that holds the pickle files.
# The data-loading cells open those files by bare filename, so this cell has to
# run before them.
%cd /content/drive/MyDrive/pickle_files/
# List the folder contents as a quick check of which files are available.
!ls

In [None]:
# Define the project directory in Google Drive.
# The trained model and its weights are written into this folder by the save cell.
# NOTE(review): the folder name is spelled "Alogorithm". The string is part of
# the path, so correcting the spelling would change where artifacts are saved.
project_dir = '/content/drive/MyDrive/Tumor_Classification_Deep_Learning_Alogorithm'

In [None]:

# Central hyperparameter configuration consumed by train_and_evaluate_model.
hyperparams = {
    # Optimiser / regularisation
    'learning_rate': 0.00001,
    'batch_size': 64,
    'dropout_rate': 0.5,
    'l2_reg': 0.001,
    # Training-time image augmentation
    'rotation_range': 30,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    # Training schedule
    'epochs': 50,
    'patience': 5,
    # Lower bound for ReduceLROnPlateau. It must be strictly below
    # 'learning_rate'; when the two are equal (as they were before) the
    # scheduler can never reduce the rate and is effectively disabled.
    'min_lr': 0.0000001
}

In [None]:
# Load the balanced training data (file names are relative to the current
# working directory).
# NOTE: pickle.load can execute arbitrary code while deserialising; only load
# pickle files that you produced yourself or otherwise trust.
with open("X_train_balanced.pickle", "rb") as pickle_in:
    X_train_balanced = pickle.load(pickle_in)

with open("Y_train_balanced.pickle", "rb") as pickle_in:
    Y_train_balanced = pickle.load(pickle_in)

# Normalize the balanced training data (assumes 8-bit pixel values -- TODO confirm).
# Casting to float32 first keeps the scaled array in float32; dividing an
# integer array by 255.0 would silently produce float64 and double the memory.
# np.asarray also accepts a plain list, which `list / 255.0` would not.
X_train_balanced = np.asarray(X_train_balanced, dtype=np.float32) / 255.0

# Convert Y_train_balanced to numpy array
Y_train_balanced = np.array(Y_train_balanced)

# Sanity checks: one label per image, and labels must be binary for the
# `1 - y` flip below to be a pure 0 <-> 1 swap.
assert len(X_train_balanced) == len(Y_train_balanced), "image/label count mismatch"
assert np.isin(Y_train_balanced, (0, 1)).all(), "labels must be 0 or 1"

# Reverse the labels in the balanced training data
Y_train_balanced = 1 - Y_train_balanced  # Swap 0 and 1

In [None]:
# Create the output directory if it doesn't exist (exist_ok avoids the
# check-then-create race and makes the cell safe to re-run).
os.makedirs(project_dir, exist_ok=True)

# Load the original dataset (file names are relative to the current working
# directory).
# NOTE: pickle.load can execute arbitrary code while deserialising; only load
# pickle files that you produced yourself or otherwise trust.
with open("X_train.pickle", "rb") as pickle_in:
    X_train_orig = pickle.load(pickle_in)

with open("Y_train.pickle", "rb") as pickle_in:
    Y_train_orig = pickle.load(pickle_in)

# Normalize the data (assumes 8-bit pixel values -- TODO confirm).
# Casting to float32 first keeps the scaled array in float32 instead of the
# float64 that `uint8_array / 255.0` would produce.
X_train_orig = np.asarray(X_train_orig, dtype=np.float32) / 255.0

# Convert Y_train_orig to numpy array
Y_train_orig = np.array(Y_train_orig)

# Sanity checks: one label per image, and labels must be binary for the
# `1 - y` flip below to be a pure 0 <-> 1 swap.
assert len(X_train_orig) == len(Y_train_orig), "image/label count mismatch"
assert np.isin(Y_train_orig, (0, 1)).all(), "labels must be 0 or 1"

# Reverse the labels
Y_train_orig = 1 - Y_train_orig  # Swap 0 and 1

# Split the original data into training and validation sets.
# The split arguments are deliberately left unchanged so the validation set
# stays the same as before.
# NOTE(review): the model is trained on X_train_balanced but validated on
# X_val taken from this split. If the balanced set was built from the full
# original data, validation images also appear in training (leakage) --
# confirm how the balanced pickle was produced.
X_train_split, X_val, Y_train_split, Y_val = train_test_split(
    X_train_orig, Y_train_orig, test_size=0.2, random_state=42
)

# Check shapes and values (the split outputs are already numpy arrays because
# both inputs are numpy arrays).
print(f"X_train_split shape: {X_train_split.shape}")
print(f"X_val shape: {X_val.shape}")
print(f"Y_train_split shape: {Y_train_split.shape}")
print(f"Y_val shape: {Y_val.shape}")

In [None]:
# Duplicate definition removed: this cell held a byte-for-byte copy of
# train_and_evaluate_model. The definition in the following cell is the one
# Python binds when the function is called, so it is kept as the single
# definition and maintained there.




In [None]:
def train_and_evaluate_model(hyperparams):
    """Build, train and evaluate a binary classifier on top of a frozen VGG16.

    The function reads these notebook-level arrays defined in earlier cells:
    ``X_train_balanced`` / ``Y_train_balanced`` (training data) and
    ``X_val`` / ``Y_val`` (validation data).

    Args:
        hyperparams: dict with the keys ``learning_rate``, ``batch_size``,
            ``dropout_rate``, ``l2_reg``, ``rotation_range``,
            ``width_shift_range``, ``height_shift_range``, ``shear_range``,
            ``zoom_range``, ``horizontal_flip``, ``epochs``, ``patience`` and
            ``min_lr``. An optional ``lr_patience`` key sets the patience of
            the learning-rate scheduler; when absent it defaults to half of
            ``patience`` (at least 1).

    Returns:
        Tuple ``(model, history, val_loss, val_accuracy)``: the trained model,
        the object returned by ``model.fit``, and the loss/accuracy from
        ``model.evaluate`` on the validation arrays.

    Raises:
        ValueError: if ``Y_train_balanced`` has no samples with label 1, since
            the class weight for label 1 would require dividing by zero.
    """
    batch_size = hyperparams['batch_size']

    # Data augmentation for training data
    train_datagen = ImageDataGenerator(
        rotation_range=hyperparams['rotation_range'],
        width_shift_range=hyperparams['width_shift_range'],
        height_shift_range=hyperparams['height_shift_range'],
        shear_range=hyperparams['shear_range'],
        zoom_range=hyperparams['zoom_range'],
        horizontal_flip=hyperparams['horizontal_flip'],
        fill_mode='nearest'
    )

    # Validation images are passed through unchanged: no augmentation and no
    # rescaling here (the arrays were already divided by 255 when loaded).
    val_datagen = ImageDataGenerator()

    # Create the VGG16 base model
    # NOTE(review): the ImageNet weights are normally paired with
    # keras.applications.vgg16.preprocess_input, whereas the inputs here are
    # scaled to [0, 1] -- confirm this is intended.
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(128, 128, 3))

    # Freeze the base model layers
    for layer in base_model.layers:
        layer.trainable = False

    # Add custom layers on top of the base model
    model = Sequential([
        base_model,
        Flatten(),
        Dense(256, activation='relu', kernel_regularizer=l2(hyperparams['l2_reg'])),
        BatchNormalization(),
        Dropout(hyperparams['dropout_rate']),
        Dense(1, activation='sigmoid', kernel_regularizer=l2(hyperparams['l2_reg']))
    ])

    # Compile the model
    model.compile(
        optimizer=Adam(learning_rate=hyperparams['learning_rate']),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    # Set up callbacks.
    # The scheduler must be more impatient than early stopping; with equal
    # patience (as before) training stops at the same epoch the learning rate
    # would first be reduced, so the reduction never has any effect.
    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=hyperparams['patience'],
        restore_best_weights=True
    )
    lr_patience = hyperparams.get('lr_patience', max(1, hyperparams['patience'] // 2))
    reduce_lr = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=lr_patience,
        min_lr=hyperparams['min_lr']
    )

    # Weight label 1 by (total samples / number of label-1 samples).
    # NOTE(review): if the training set really is balanced this is about 2.0,
    # i.e. label 1 is deliberately emphasised -- confirm that is the intent.
    n_positive = float(np.sum(Y_train_balanced))
    if n_positive <= 0:
        raise ValueError(
            "Y_train_balanced contains no samples with label 1; cannot compute class weights."
        )
    class_weights = {0: 1., 1: len(Y_train_balanced) / n_positive}

    # Train the model.
    # Both generators use the configured batch size, and the step counts are
    # left for Keras to infer from the generators' lengths. Previously the
    # validation generator used its default batch size (32) while
    # validation_steps was computed from the training batch size, so each epoch
    # scored only a random part of the validation set. shuffle=False keeps the
    # validation pass deterministic.
    history = model.fit(
        train_datagen.flow(X_train_balanced, Y_train_balanced, batch_size=batch_size),
        validation_data=val_datagen.flow(X_val, Y_val, batch_size=batch_size, shuffle=False),
        epochs=hyperparams['epochs'],
        callbacks=[early_stopping, reduce_lr],
        class_weight=class_weights
    )

    # Evaluate the model on the full validation arrays
    val_loss, val_accuracy = model.evaluate(X_val, Y_val)
    print(f"Validation Loss: {val_loss}")
    print(f"Validation Accuracy: {val_accuracy}")

    return model, history, val_loss, val_accuracy




In [None]:
# Train and evaluate the model with initial hyperparameters.
# The four returned values are kept as notebook-level names: `model` is used by
# the saving and prediction cells and `history` by the training-curve plots.
model, history, val_loss, val_accuracy = train_and_evaluate_model(hyperparams)

In [None]:
# Persist the trained network into the Drive project folder:
# first the complete model, then the weights on their own.
full_model_path = os.path.join(project_dir, 'VGG16_model_class_weights.h5')
model.save(full_model_path)

weights_only_path = os.path.join(project_dir, 'VGG16_weights_class_weights.h5')
model.save_weights(weights_only_path)

In [None]:
import matplotlib.pyplot as plt

# Training curves side by side: loss on the left, accuracy on the right.
curves_fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(14, 6))

# Left panel: training vs. validation loss per epoch
loss_ax.plot(history.history['loss'])
loss_ax.plot(history.history['val_loss'])
loss_ax.set_title('Model Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend(['Train', 'Validation'], loc='upper right')

# Right panel: training vs. validation accuracy per epoch
acc_ax.plot(history.history['accuracy'])
acc_ax.plot(history.history['val_accuracy'])
acc_ax.set_title('Model Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend(['Train', 'Validation'], loc='upper left')

plt.show()


In [None]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, classification_report
import seaborn as sns

# Get predictions: Y_pred holds the model's sigmoid outputs; thresholding at
# 0.5 turns them into hard 0/1 labels. The labels are flattened to 1-D, which
# is the layout the sklearn metric functions expect.
Y_pred = model.predict(X_val)
Y_pred_classes = (Y_pred > 0.5).astype("int32").ravel()

# Fix the label order explicitly. Without `labels`, sklearn derives the classes
# from the data, so if only one class shows up the matrix collapses to 1x1 and
# classification_report raises because it no longer matches the two names.
# NOTE: 'Positive' is label 1 *after* the `1 - y` swap applied when the data
# was loaded.
class_labels = [0, 1]
class_names = ['Negative', 'Positive']

# Confusion matrix
cm = confusion_matrix(Y_val, Y_pred_classes, labels=class_labels)

# Plot confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

# Print classification report
print("Classification Report:")
print(classification_report(Y_val, Y_pred_classes, labels=class_labels, target_names=class_names))


In [None]:
from sklearn.metrics import precision_recall_curve, average_precision_score

# Precision-recall analysis of the predicted probabilities on the validation set
precision, recall, _ = precision_recall_curve(Y_val, Y_pred)
average_precision = average_precision_score(Y_val, Y_pred)

# Draw the curve on an explicit Axes; the legend entry reports average precision
pr_fig, pr_ax = plt.subplots(figsize=(8, 6))
pr_legend_text = 'Precision-Recall curve (area = %0.2f)' % average_precision
pr_ax.plot(recall, precision, color='purple', lw=2, label=pr_legend_text)
pr_ax.set_xlabel('Recall')
pr_ax.set_ylabel('Precision')
pr_ax.set_title('Precision-Recall Curve')
pr_ax.legend(loc="lower left")
plt.show()


In [None]:
from sklearn.metrics import roc_curve, auc

# ROC analysis of the predicted probabilities on the validation set
fpr, tpr, _ = roc_curve(Y_val, Y_pred)
roc_auc = auc(fpr, tpr)

# Draw the ROC curve on an explicit Axes, with the diagonal as chance level
roc_fig, roc_ax = plt.subplots(figsize=(8, 6))
roc_legend_text = 'ROC curve (area = %0.2f)' % roc_auc
roc_ax.plot(fpr, tpr, color='blue', lw=2, label=roc_legend_text)
roc_ax.plot([0, 1], [0, 1], color='gray', lw=2, linestyle='--')
roc_ax.set_xlim([0.0, 1.0])
roc_ax.set_ylim([0.0, 1.05])
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('Receiver Operating Characteristic (ROC)')
roc_ax.legend(loc="lower right")
plt.show()
