## Loading in and Preprocessing the Dataset

In [1]:
#importing necessary libraries
import tensorflow as tf
from tensorflow.keras import models, layers, regularizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_predict
import numpy as np
from scikeras.wrappers import KerasClassifier

In [2]:
#loading in and rescaling the data

train_dir = 'train'
val_dir = 'val'
test_dir = 'test'

# Create ImageDataGenerators with rescaling
train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Create Data Generators
train_generator = train_datagen.flow_from_directory(
    train_dir, 
    target_size=(150, 150), 
    batch_size=32, 
    class_mode='binary'
)
val_generator = val_datagen.flow_from_directory(
    val_dir, 
    target_size=(150, 150), 
    batch_size=32, 
    class_mode='binary'
)
test_generator = test_datagen.flow_from_directory(
    test_dir, 
    target_size=(150, 150), 
    batch_size=32, 
    class_mode='binary'
)

# Print the class indices
print("Class indices:", train_generator.class_indices)


Found 3616 images belonging to 2 classes.
Found 1616 images belonging to 2 classes.
Found 624 images belonging to 2 classes.
Class indices: {'NORMAL': 0, 'PNEUMONIA': 1}


In [6]:
# Extracting data from generators to use with cross_val_predict
def extract_data(generator):
    X, y = [], []
    # Iterate over the generator to get all batches
    for _ in range(len(generator)):
        x_batch, y_batch = generator.next()  # Get the next batch of images and labels
        X.extend(x_batch)  # Append images to the list
        y.extend(y_batch)  # Append labels to the list
    return np.array(X), np.array(y)  # Convert lists to NumPy arrays

# Extract data and labels from the training generator
X_train_all, y_train_scaled = extract_data(train_generator)


## Testing a Baseline Neural Network

In [12]:
from tensorflow.keras import models, layers, regularizers
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

# Define the input shape
input_shape = (150, 150, 3)

# Create the baseline model
baseline_model = models.Sequential()
baseline_model.add(layers.Flatten(input_shape=input_shape))
baseline_model.add(layers.Dense(512, activation='relu'))
baseline_model.add(layers.Dense(1, activation='sigmoid'))
baseline_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the baseline model on the training data
baseline_model.fit(train_generator, epochs=50, validation_data=val_generator)

# Evaluate the baseline model on the validation data
val_loss, val_accuracy = baseline_model.evaluate(val_generator)
print(f"Validation Loss: {val_loss}")
print(f"Validation Accuracy: {val_accuracy}")

# Get predictions for the validation data
val_preds = baseline_model.predict(val_generator)
val_preds = (val_preds > 0.5).astype(int)  # Convert probabilities to class labels

# Get true labels from the validation generator
val_true = val_generator.classes

# Calculate additional metrics
val_precision = precision_score(val_true, val_preds)
val_recall = recall_score(val_true, val_preds)
val_f1 = f1_score(val_true, val_preds)
val_conf_matrix = confusion_matrix(val_true, val_preds)

print("Baseline Model Validation Metrics:")
print(f"Precision: {val_precision}")
print(f"Recall: {val_recall}")
print(f"F1 Score: {val_f1}")
print(f"Confusion Matrix:\n {val_conf_matrix}")


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Validation Loss: 0.5026053786277771
Validation Accuracy: 0.7747524976730347
Baseline Model Validation Metrics:
Precision: 0.5145797598627787
Recall: 0.7425742574257426
F1 Score: 0.6079027355623101
Confusion Matrix:
 [[242 566]
 [208 600]]


## Adding in Early Stopping

In [14]:
from tensorflow.keras.callbacks import EarlyStopping

# Define the input shape
input_shape = (150, 150, 3)

# Create the baseline model
baseline_model = models.Sequential()
baseline_model.add(layers.Flatten(input_shape=input_shape))
baseline_model.add(layers.Dense(512, activation='relu'))
baseline_model.add(layers.Dense(1, activation='sigmoid'))
baseline_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Initialize early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the baseline model with early stopping
history = baseline_model.fit(train_generator, epochs=50, validation_data=val_generator, callbacks=[early_stopping])

# Evaluate the baseline model on the validation data
val_loss, val_accuracy = baseline_model.evaluate(val_generator)
print(f"Validation Loss: {val_loss}")
print(f"Validation Accuracy: {val_accuracy}")

# Get predictions for the validation data
val_preds = baseline_model.predict(val_generator)
val_preds = (val_preds > 0.5).astype(int)  # Convert probabilities to class labels

# Get true labels from the validation generator
val_true = val_generator.classes

# Calculate additional metrics
val_precision = precision_score(val_true, val_preds)
val_recall = recall_score(val_true, val_preds)
val_f1 = f1_score(val_true, val_preds)
val_conf_matrix = confusion_matrix(val_true, val_preds)

print("Baseline Model Validation Metrics:")
print(f"Precision: {val_precision}")
print(f"Recall: {val_recall}")
print(f"F1 Score: {val_f1}")
print(f"Confusion Matrix:\n {val_conf_matrix}")


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Validation Loss: 0.22274403274059296
Validation Accuracy: 0.9467821717262268
Baseline Model Validation Metrics:
Precision: 0.5222222222222223
Recall: 0.5235148514851485
F1 Score: 0.522867737948084
Confusion Matrix:
 [[421 387]
 [385 423]]


## Testing a L1 Regularized Neural Network

In [15]:
from tensorflow.keras import models, layers, regularizers
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

# Define the input shape
input_shape = (150, 150, 3)

# Create the L1 regularized model
l1_model = models.Sequential()
l1_model.add(layers.Flatten(input_shape=input_shape))
l1_model.add(layers.Dense(512, activation='relu', kernel_regularizer=regularizers.l1(0.01)))
l1_model.add(layers.Dense(1, activation='sigmoid'))
l1_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Initialize early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the L1 regularized model with early stopping
history_l1 = l1_model.fit(train_generator, epochs=50, validation_data=val_generator, callbacks=[early_stopping])

# Evaluate the L1 regularized model on the validation data
val_generator.reset()
val_loss_l1, val_accuracy_l1 = l1_model.evaluate(val_generator)
print(f"L1 Regularization Model Validation Loss: {val_loss_l1}")
print(f"L1 Regularization Model Validation Accuracy: {val_accuracy_l1}")

# Get predictions for the validation data
val_preds_l1 = l1_model.predict(val_generator)
val_preds_l1 = (val_preds_l1 > 0.5).astype(int)  # Convert probabilities to class labels

# Get true labels from the validation generator
val_true = val_generator.classes

# Calculate additional metrics
val_precision_l1 = precision_score(val_true, val_preds_l1)
val_recall_l1 = recall_score(val_true, val_preds_l1)
val_f1_l1 = f1_score(val_true, val_preds_l1)
val_conf_matrix_l1 = confusion_matrix(val_true, val_preds_l1)

print("L1 Regularization Model Validation Metrics:")
print(f"Precision: {val_precision_l1}")
print(f"Recall: {val_recall_l1}")
print(f"F1 Score: {val_f1_l1}")
print(f"Confusion Matrix:\n {val_conf_matrix_l1}")


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
L1 Regularization Model Validation Loss: 37.99824905395508
L1 Regularization Model Validation Accuracy: 0.5
L1 Regularization Model Validation Metrics:
Precision: 0.5
Recall: 1.0
F1 Score: 0.6666666666666666
Confusion Matrix:
 [[  0 808]
 [  0 808]]


## Testing an L2 Regularized Neural Network

In [16]:
#Creating and testing an l2 regularized model
# Define the input shape
input_shape = (150, 150, 3)

# Create the L2 regularized model
l2_model = models.Sequential()
l2_model.add(layers.Flatten(input_shape=input_shape))
l2_model.add(layers.Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
l2_model.add(layers.Dense(1, activation='sigmoid'))
l2_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Initialize early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the L2 regularized model with early stopping
history_l2 = l2_model.fit(train_generator, epochs=50, validation_data=val_generator, callbacks=[early_stopping])

# Evaluate the L2 regularized model on the validation data
val_generator.reset()
val_loss_l2, val_accuracy_l2 = l2_model.evaluate(val_generator)
print(f"L2 Regularization Model Validation Loss: {val_loss_l2}")
print(f"L2 Regularization Model Validation Accuracy: {val_accuracy_l2}")

# Get predictions for the validation data
val_preds_l2 = l2_model.predict(val_generator)
val_preds_l2 = (val_preds_l2 > 0.5).astype(int)  # Convert probabilities to class labels

# Get true labels from the validation generator
val_true = val_generator.classes

val_precision_l2 = precision_score(val_true, val_preds_l2)
val_recall_l2 = recall_score(val_true, val_preds_l2)
val_f1_l2 = f1_score(val_true, val_preds_l2)
val_conf_matrix_l2 = confusion_matrix(val_true, val_preds_l2)

print("L2 Regularization Model Validation Metrics:")
print(f"Precision: {val_precision_l2}")
print(f"Recall: {val_recall_l2}")
print(f"F1 Score: {val_f1_l2}")
print(f"Confusion Matrix:\n {val_conf_matrix_l2}")


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
L2 Regularization Model Validation Loss: 0.2422952502965927
L2 Regularization Model Validation Accuracy: 0.9375
L2 Regularization Model Validation Metrics:
Precision: 0.5115995115995116
Recall: 0.5185643564356436
F1 Score: 0.515058389674247
Confusion Matrix:
 [[408 400]
 [389 419]]


## Testing a Neural Network with Dropout

In [17]:
#Creating and testing a model with dropout

dropout_model = models.Sequential()
dropout_model.add(layers.Flatten(input_shape=input_shape))
dropout_model.add(layers.Dense(512, activation='relu'))
dropout_model.add(layers.Dropout(0.5))
dropout_model.add(layers.Dense(1, activation='sigmoid'))
dropout_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Initialize early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the dropout model with early stopping
history_dropout = dropout_model.fit(train_generator, epochs=50, validation_data=val_generator, callbacks=[early_stopping])

# Evaluate the dropout model on the validation data
val_generator.reset()
val_loss_dropout, val_accuracy_dropout = dropout_model.evaluate(val_generator)
print(f"Dropout Model Validation Loss: {val_loss_dropout}")
print(f"Dropout Model Validation Accuracy: {val_accuracy_dropout}")

# Get predictions for the validation data
val_preds_dropout = dropout_model.predict(val_generator)
val_preds_dropout = (val_preds_dropout > 0.5).astype(int)  # Convert probabilities to class labels

# Get true labels from the validation generator
val_true = val_generator.classes

val_precision_dropout = precision_score(val_true, val_preds_dropout)
val_recall_dropout = recall_score(val_true, val_preds_dropout)
val_f1_dropout = f1_score(val_true, val_preds_dropout)
val_conf_matrix_dropout = confusion_matrix(val_true, val_preds_dropout)

print("Dropout Model Validation Metrics:")
print(f"Precision: {val_precision_dropout}")
print(f"Recall: {val_recall_dropout}")
print(f"F1 Score: {val_f1_dropout}")
print(f"Confusion Matrix:\n {val_conf_matrix_dropout}")


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Dropout Model Validation Loss: 0.2909701466560364
Dropout Model Validation Accuracy: 0.8719059228897095
Dropout Model Validation Metrics:
Precision: 0.4777911164465786
Recall: 0.49257425742574257
F1 Score: 0.4850700792199878
Confusion Matrix:
 [[373 435]
 [410 398]]
