## Loading in and Preprocessing the Dataset

In [1]:
#importing necessary libraries
import tensorflow as tf
from tensorflow.keras import models, layers, regularizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_predict
import numpy as np
from scikeras.wrappers import KerasClassifier

In [2]:
# Loading in and rescaling the data.
# Each split lives in its own directory with one sub-folder per class.

train_dir = 'train'
val_dir = 'val'
test_dir = 'test'

# Create ImageDataGenerators with rescaling (pixel values are multiplied by 1/255;
# no augmentation is applied to any split).
train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Create Data Generators
# The training generator keeps the default shuffling, but with a fixed seed so the
# order of the extracted samples (and therefore the CV fold membership) is repeatable.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
    seed=123
)
# Evaluation generators must NOT shuffle: otherwise the batches returned by
# predict()/evaluate() are in a random order while `generator.classes` is in
# directory order, and any metric computed from the two is meaningless.
val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)

# Print the class indices
print("Class indices:", train_generator.class_indices)


Found 3616 images belonging to 2 classes.
Found 1616 images belonging to 2 classes.
Found 624 images belonging to 2 classes.
Class indices: {'NORMAL': 0, 'PNEUMONIA': 1}


In [6]:
# Extracting data from generators to use with cross_val_predict
def extract_data(generator):
    """Collect one full pass of a batch generator into two NumPy arrays.

    Parameters
    ----------
    generator : iterator
        Object that supports ``len()`` (number of batches per pass) and
        ``next()`` (returns an ``(x_batch, y_batch)`` pair). If it exposes a
        ``reset()`` method, that is called first.

    Returns
    -------
    tuple of (np.ndarray, np.ndarray)
        All samples and all labels, in the order the generator produced them.
    """
    # Rewind first so that exactly one pass is collected even when the
    # generator has already been partially consumed by an earlier cell.
    reset = getattr(generator, "reset", None)
    if callable(reset):
        reset()

    X, y = [], []
    # Iterate over the generator to get all batches
    for _ in range(len(generator)):
        # Built-in next() works on every Keras version; the .next() method
        # is not available on newer iterators.
        x_batch, y_batch = next(generator)
        X.extend(x_batch)  # Append images to the list
        y.extend(y_batch)  # Append labels to the list
    return np.array(X), np.array(y)  # Convert lists to NumPy arrays

# Extract data and labels from the training generator.
# NOTE: despite its name, y_train_scaled holds the 0/1 class labels produced by
# class_mode='binary'; only the images are rescaled.
X_train_all, y_train_scaled = extract_data(train_generator)

# Sanity check: one full pass must yield exactly one label per image found on disk.
assert len(X_train_all) == len(y_train_scaled) == train_generator.samples


## Testing a Baseline Neural Network

In [8]:
# Define, wrap, and test the baseline model on training data

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Define the input shape: the 150x150 target_size used by the generators plus
# 3 colour channels (flow_from_directory's default RGB mode).
input_shape = (150, 150, 3)

def create_baseline_model(input_shape=input_shape):
    """Build and compile the baseline fully connected binary classifier.

    Architecture: flatten the image, one 512-unit ReLU hidden layer, and a
    single sigmoid output unit. Compiled with Adam, binary cross-entropy loss
    and accuracy as the reported metric.

    Parameters
    ----------
    input_shape : tuple of int
        Shape of one input image, excluding the batch dimension.

    Returns
    -------
    A compiled Keras ``Sequential`` model.
    """
    model = models.Sequential([
        # Explicit Input layer instead of passing `input_shape=` to Flatten,
        # which current Keras versions warn against.
        layers.Input(shape=input_shape),
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Wrap the model using SciKeras
keras_wrapper_baseline = KerasClassifier(model=create_baseline_model, epochs=50, batch_size=32, verbose=0)

# Perform cross-validation
# Seed Python, NumPy and TensorFlow together: np.random.seed alone does not
# control Keras weight initialisation, so results would differ between runs.
# (GPU kernels may still introduce small run-to-run differences.)
tf.keras.utils.set_random_seed(123)
cv_baseline_preds = cross_val_predict(keras_wrapper_baseline, X_train_all, y_train_scaled, cv=5)

# Evaluate the out-of-fold predictions against the true training labels
accuracy = accuracy_score(y_train_scaled, cv_baseline_preds)
precision = precision_score(y_train_scaled, cv_baseline_preds)
recall = recall_score(y_train_scaled, cv_baseline_preds)
f1 = f1_score(y_train_scaled, cv_baseline_preds)
conf_matrix = confusion_matrix(y_train_scaled, cv_baseline_preds)

print("Baseline Model Metrics:")
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Confusion Matrix:\n", conf_matrix)


Baseline Model Metrics:
Accuracy: 0.964325221238938
Precision: 0.9745489690721649
Recall: 0.983739837398374
F1 Score: 0.9791228354102606
Confusion Matrix:
 [[ 462   79]
 [  50 3025]]


In [None]:
# Evaluate the baseline model on the validation data

# `baseline_model` is not defined anywhere earlier in the notebook (the
# cross-validation cell only fits estimators inside cross_val_predict), so fit
# one model on the full training set here, using the same epochs/batch size as
# the cross-validated wrapper.
tf.keras.utils.set_random_seed(123)
baseline_model = create_baseline_model()
baseline_model.fit(X_train_all, y_train_scaled, epochs=50, batch_size=32, verbose=0)

# Materialise the validation set once so images and labels come from the same
# batches and stay aligned even if val_generator shuffles.
X_val, y_val = extract_data(val_generator)

val_loss, val_accuracy = baseline_model.evaluate(X_val, y_val)
print(f"Validation Loss: {val_loss}")
print(f"Validation Accuracy: {val_accuracy}")

# Get predictions for the validation data
val_probs = baseline_model.predict(X_val)
# Convert (n, 1) sigmoid probabilities to a flat vector of 0/1 class labels
val_preds = (val_probs > 0.5).astype(int).ravel()

# True labels, taken from the same batches as the images above
val_true = y_val.astype(int)

val_precision = precision_score(val_true, val_preds)
val_recall = recall_score(val_true, val_preds)
val_f1 = f1_score(val_true, val_preds)
val_conf_matrix = confusion_matrix(val_true, val_preds)

print("Validation Metrics:")
print(f"Precision: {val_precision}")
print(f"Recall: {val_recall}")
print(f"F1 Score: {val_f1}")
print(f"Confusion Matrix:\n {val_conf_matrix}")


## Testing an L1-Regularized Neural Network

In [9]:
# Define, wrap, and test the L1 regularization model 

def create_l1_model(input_shape=(150, 150, 3), l1_strength=0.01):
    """Build and compile the baseline architecture with an L1 weight penalty.

    Same layout as the baseline network (flatten, 512-unit ReLU hidden layer,
    single sigmoid output), but the hidden layer's kernel carries an L1
    regularizer. Compiled with Adam, binary cross-entropy loss and accuracy.

    Parameters
    ----------
    input_shape : tuple of int
        Shape of one input image, excluding the batch dimension.
    l1_strength : float
        Coefficient of the L1 penalty on the hidden layer's weights. Defaults
        to 0.01, the value originally hard-coded here.

    Returns
    -------
    A compiled Keras ``Sequential`` model.
    """
    model = models.Sequential([
        # Explicit Input layer instead of passing `input_shape=` to Flatten,
        # which current Keras versions warn against.
        layers.Input(shape=input_shape),
        layers.Flatten(),
        layers.Dense(512, activation='relu', kernel_regularizer=regularizers.l1(l1_strength)),
        layers.Dense(1, activation='sigmoid'),  # Sigmoid activation for binary classification
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Wrap the model using SciKeras
keras_wrapper_l1 = KerasClassifier(model=create_l1_model, epochs=50, batch_size=32, verbose=0)

# Perform cross-validation
# Seed Python, NumPy and TensorFlow together: np.random.seed alone does not
# control Keras weight initialisation, so results would differ between runs.
# (GPU kernels may still introduce small run-to-run differences.)
tf.keras.utils.set_random_seed(123)
cv_l1_preds = cross_val_predict(keras_wrapper_l1, X_train_all, y_train_scaled, cv=5)

# Evaluate the out-of-fold predictions against the true training labels
accuracy_l1 = accuracy_score(y_train_scaled, cv_l1_preds)
precision_l1 = precision_score(y_train_scaled, cv_l1_preds)
recall_l1 = recall_score(y_train_scaled, cv_l1_preds)
f1_l1 = f1_score(y_train_scaled, cv_l1_preds)
conf_matrix_l1 = confusion_matrix(y_train_scaled, cv_l1_preds)

# NOTE(review): the recorded output below shows every sample predicted as
# class 1 (confusion matrix [[0, 541], [0, 3075]]), so accuracy and precision
# simply equal the positive-class share and recall is trivially 1.0. The L1
# penalty may be too strong for this layer size; verify before drawing
# conclusions from these metrics.
print("L1 Regularization Model Metrics:")
print("Accuracy:", accuracy_l1)
print("Precision:", precision_l1)
print("Recall:", recall_l1)
print("F1 Score:", f1_l1)
print("Confusion Matrix:\n", conf_matrix_l1)


L1 Regularization Model Metrics:
Accuracy: 0.8503871681415929
Precision: 0.8503871681415929
Recall: 1.0
F1 Score: 0.9191451203108653
Confusion Matrix:
 [[   0  541]
 [   0 3075]]
