In [4]:
import tensorflow as tf 
import tensorflow_datasets as tfds
from tensorflow.keras.layers import Rescaling
import numpy as np

# already have split data into directories 
# add the paths 
train_data_dir = "data/train/"
test_data_dir = "data/test/"
val_data_dir = "data/val/"

batch_size = 32
img_height = 150
img_width = 150

# rescale instance
rescale = Rescaling(scale=1.0/255)

# we do not set subset=both here because we do not want the test set split
test_ds = tf.keras.utils.image_dataset_from_directory(
    test_data_dir,
    seed=123,
    image_size=(img_height, img_width), batch_size=batch_size
)
rescale = Rescaling(scale=1.0/255)
test_rescale_ds = test_ds.map(lambda image,label:(rescale(image),label))

train_ds = tf.keras.utils.image_dataset_from_directory(
    train_data_dir,
    seed=123,
    image_size=(img_height, img_width), batch_size=batch_size
)
rescale = Rescaling(scale=1.0/255)
train_rescale_ds = train_ds.map(lambda image,label:(rescale(image),label))

val_ds = tf.keras.utils.image_dataset_from_directory(
    val_data_dir,
    seed=123,
    image_size=(img_height, img_width), batch_size=batch_size
)

# approach 1: manually rescale data --
rescale = Rescaling(scale=1.0/255)
val_rescale_ds = val_ds.map(lambda image,label:(rescale(image),label))

Found 624 files belonging to 2 classes.
Found 5216 files belonging to 2 classes.
Found 16 files belonging to 2 classes.


In [5]:
for images, labels in train_ds.take(1):  # Take one batch
    print("Train batch shape:", images.shape)  # Shape: (batch_size, height, width, channels)
    print("Train labels shape:", labels.shape)  # Shape: (batch_size,)


Train batch shape: (32, 150, 150, 3)
Train labels shape: (32,)


2025-05-03 16:28:19.392973: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [6]:
for images, labels in test_ds.take(1):
    print("Test batch shape:", images.shape)
    print("Test labels shape:", labels.shape)


Test batch shape: (32, 150, 150, 3)
Test labels shape: (32,)


2025-05-03 16:28:19.498655: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [7]:
import matplotlib.pyplot as plt

# Get one batch of images and labels from the rescaled dataset
for images, labels in train_rescale_ds.take(1):  
    X_train_batch = images.numpy()  # Convert to NumPy array
    y_train_batch = labels.numpy()  # Convert labels to NumPy
    break

# Plot the first 5 images
plt.figure(figsize=(10, 2))
for i in range(5):
    plt.subplot(1, 5, i+1)
    plt.imshow(X_train_batch[i])  # No need for uint8 conversion since values are [0,1]
    plt.axis("off")

print('Label for each of the above images:', y_train_batch[:5])

ModuleNotFoundError: No module named 'matplotlib'

In [None]:
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers

input_shape=(150, 150, 3)
num_classes = 2

model = keras.Sequential([
        layers.InputLayer(input_shape=input_shape),
        layers.Rescaling(1./255),
       
        layers.Conv2D(32, 3, activation='relu'),
        layers.MaxPooling2D(),
       
        layers.Conv2D(64, 3, activation='relu'),
        layers.MaxPooling2D(),
       
        layers.Conv2D(128, 3, activation='relu'),
        layers.MaxPooling2D(),
       
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax')])

# Compile the model
model.compile(optimizer=Adam(),
              loss='sparse_categorical_crossentropy',  # Binary classification loss
              metrics=['accuracy'])

# Print model summary
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling_6 (Rescaling)     (None, 150, 150, 3)       0         
                                                                 
 conv2d_3 (Conv2D)           (None, 148, 148, 32)      896       
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 74, 74, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_4 (Conv2D)           (None, 72, 72, 64)        18496     
                                                                 
 max_pooling2d_4 (MaxPoolin  (None, 36, 36, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_5 (Conv2D)           (None, 34, 34, 128)       7

In [None]:
# Train the model
history = model.fit(
    train_rescale_ds,
    validation_data=val_rescale_ds,
    epochs=20,  # Can be adjusted
    batch_size=32,
    verbose=1
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
# Evaluate on test set
test_loss, test_acc = model.evaluate(test_rescale_ds)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:}")


Test Loss: 0.7100
Test Accuracy: 0.625


In [None]:
pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5
[0mNote: you may need to restart the kernel to use updated packages.


In [None]:
import keras_tuner as kt

def model_builder(hp):
    model = keras.Sequential()
    model.add(layers.Rescaling(1./255, input_shape=(img_height, img_width, 3)))
    
    # Tune the number of Conv2D layers and filters
    for i in range(hp.Int('conv_blocks', 1, 3)):
        filters = hp.Int(f'filters_{i}', min_value=32, max_value=128, step=32)
        model.add(layers.Conv2D(filters, (3,3), activation='relu'))
        model.add(layers.MaxPooling2D())
    
    model.add(layers.Flatten())
    
    # Tune number of units in dense layer
    hp_units = hp.Int('dense_units', min_value=64, max_value=256, step=64)
    model.add(layers.Dense(units=hp_units, activation='relu'))
    
    # Tune dropout rate
    hp_dropout = hp.Float('dropout', min_value=0.2, max_value=0.5, step=0.1)
    model.add(layers.Dropout(hp_dropout))
    
    model.add(layers.Dense(2, activation='softmax'))
    
    # Tune optimizer
    hp_learning_rate = hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
    optimizer = hp.Choice('optimizer', ['adam', 'rmsprop'])
    
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    return model

In [None]:
#evaluation metrics function
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

def evaluate_model(model, dataset):
    y_true = []
    y_pred = []
    
    for images, labels in dataset:
        y_true.extend(labels.numpy())
        preds = model.predict(images, verbose=0)
        y_pred.extend(np.argmax(preds, axis=1))  # For multi-class
    
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    
    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred, average='weighted'),
        'recall': recall_score(y_true, y_pred, average='weighted'),
        'f1': f1_score(y_true, y_pred, average='weighted')
    }

In [None]:
best_hps = None
def run_tuning(tuner_type):
    """Run hyperparameter tuning with given strategy"""
    if tuner_type == "hyperband":
        tuner = kt.Hyperband(
            model_builder,
            objective='val_accuracy',
            max_epochs=20,
            factor=3,
            project_name='pneumonia_hyperband'
        )
    elif tuner_type == "bayesian":
        tuner = kt.BayesianOptimization(
            model_builder,
            objective='val_accuracy',
            max_trials=30,
            num_initial_points=10,
            project_name='pneumonia_bayesian'
        )
    else:  # random
        tuner = kt.RandomSearch(
            model_builder,
            objective='val_accuracy',
            max_trials=30,
            project_name='pneumonia_random'
        )
    
    tuner.search(
        train_ds,
        validation_data=val_ds,
        epochs=20,
        callbacks=[tf.keras.callbacks.EarlyStopping(patience=3)]
    )
    
    best_model = tuner.get_best_models(num_models=1)[0]
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    return evaluate_model(best_model, val_ds)

Trial 30 Complete [00h 14m 19s]
val_accuracy: 0.9375

Best val_accuracy So Far: 1.0
Total elapsed time: 02h 42m 31s

Best hyperparameters:
- Conv blocks: 1
- Filters (first block): 128
- Dense units: 64
- Dropout: 0.2
- Optimizer: adam
- Learning rate: 0.0001



In [None]:
# run all tuners
print("Running Hyperband tuning...")
hyperband_metrics = run_tuning("hyperband")

print("Running Bayesian tuning...")
bayesian_metrics = run_tuning("bayesian")

print("Running Random Search tuning...")
random_metrics = run_tuning("random")

print(f"""
Best hyperparameters:
- Conv blocks: {best_hps.get('conv_blocks')}
- Filters (first block): {best_hps.get('filters_0')}
- Dense units: {best_hps.get('dense_units')}
- Dropout: {best_hps.get('dropout')}
- Optimizer: {best_hps.get('optimizer')}
- Learning rate: {best_hps.get('learning_rate')}
""")

In [None]:
metrics_df = pd.DataFrame({
    'Hyperband': hyperband_metrics,
    'Bayesian': bayesian_metrics,
    'Random': random_metrics
}).T

print("\n=== Numerical Results ===")
print(metrics_df)

In [None]:
# visualizations

plt.figure(figsize=(12, 6))

# Bar plot for all metrics
metrics_df.plot(kind='bar', width=0.8)
plt.title('Model Performance Comparison')
plt.ylabel('Score')
plt.xticks(rotation=0)
plt.ylim(0, 1.05)
plt.legend(loc='lower right')
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.savefig('tuning_comparison.png')
plt.show()

# Radar chart for comprehensive comparison
categories = list(metrics_df.columns)
N = len(categories)

angles = [n / float(N) * 2 * np.pi for n in range(N)]
angles += angles[:1]

plt.figure(figsize=(6, 6))
ax = plt.subplot(111, polar=True)

for method, row in metrics_df.iterrows():
    values = row.values.flatten().tolist()
    values += values[:1]
    ax.plot(angles, values, linewidth=2, label=method)
    ax.fill(angles, values, alpha=0.1)

plt.xticks(angles[:-1], categories)
plt.title('Tuning Method Comparison', y=1.1)
plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
plt.tight_layout()
plt.savefig('radar_comparison.png')
plt.show()


In [None]:
best_method = metrics_df['f1'].idxmax()
print(f"\n✅ Best Method: {best_method} (F1-score: {metrics_df.loc[best_method, 'f1']:.3f})")

In [None]:
# Rebuild the best model with the best hyperparameters
model = model_builder(best_hps) # maybe use best_model instead?

In [None]:
model.save('./models/best_pneumonia_model.keras')