In [78]:
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from tensorflow.keras import models
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [79]:
DATASET_DIR = './data'
class_names = ['yes', 'no']
image_size = (128, 128)
batch_size = 32

checkpoint_path = "./model_checkpoints/"
checkpoint_name = "model-{epoch:02d}-{val_accuracy:.4f}.h5"
checkpoint_filepath = os.path.join(checkpoint_path, checkpoint_name)

In [80]:
# Create the tf datasets
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    DATASET_DIR + '/train/',
    labels='inferred', 
    label_mode='binary',  
    class_names=class_names,
    color_mode='rgb',  
    batch_size=batch_size,
    image_size=image_size,
    shuffle=True, 
    seed=123,  
    validation_split=0.2,  # Split dataset into training and validation
    subset='training'  
)


validation_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    DATASET_DIR + '/train/',
    labels='inferred',
    label_mode='binary',
    class_names=class_names,
    color_mode='rgb',
    batch_size=batch_size,
    image_size=image_size,
    shuffle=True,
    seed=123,
    validation_split=0.2,
    subset='validation'  
)

test_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    DATASET_DIR + '/test/',
    labels='inferred',
    label_mode='binary',
    class_names=class_names,
    color_mode='rgb',
    batch_size=batch_size,
    image_size=image_size,
    shuffle=True,
    seed=123,
)

Found 222 files belonging to 2 classes.
Using 178 files for training.


Found 222 files belonging to 2 classes.
Using 44 files for validation.
Found 31 files belonging to 2 classes.


In [89]:
model = models.Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=image_size + (3,)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(256, (3, 3), activation='relu'),
    Flatten(),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Use 'sigmoid' activation for binary classification
])

In [90]:
# Define early stopping callback
early_stopping = EarlyStopping(
    monitor='val_loss',  
    patience=20, 
    restore_best_weights=True  
)

checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath,  
    monitor='val_accuracy', 
    save_best_only=True,
    save_weights_only=False,  # Save the entire model
    mode='max',
    verbose=1
)

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy', 
              metrics=['accuracy'])


# Display the model summary
model.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_21 (Conv2D)          (None, 126, 126, 32)      896       
                                                                 
 max_pooling2d_14 (MaxPooli  (None, 63, 63, 32)        0         
 ng2D)                                                           
                                                                 
 conv2d_22 (Conv2D)          (None, 61, 61, 64)        18496     
                                                                 
 max_pooling2d_15 (MaxPooli  (None, 30, 30, 64)        0         
 ng2D)                                                           
                                                                 
 conv2d_23 (Conv2D)          (None, 28, 28, 128)       73856     
                                                                 
 max_pooling2d_16 (MaxPooli  (None, 14, 14, 128)      

In [91]:
history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=100,  
    callbacks=[early_stopping, checkpoint_callback] 
)

Epoch 1/100
Epoch 1: val_accuracy improved from -inf to 0.47727, saving model to ./model_checkpoints/model-01-0.4773.h5
Epoch 2/100


  saving_api.save_model(


Epoch 2: val_accuracy improved from 0.47727 to 0.75000, saving model to ./model_checkpoints/model-02-0.7500.h5
Epoch 3/100
Epoch 3: val_accuracy improved from 0.75000 to 0.77273, saving model to ./model_checkpoints/model-03-0.7727.h5
Epoch 4/100
Epoch 4: val_accuracy did not improve from 0.77273
Epoch 5/100
Epoch 5: val_accuracy improved from 0.77273 to 0.79545, saving model to ./model_checkpoints/model-05-0.7955.h5
Epoch 6/100
Epoch 6: val_accuracy did not improve from 0.79545
Epoch 7/100
Epoch 7: val_accuracy did not improve from 0.79545
Epoch 8/100
Epoch 8: val_accuracy did not improve from 0.79545
Epoch 9/100
Epoch 9: val_accuracy did not improve from 0.79545
Epoch 10/100
Epoch 10: val_accuracy did not improve from 0.79545
Epoch 11/100
Epoch 11: val_accuracy did not improve from 0.79545
Epoch 12/100
Epoch 12: val_accuracy did not improve from 0.79545
Epoch 13/100
Epoch 13: val_accuracy did not improve from 0.79545
Epoch 14/100
Epoch 14: val_accuracy did not improve from 0.79545
Epo

In [92]:
# Evaluate the model on the test dataset
test_loss, test_accuracy = model.evaluate(test_dataset)

print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)

# Make predictions on the test dataset
predictions = model.predict(test_dataset)
binary_predictions = (predictions > 0.5).astype("int32")

print("Class predictions for the first few samples:")
print(binary_predictions[:5])


Test Loss: 0.47056296467781067
Test Accuracy: 0.9032257795333862
Class probabilities for the first few samples:
[[0]
 [1]
 [0]
 [0]
 [1]]


In [85]:
# Get best checkpoint
checkpoint_files = [f for f in os.listdir(checkpoint_path) if f.endswith('.h5')]

# Sort the checkpoint files based on validation accuracy (assuming the filename convention)
checkpoint_files.sort(key=lambda x: float(x.split('-')[-1][:-3]), reverse=True)

# Get the filename of the checkpoint with the highest validation accuracy
best_checkpoint_file = checkpoint_files[0]

best_checkpoint_path = os.path.join(checkpoint_path, best_checkpoint_file)
model = load_model(best_checkpoint_path)
