In [1]:
import os
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

TRAIN_IMAGE_DIR = './dogs-vs-cats-identification/train' 

filenames = os.listdir(TRAIN_IMAGE_DIR)

df = pd.DataFrame({
    'filename': filenames,
    'class': ['cat' if 'cat' in fname else 'dog' for fname in filenames]
})

train_df = df.sample(frac=0.8, random_state=42)
val_df = df.drop(train_df.index)

datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    zoom_range=0.1
)

IMAGE_SIZE = 96
BATCH_SIZE = 32

train_data = datagen.flow_from_dataframe(
    train_df, TRAIN_IMAGE_DIR, x_col='filename', y_col='class',
    target_size=(IMAGE_SIZE, IMAGE_SIZE), class_mode='binary', batch_size=BATCH_SIZE
)

val_data = datagen.flow_from_dataframe(
    val_df, TRAIN_IMAGE_DIR, x_col='filename', y_col='class',
    target_size=(IMAGE_SIZE, IMAGE_SIZE), class_mode='binary', batch_size=BATCH_SIZE
)

base_model = tf.keras.applications.MobileNetV2(weights='imagenet', include_top=False, input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
base_model.trainable = False

model = tf.keras.Sequential([
    base_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

filepath = './models/best_model.epoch{epoch:02d}-loss{val_loss:.2f}.hdf5'
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
early_stopping = EarlyStopping(monitor='val_loss', patience=10)

callbacks = [checkpoint]
epochs = 100

history = model.fit(
    train_data, steps_per_epoch=len(train_data), epochs=epochs,
    validation_data=val_data, callbacks=callbacks
)

test_loss, test_acc = model.evaluate(val_data, verbose=2)
print('\nTest accuracy:', test_acc)
model.save('cat_dog_classifier.h5')

Found 20000 validated image filenames belonging to 2 classes.
Found 5000 validated image filenames belonging to 2 classes.
Epoch 1/100
Epoch 1: val_loss improved from inf to 0.63295, saving model to ./models\best_model.epoch01-loss0.63.hdf5


  saving_api.save_model(


Epoch 2/100
Epoch 2: val_loss improved from 0.63295 to 0.61862, saving model to ./models\best_model.epoch02-loss0.62.hdf5
Epoch 3/100
Epoch 3: val_loss improved from 0.61862 to 0.60772, saving model to ./models\best_model.epoch03-loss0.61.hdf5
Epoch 4/100
Epoch 4: val_loss improved from 0.60772 to 0.60444, saving model to ./models\best_model.epoch04-loss0.60.hdf5
Epoch 5/100
Epoch 5: val_loss improved from 0.60444 to 0.60123, saving model to ./models\best_model.epoch05-loss0.60.hdf5
Epoch 6/100
Epoch 6: val_loss did not improve from 0.60123
Epoch 7/100
Epoch 7: val_loss improved from 0.60123 to 0.58892, saving model to ./models\best_model.epoch07-loss0.59.hdf5
Epoch 8/100
Epoch 8: val_loss did not improve from 0.58892
Epoch 9/100
Epoch 9: val_loss did not improve from 0.58892
Epoch 10/100
Epoch 10: val_loss did not improve from 0.58892
Epoch 11/100
Epoch 11: val_loss did not improve from 0.58892
Epoch 12/100
Epoch 12: val_loss did not improve from 0.58892
Epoch 13/100
Epoch 13: val_los

In [4]:
# Assume `model` is the trained model and `val_data` is the validation dataset
test_loss, test_acc = model.evaluate(val_data, verbose=2)

# Calculate the baseline - predict the most frequent class
baseline = max(train_df['class'].value_counts()) / len(train_df)

# Generate the resume statement
accuracy_multiple = test_acc / baseline
statement = f"Achieved a top-1 accuracy of {test_acc*100:.2f}% ({accuracy_multiple:.2f}x baseline)"
print(statement)


157/157 - 28s - loss: 0.5701 - accuracy: 0.7008 - 28s/epoch - 179ms/step
Achieved a top-1 accuracy of 70.08% (1.40x baseline)
