In [None]:
#====================================================================================================#
#                                                                                                    #
#                                                        ██╗   ██╗   ████████╗ █████╗ ██████╗        #
#      Competición - INAR                                ██║   ██║   ╚══██╔══╝██╔══██╗██╔══██╗       #
#                                                        ██║   ██║█████╗██║   ███████║██║  ██║       #
#      created:        29/10/2025  -  23:00:15           ██║   ██║╚════╝██║   ██╔══██║██║  ██║       #
#      last change:    01/11/2025  -  18:34:20           ╚██████╔╝      ██║   ██║  ██║██████╔╝       #
#                                                         ╚═════╝       ╚═╝   ╚═╝  ╚═╝╚═════╝        #
#                                                                                                    #
#      Ismael Hernandez Clemente                         ismael.hernandez@live.u-tad.com             #
#                                                                                                    #
#      Github:                                           https://github.com/ismaelucky342            #
#                                                                                                    #
#====================================================================================================#

### This is a minimal working "starter" notebook for the [[U-Tad] Dogs vs. Cats 2025](https://www.kaggle.com/competitions/u-tad-dogs-vs-cats-2025) competition.
Click on **Copy & Edit** to copy this notebook to your account and start working.

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from tensorflow import data as tf_data
import keras

# Single seed for the whole notebook: it is handed to Keras' global seeding
# helper here and reused as the train/validation split seed when the training
# images are loaded further down.
seed = 42
keras.utils.set_random_seed(seed)

### Read in the training data

In [None]:
# Spatial size every image is resized to on load (also reused by the model's
# input shape and by the test-time image loader).
image_size = (256, 256)

# Mini-batch size. With 20_000 training files this gives exactly
# 20_000 / 125 = 160 mini-batches per epoch.
batch_size = 125

# Root folder of the labelled training images; class labels are inferred from
# the directory structure (labels="inferred").
train_dir = "/kaggle/input/u-tad-dogs-vs-cats-2025/train/train"

# subset="both" returns the training and the validation split in one call;
# 20% of the files are held out for validation, and the shared `seed` is
# passed so the split uses the notebook-wide seed.
# https://keras.io/api/data_loading/image/#imagedatasetfromdirectory-function
train_ds, val_ds = keras.utils.image_dataset_from_directory(
    train_dir,
    labels="inferred",
    label_mode="categorical",
    image_size=image_size,
    batch_size=batch_size,
    validation_split=0.2,
    subset="both",
    seed=seed,
)

### A basic sequential CNN model

In [None]:
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Rescaling

# Height x width from `image_size`, plus 3 colour channels.
input_shape = image_size + (3,)

# Plain sequential CNN: three Conv -> MaxPool stages with a growing number of
# filters, followed by a dense classifier head with a 2-way softmax that
# matches the one-hot ("categorical") labels of the datasets.
model = Sequential()
model.add(keras.Input(shape=input_shape))

# The image loaders used in this notebook hand over raw pixel intensities in
# the 0-255 range. Scaling them to [0, 1] *inside* the model keeps training,
# evaluation and the test-time prediction loop consistent without touching
# any of the data pipelines, and avoids feeding very large activations into
# the first convolution.
model.add(Rescaling(1.0 / 255))

model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))

# Classifier head.
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dense(2, activation='softmax'))

model.summary()

### Compile and train (fit)

In [None]:
%%time

# RMSprop with an explicit learning rate and no momentum.
optimizer = keras.optimizers.RMSprop(learning_rate=0.001, momentum=0.0)

# Categorical cross-entropy pairs with the one-hot labels / softmax output;
# accuracy is tracked so it can be plotted afterwards.
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Number of full passes over the training split.
epochs = 12

# `history` keeps the per-epoch training and validation metrics.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs,
)

### Plot the learning curves

In [None]:
# One row per epoch, one column per logged metric.
logs = pd.DataFrame(history.history)

# Two side-by-side panels: loss on the left, accuracy on the right.
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(14, 4))

# (axes, metric column, y-axis label, legend keyword arguments)
panels = [
    (ax_loss, "loss", "Loss", {}),
    (ax_acc, "accuracy", "Accuracy", {"loc": "lower right"}),
]

for ax, metric, y_label, legend_kwargs in panels:
    # Rows are selected from index label 1 onwards, i.e. the first logged
    # epoch (label 0) is left out of the curves.
    ax.plot(logs.loc[1:, metric], lw=2, label=f"training {metric}")
    ax.plot(logs.loc[1:, f"val_{metric}"], lw=2, label=f"validation {metric}")
    ax.set_xlabel("Epoch")
    ax.set_ylabel(y_label)
    ax.legend(**legend_kwargs)

plt.show()

### Save the trained model

In [None]:
# Write the trained model to "model.keras" (relative path, so it lands in the
# notebook's current working directory).
model.save("model.keras")

### Evaluate model performance using the `supplementary_data`

In [None]:
# Extra labelled images that were not part of the train/validation split.
# They are loaded with the same image size, batch size and label encoding as
# the training data so the model can be evaluated on them directly.
supplementary_dir = "/kaggle/input/u-tad-dogs-vs-cats-2025/supplementary_data/supplementary_data"

supplementary_ds = keras.utils.image_dataset_from_directory(
    supplementary_dir,
    labels="inferred",
    label_mode="categorical",
    image_size=image_size,
    batch_size=batch_size,
)

# Last expression of the cell: the metrics dictionary is shown as cell output.
model.evaluate(supplementary_ds, verbose=1, return_dict=True)

### Create predictions for all of the test images
(Do not modify this section)

In [None]:
%%time

# Folder holding the unlabelled competition test images.
folder_path = "/kaggle/input/u-tad-dogs-vs-cats-2025/test/test"

# Maps the integer image id (taken from the file name) to the predicted label.
predictions_dict = {}

# Predict one image at a time: every directory entry is loaded, resized to
# `image_size`, turned into a batch of one and passed through the model.
for img in os.listdir(folder_path):
    # `img` starts as the bare file name and becomes the full path here.
    img = os.path.join(folder_path, img)
    
    # Derive the id from the file name: last path component, then everything
    # before the first dot. NOTE(review): the int() conversion below assumes
    # every file name stem is purely numeric - confirm against the dataset.
    file_name = img.split('/')[-1]
    file_no_extension = file_name.split('.')[0]
    
    # `img` is rebound once more, now to the loaded (and resized) image.
    img = keras.utils.load_img(img, target_size=image_size)
    img_array = keras.utils.img_to_array(img)
    # Add a leading axis so the single image forms a batch of size one.
    img_array = keras.ops.expand_dims(img_array, 0)
    prediction = model.predict(img_array, verbose=None)
    # Position of the largest model output. NOTE(review): presumably this
    # index follows the class order inferred from the training directory
    # names - verify which index is "cat" and which is "dog".
    label = np.argmax(prediction)

    # Store the predicted label under the integer image id.
    predictions_dict[int(file_no_extension)] = label

### Save your predictions to a competition submission file

In [None]:
# Turn the {id: label} mapping into a two-column table ordered by image id.
submission = (
    pd.DataFrame(predictions_dict.items(), columns=["id", "label"])
    .sort_values(by="id", ascending=True)
)

# Competition file: only the "id" and "label" columns, no index column.
submission.to_csv("submission.csv", index=False)

# Last expression of the cell: how many test images were assigned to each
# class label.
submission["label"].value_counts()