# Go wild -- this assignment is to recognize Cats and Dogs. The load dataset function is loading the data properly.
# Note that the images are in color, unlike the MNIST and Fashion images, so the tensors will be different.
# The rest is yours. Here are a few suggestions

# 1. Start with a basic model -- that is your baseline
# 2. Move to a convolutional network
# 3. Finish with a convolutional network with data augmentation (see the video)

In [None]:
# USE ONLY IF using a GPU on PACE.

# If the CUDA module is not loaded, the GPU on PACE will not function properly.
# On another system, TensorFlow needs to be built with CUDA and the same CUDA
# version needs to be loaded for the GPU to work properly.
# os.system("module load cuda/12.1.1")
import os
import time
# NOTE(review): os.system runs the command in a child shell, so any environment
# changes made by `module load` presumably do not carry over into this Python
# process -- confirm that this call has the intended effect on PACE.
os.system("module load cuda/12.1.1")
# Pause for 20 seconds before the next cell runs.
time.sleep(20)

In [None]:
from tensorflow import keras
from keras import layers
from keras.datasets import imdb
import numpy as np
import os, shutil, pathlib
from keras.utils import load_img
from keras.utils import img_to_array
from keras.utils import array_to_img
import matplotlib.pyplot as plt
import tensorflow as tf
from PIL import Image
size_picture = 128

In [None]:
from keras.utils import image_dataset_from_directory

In [None]:
def tensor_from_image(name):
    """Load one image file and return it as both an image and a pixel array.

    Args:
        name: Path of the image file, passed straight to ``load_img``.

    Returns:
        A ``(image, array)`` tuple: the image resized to
        ``size_picture`` x ``size_picture``, and its pixels as a ``uint8``
        array reshaped to ``(size_picture, size_picture, 3)``.
    """
    side = size_picture
    resized = load_img(name).resize((side, side))
    pixels = img_to_array(resized).astype("uint8")
    return (resized, pixels.reshape((side, side, 3)))

In [None]:
def load_dataset(start_index, end_index, dataset_type="training_set"):
    """Load the cat and dog images numbered ``start_index``..``end_index``.

    For each category the files ``<category>.<i>.jpg`` with ``i`` in the
    inclusive range are read from
    ``../datasets/cats_and_dogs/<dataset_type>/<folder>``. All cats come
    first, followed by all dogs, so callers that split the result should
    shuffle it first.

    Args:
        start_index: First image number to load (inclusive).
        end_index: Last image number to load (inclusive).
        dataset_type: Sub-directory of the dataset to read from. For
            ``"test_set"`` the category folders are plural (``cats``/``dogs``);
            for any other value they are singular (``cat``/``dog``).

    Returns:
        A tuple ``(images, tensor_inputs, tensor_labels)`` where ``images`` is
        the list of resized images, ``tensor_inputs`` has shape
        ``(total, size_picture, size_picture, 3)`` and ``tensor_labels`` has
        shape ``(total, 1)`` with 1 for a dog and 0 for a cat.
    """
    # A tuple rather than a set literal: iteration order over a set of strings
    # can differ between interpreter runs, which made the sample order
    # non-reproducible.
    categories = ("cat", "dog")

    # An empty or inverted range yields an empty dataset instead of a
    # negative array size.
    per_category = max(end_index - start_index + 1, 0)
    total_images = len(categories) * per_category

    images = []
    tensor_inputs = np.zeros((total_images, size_picture, size_picture, 3))
    tensor_labels = np.zeros((total_images, 1))

    # Handle folder name differences (test_set uses plural folder names).
    plural = dataset_type == "test_set"
    category_folder = {
        "cat": "cats" if plural else "cat",
        "dog": "dogs" if plural else "dog",
    }

    base_directory = pathlib.Path(f'../datasets/cats_and_dogs/{dataset_type}')

    k = 0  # Row of the output tensors that the next image is written to.
    for category in categories:
        folder = base_directory / category_folder[category]
        label = float(category == "dog")  # 1 for dog, 0 for cat
        for i in range(start_index, end_index + 1):
            # Every file in the range is assumed to exist; a missing file
            # raises from tensor_from_image.
            img, t = tensor_from_image(folder / f"{category}.{i}.jpg")
            images.append(img)
            tensor_inputs[k] = t
            tensor_labels[k] = label
            k += 1

    return images, tensor_inputs, tensor_labels

# Training data: images 1-4000 of each category, read from training_set.
training_images, training_inputs, training_labels = load_dataset(1, 4000, "training_set")

# Testing data: images 4001-5000 of each category, read from test_set.
testing_images, testing_inputs, testing_labels = load_dataset(4001, 5000, "test_set")

In [None]:
def binary_classify(model, inputs, outputs, val_data=None, epochs=10):
    """Compile and train a binary classifier.

    Args:
        model: A model exposing ``compile`` and ``fit``, or a zero-argument
            callable that builds one (for example ``build_model_DNN``).
        inputs: Training samples.
        outputs: Training labels (0 or 1).
        val_data: Optional ``(val_inputs, val_labels)`` tuple. When given it
            is used as validation data and enables early stopping.
        epochs: Maximum number of training epochs.

    Returns:
        The trained model.
    """
    # Callers may pass the builder function itself; build the model here so
    # both calling styles work.
    if not hasattr(model, "fit") and callable(model):
        model = model()

    model.compile(
        optimizer="rmsprop",
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )

    fit_kwargs = {"epochs": epochs, "batch_size": 32}
    if val_data is not None:
        fit_kwargs["validation_data"] = val_data
        # The compiled metric is named "accuracy", so its validation
        # counterpart is logged as "val_accuracy". Early stopping needs
        # validation data, so it is only enabled on this path.
        fit_kwargs["callbacks"] = [
            keras.callbacks.EarlyStopping(
                monitor="val_accuracy", patience=2, mode="max"
            )
        ]

    # Train exactly once.
    model.fit(inputs, outputs, **fit_kwargs)
    return model

In [None]:
# Training data (1-4000 from training_set)
training_images, training_inputs, training_labels = load_dataset(1, 4000, "training_set")
# Testing data (4001-5000 from test_set)
testing_images, testing_inputs, testing_labels = load_dataset(4001, 5000, "test_set")
size_picture = 128

# load_dataset returns every image of one category followed by every image of
# the other, so shuffle before splitting; otherwise the validation slice would
# contain a single class. A fixed seed keeps the split reproducible.
shuffle_order = np.random.default_rng(seed=0).permutation(len(training_inputs))

# Define the split index: 80% for training, 20% for validation.
split_index = int(0.8 * len(training_images))
train_indices = shuffle_order[:split_index]
val_indices = shuffle_order[split_index:]

# Partition the data under the names the training and evaluation cells use.
train_inputs = training_inputs[train_indices]
train_labels = training_labels[train_indices]
val_inputs = training_inputs[val_indices]
val_labels = training_labels[val_indices]
test_inputs = testing_inputs
test_labels = testing_labels

# Names this cell bound previously, kept as aliases of the same partitions.
partial_train_data = train_inputs
partial_train_labels = train_labels
validation_data = val_inputs
validation_labels = val_labels

In [None]:
# Load images 1-3000 of each category for training and 3001-3999 for testing.
# No dataset_type is passed, so both calls read from the default "training_set".
# NOTE(review): this rebinds the training_*/testing_* names to a different
# split than the one loaded in earlier cells -- confirm which split is intended.
(training_images,training_inputs,training_labels) = load_dataset(1,3000)
(testing_images,testing_inputs,testing_labels) = load_dataset(3001,3999)

In [None]:
def build_model_DNN():
    """Build the baseline fully connected cat/dog classifier.

    The input shape follows ``size_picture`` so that it matches the tensors
    produced by ``load_dataset``. Pixel values are rescaled to [0, 1] inside
    the model, so raw 0-255 images can be fed in directly.

    Returns:
        An uncompiled ``keras.Model`` with a single sigmoid output.
    """
    inputs = keras.Input(shape=(size_picture, size_picture, 3), name="cats_or_dogs")
    x = layers.Rescaling(1./255)(inputs)
    x = layers.Flatten()(x)
    x = layers.Dense(128, activation="relu")(x)
    x = layers.Dense(64, activation="relu")(x)
    outputs = layers.Dense(1, activation="sigmoid")(x)
    return keras.Model(inputs=inputs, outputs=outputs)


In [None]:
def build_model2_CNN():
    """Build a convolutional cat/dog classifier.

    Three Conv2D + MaxPooling2D stages (32, 64 and 128 filters) feed a
    64-unit dense layer and a sigmoid output. The input shape follows
    ``size_picture`` so that it matches the tensors produced by
    ``load_dataset``; pixel values are rescaled to [0, 1] inside the model.

    Returns:
        An uncompiled ``keras.Model`` with a single sigmoid output.
    """
    inputs = keras.Input(shape=(size_picture, size_picture, 3), name="image_input")
    x = layers.Rescaling(1./255)(inputs)
    x = layers.Conv2D(32, (3, 3), activation='relu')(x)
    x = layers.MaxPooling2D()(x)
    x = layers.Conv2D(64, (3, 3), activation='relu')(x)
    x = layers.MaxPooling2D()(x)
    x = layers.Conv2D(128, (3, 3), activation='relu')(x)
    x = layers.MaxPooling2D()(x)
    x = layers.Flatten()(x)
    x = layers.Dense(64, activation='relu')(x)
    outputs = layers.Dense(1, activation='sigmoid')(x)

    # Built and returned inside the function body.
    model = keras.Model(inputs=inputs, outputs=outputs)
    return model

In [None]:
def build_model3_augmented_CNN():
    """Build a convolutional cat/dog classifier with data augmentation.

    Random horizontal flips, rotations and zooms are applied in front of the
    convolutional stack. The input shape follows ``size_picture``; pixel
    values are rescaled to [0, 1] inside the model.

    Returns:
        An uncompiled ``keras.Model`` with a single sigmoid output.
    """
    # Augmentation pipeline; the flip mode string must be lowercase.
    datagen = keras.Sequential([
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.1),
        layers.RandomZoom(0.2),
    ])
    inputs = keras.Input(shape=(size_picture, size_picture, 3), name="data")
    x = datagen(inputs)
    x = layers.Rescaling(1/255.0)(x)

    x = layers.Conv2D(filters=32, kernel_size=3, activation='relu')(x)
    x = layers.MaxPooling2D(pool_size=(2, 2))(x)
    x = layers.Conv2D(filters=64, kernel_size=3, activation='relu')(x)
    x = layers.MaxPooling2D(pool_size=(2, 2))(x)
    x = layers.Conv2D(filters=128, kernel_size=3, activation='relu')(x)
    x = layers.Flatten()(x)

    x = layers.Dense(64, activation="relu")(x)
    x = layers.Dense(32, activation="relu")(x)
    outputs = layers.Dense(1, activation="sigmoid")(x)

    # Built and returned inside the function body.
    model = keras.Model(inputs=inputs, outputs=outputs)
    return model

In [None]:
# Train the three models on the same training/validation split.
# binary_classify compiles and fits a model instance, so each builder is
# called here rather than passed as a function object.
dnn_model = binary_classify(build_model_DNN(), train_inputs, train_labels, val_data=(val_inputs, val_labels))
cnn_model = binary_classify(build_model2_CNN(), train_inputs, train_labels, val_data=(val_inputs, val_labels))
cnn_aug_model = binary_classify(build_model3_augmented_CNN(), train_inputs, train_labels, val_data=(val_inputs, val_labels))

In [None]:
# Evaluate the dense baseline on the test data and report its accuracy.
print("Evaluating DNN:")
# evaluate() is unpacked as (loss, accuracy); this assumes the model was
# compiled with a single accuracy metric.
test_loss_dnn, test_acc_dnn = dnn_model.evaluate(test_inputs, test_labels)
print(f"DNN Accuracy: {test_acc_dnn:.4f}")

In [None]:
# Evaluate the convolutional model on the test data and report its accuracy.
print("Evaluating CNN:")
# evaluate() is unpacked as (loss, accuracy); this assumes the model was
# compiled with a single accuracy metric.
test_loss_cnn, test_acc_cnn = cnn_model.evaluate(test_inputs, test_labels)
print(f"CNN Accuracy: {test_acc_cnn:.4f}")

In [None]:
# Evaluate the augmented convolutional model on the test data and report its accuracy.
print("Evaluating Augmented CNN:")
# evaluate() is unpacked as (loss, accuracy); this assumes the model was
# compiled with a single accuracy metric.
test_loss_aug, test_acc_aug = cnn_aug_model.evaluate(test_inputs, test_labels)
print(f"Augmented CNN Accuracy: {test_acc_aug:.4f}")