## Data extraction

In [None]:
import os, shutil, pathlib

# Source folder the images are copied from, and the root folder under which
# the per-split subset directories are created.
original_dir, new_base_dir = map(pathlib.Path, ("data", "subset"))

def make_subset(subset_name, start_index, end_index,
                categories=("cat", "dog"), src_dir=None, dst_dir=None):
    """Copy a contiguous index range of per-category images into a subset folder.

    For every category, the files named ``{category}.{i}.jpg`` with
    ``start_index <= i < end_index`` are copied from the source directory to
    ``<dst_dir>/<subset_name>/<category>/``. Destination directories are
    created when missing; existing files with the same name are overwritten.

    Args:
        subset_name: Name of the split folder to create (e.g. ``"train"``).
        start_index: First image index to copy (inclusive).
        end_index: Image index to stop at (exclusive).
        categories: Category names; each one is both the file-name prefix and
            the name of the destination sub-folder.
        src_dir: Folder holding the source images. Defaults to the
            module-level ``original_dir``.
        dst_dir: Root folder for the subsets. Defaults to the module-level
            ``new_base_dir``.

    Raises:
        FileNotFoundError: Propagated from ``shutil.copyfile`` when an
            expected source image does not exist.
    """
    # Fall back to the notebook-level paths only when no override is given, so
    # existing three-argument calls behave exactly as before.
    source_root = pathlib.Path(original_dir if src_dir is None else src_dir)
    target_root = pathlib.Path(new_base_dir if dst_dir is None else dst_dir)

    for category in categories:
        # Named `target_dir` rather than `dir` to avoid shadowing the builtin.
        target_dir = target_root / subset_name / category
        target_dir.mkdir(parents=True, exist_ok=True)
        for index in range(start_index, end_index):
            fname = f"{category}.{index}.jpg"
            shutil.copyfile(src=source_root / fname, dst=target_dir / fname)

# Build the three splits from consecutive, non-overlapping index ranges
# (end index exclusive).
# NOTE(review): the train range starts at 1, so index 0 is never copied —
# confirm this is intended.
for subset_name, first_index, stop_index in (
    ("train", 1, 100),
    ("validation", 100, 150),
    ("test", 150, 250),
):
    make_subset(subset_name, start_index=first_index, end_index=stop_index)

## Data preprocessing

In [None]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow.keras.applications.imagenet_utils import preprocess_input
import numpy as np
from pathlib import Path

# Root folder containing the "train", "validation" and "test" sub-folders.
new_base_dir = Path("subset")

# All three splits are loaded with the same image size and batch size, in the
# order train -> validation -> test.
train_dataset, validation_dataset, test_dataset = (
    image_dataset_from_directory(
        new_base_dir / split_name,
        image_size=(180, 180),
        batch_size=32,
    )
    for split_name in ("train", "validation", "test")
)

# Saved model that the feature-extraction step below runs each batch through.
base_model = keras.models.load_model("base.h5")

def get_features_and_labels(dataset):
    """Run every batch of ``dataset`` through ``base_model`` and collect the outputs.

    Each image batch is passed through ``preprocess_input`` in ``"caffe"`` mode
    before being fed to the model.

    Args:
        dataset: Iterable yielding ``(images, labels)`` batches.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays, each concatenated
        along the first axis across all batches, in iteration order.

    Raises:
        ValueError: If ``dataset`` yields no batches.
    """
    all_features = []
    all_labels = []
    for images, labels in dataset:
        preprocessed_images = preprocess_input(images, mode="caffe")
        # One forward pass per already-batched input: `predict_on_batch` avoids
        # the per-call setup and progress output of `Model.predict`, which the
        # Keras docs say is not intended for use inside a loop over batches.
        features = base_model.predict_on_batch(preprocessed_images)
        all_features.append(features)
        all_labels.append(np.asarray(labels))
    if not all_features:
        # np.concatenate would raise a less informative ValueError here.
        raise ValueError("dataset yielded no batches; nothing to extract")
    return np.concatenate(all_features), np.concatenate(all_labels)


# Extract features and labels for each split, in the order
# train -> validation -> test.
(
    (train_features, train_labels),
    (val_features, val_labels),
    (test_features, test_labels),
) = (
    get_features_and_labels(split_dataset)
    for split_dataset in (train_dataset, validation_dataset, test_dataset)
)

# Cache the arrays on disk so later cells can reload them with np.load.
Path("preprocessed").mkdir(exist_ok=True)
for npy_path, array in (
    ("preprocessed/train_features.npy", train_features),
    ("preprocessed/train_labels.npy", train_labels),
    ("preprocessed/val_features.npy", val_features),
    ("preprocessed/val_labels.npy", val_labels),
    ("preprocessed/test_features.npy", test_features),
    ("preprocessed/test_labels.npy", test_labels),
):
    np.save(npy_path, array)

## Training

In [None]:
# Reload the cached features and labels for the training and validation splits.
train_features = np.load("preprocessed/train_features.npy")
train_labels = np.load("preprocessed/train_labels.npy")
val_features = np.load("preprocessed/val_features.npy")
val_labels = np.load("preprocessed/val_labels.npy")

# Small classifier trained on the cached features.
# NOTE(review): the input shape is expected to match the per-sample shape of
# the saved feature arrays — confirm against the base model's output.
inputs = keras.Input(shape=(5, 5, 512))
x = layers.Flatten()(inputs)
# A Dense layer has no activation by default; without a nonlinearity here the
# two stacked Dense layers are equivalent to a single linear map feeding the
# sigmoid, so the hidden layer would add no representational capacity.
x = layers.Dense(256, activation="relu")(x)
x = layers.Dropout(0.5)(x)
# Single sigmoid unit paired with binary cross-entropy: binary classification.
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs, outputs)
model.compile(loss="binary_crossentropy",
              optimizer="rmsprop",
              metrics=["accuracy"])

model.fit(train_features, train_labels,
          epochs=20,
          validation_data=(val_features, val_labels))

# Persist the model as it stands after the final epoch; the evaluation cell
# reloads it from this file.
model.save("model.h5")

## Evaluation

In [None]:
# Reload the cached test-split features and labels from disk.
test_features, test_labels = (
    np.load(npy_path)
    for npy_path in (
        "preprocessed/test_features.npy",
        "preprocessed/test_labels.npy",
    )
)

# Score the saved classifier; with one compiled metric, evaluate() returns
# the loss followed by the accuracy.
test_model = keras.models.load_model("model.h5")
evaluation = test_model.evaluate(test_features, test_labels)
test_loss, test_acc = evaluation
print(f"Test accuracy: {test_acc:.3f}")