In [7]:
## import packages
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pandas as pd
import numpy as np

In [5]:
## set up to split images into training and test set
from sklearn.model_selection import train_test_split


def split_images(
    csv_path,
    test_size=0.2,
    random_state=42,
    image_col="image",
    label_col="labels",
):
    """Split an image/label CSV into stratified train and test DataFrames.

    Args:
        csv_path: Path to a CSV file with one row per image.
        test_size: Forwarded to ``train_test_split`` as the test-set size.
        random_state: Forwarded to ``train_test_split`` so the split is
            reproducible.
        image_col: Name of the CSV column holding the image filenames.
        label_col: Name of the CSV column holding the class labels; it is
            also used as the stratification key.

    Returns:
        A ``(train_df, test_df)`` tuple. Each DataFrame contains only
        ``image_col`` and ``label_col`` and keeps the row index of the
        source CSV.

    Raises:
        KeyError: If ``image_col`` or ``label_col`` is not a column of the CSV.
    """
    # Load the CSV file
    data = pd.read_csv(csv_path)

    # Fail with a message that names the missing column(s) instead of a bare
    # KeyError from the column lookup.
    missing = [col for col in (image_col, label_col) if col not in data.columns]
    if missing:
        raise KeyError(
            f"{csv_path} is missing required column(s) {missing}; "
            f"found columns {list(data.columns)}"
        )

    filenames = data[image_col]
    labels = data[label_col]

    # Split the data into training and testing sets; stratifying on the
    # labels keeps the class proportions the same in both splits.
    X_train, X_test, y_train, y_test = train_test_split(
        filenames,
        labels,
        test_size=test_size,
        random_state=random_state,
        stratify=labels,
    )

    # Create DataFrames for training and testing sets
    train_df = pd.DataFrame({image_col: X_train, label_col: y_train})
    test_df = pd.DataFrame({image_col: X_test, label_col: y_test})

    return train_df, test_df


## run

# Source CSV listing every image filename together with its label.
csv_path = "datasets/catdog/cat_dog.csv"

# Peek at the raw table before splitting it.
gloss_catdog = pd.read_csv(csv_path)
print(gloss_catdog.head())

# Stratified train/test partition using the function's default settings.
train_df, test_df = split_images(csv_path)

# Optionally, persist each split as its own CSV file.
for split_frame, out_path in (
    (train_df, "datasets/catdog/train_images.csv"),
    (test_df, "datasets/catdog/test_images.csv"),
):
    split_frame.to_csv(out_path, index=False)

# Preview the first rows of the training split, then of the test split.
for split_frame in (train_df, test_df):
    print(split_frame.head())

           image  labels
0  dog.11289.jpg       1
1  cat.10836.jpg       0
2   cat.7970.jpg       0
3   cat.2212.jpg       0
4   dog.2653.jpg       1
               image  labels
14949  dog.10901.jpg       1
10006   cat.1420.jpg       0
24551   dog.7506.jpg       1
2625    dog.7122.jpg       1
15921   dog.1586.jpg       1
               image  labels
6237   dog.10138.jpg       1
12482   dog.6839.jpg       1
17892   cat.8509.jpg       0
8016    cat.3989.jpg       0
5530    dog.2713.jpg       1


In [None]:
## setup for is.dog 2.0 -> classification of images into dog vs cat


# Function to load images and labels from a DataFrame
def load_images_from_dataframe(
    dataframe,
    img_height,
    img_width,
    batch_size,
    data_augmentation=False,
    base_dir="datasets/catdog",
):
    """Create a Keras image generator from a DataFrame of filenames and labels.

    The caller's DataFrame is left unmodified, so the function can be called
    repeatedly (e.g. when a notebook cell is re-run) with the same frame.

    Args:
        dataframe: DataFrame with an "image" column of filenames relative to
            ``base_dir`` and a "labels" column of class labels.
        img_height: Target image height passed to the generator.
        img_width: Target image width passed to the generator.
        batch_size: Number of images per generated batch.
        data_augmentation: When True, apply random rotation, shifts, shear,
            zoom and horizontal flips in addition to rescaling.
        base_dir: Directory prefix joined to every filename with "/".

    Returns:
        The iterator returned by ``ImageDataGenerator.flow_from_dataframe``
        with ``class_mode="categorical"``.
    """
    # Build a derived frame instead of assigning into `dataframe`: prefixing
    # the paths in place would prepend base_dir again on every further call.
    frame = dataframe.assign(image=base_dir + "/" + dataframe["image"])

    # class_mode="categorical" only accepts string (or list/tuple) labels, so
    # numeric labels such as 0/1 read from a CSV are cast to strings. Object
    # columns (already strings or lists) are passed through untouched.
    if frame["labels"].dtype != object:
        frame = frame.assign(labels=frame["labels"].astype(str))

    if data_augmentation:
        datagen = ImageDataGenerator(
            rescale=1.0 / 255,
            rotation_range=40,
            width_shift_range=0.2,
            height_shift_range=0.2,
            shear_range=0.2,
            zoom_range=0.2,
            horizontal_flip=True,
            fill_mode="nearest",
        )
    else:
        # Rescale pixel values only; no random transformations.
        datagen = ImageDataGenerator(rescale=1.0 / 255)

    generator = datagen.flow_from_dataframe(
        frame,
        x_col="image",
        y_col="labels",
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode="categorical",
    )

    return generator


# Load the CSV data
# The splitting cell writes these files under datasets/catdog/, so they are
# read back from that same location rather than from the working directory.
train_df = pd.read_csv("datasets/catdog/train_images.csv")
test_df = pd.read_csv("datasets/catdog/test_images.csv")

# Image dimensions and parameters
img_height, img_width = 150, 150
batch_size = 32
# Number of distinct label values; used as the width of the output layer.
num_classes = train_df["labels"].nunique()

# Create data generators: augmentation for training only, so the test
# images are just rescaled.
train_generator = load_images_from_dataframe(
    train_df, img_height, img_width, batch_size, data_augmentation=True
)
test_generator = load_images_from_dataframe(
    test_df, img_height, img_width, batch_size, data_augmentation=False
)

# Building the CNN model
# Four convolution + max-pooling stages feed a dense classifier head.
model = Sequential()

# First stage also declares the input shape (height, width, 3 channels).
model.add(
    Conv2D(32, (3, 3), activation="relu", input_shape=(img_height, img_width, 3))
)
model.add(MaxPooling2D((2, 2)))

# Remaining stages differ only in the number of filters.
for n_filters in (64, 128, 128):
    model.add(Conv2D(n_filters, (3, 3), activation="relu"))
    model.add(MaxPooling2D((2, 2)))

# Classifier head: flatten, one hidden layer with dropout, softmax output
# with one unit per class.
model.add(Flatten())
model.add(Dense(512, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation="softmax"))

# Compile the model
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# Make sure the output folder exists before the long training run, so the
# final save does not fail because of a missing directory. makedirs succeeds
# if the folder is already there.
tf.io.gfile.makedirs("isdog2_models")

# Training the model
# Steps are whole batches only: a trailing partial batch is not counted.
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // batch_size,
    epochs=20,
    validation_data=test_generator,
    validation_steps=test_generator.samples // batch_size,
)

# Evaluating the model on the test generator
loss, accuracy = model.evaluate(test_generator)
print(f"Test Accuracy: {accuracy:.2f}")

# Save the model
model.save("isdog2_models/isdogv2_0.h5")