**VERSION ONE:** test code using kaggle dataset
> Add blockquote



In [1]:
# Dependencies
import os
import numpy as np
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg19 import preprocess_input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import kagglehub
import random

In [2]:
# PRACTICE DATA FROM KAGGLE
path = kagglehub.dataset_download("jessicali9530/stanford-dogs-dataset")
print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/jessicali9530/stanford-dogs-dataset?dataset_version_number=2...


100%|██████████| 750M/750M [00:06<00:00, 117MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/jessicali9530/stanford-dogs-dataset/versions/2


In [3]:
# Directories for images and annotations
images_dir = os.path.join(path, 'images')
annotations_dir = os.path.join(path, 'annotations', 'Annotation')

In [4]:
# Set smaller image size and max sample size for testing
image_size = (128, 128)
max_samples = 500

In [5]:
# Load the annotations file (assuming it is a JSON file)
annotation_file = os.path.join(annotations_dir, 'annotations.json')

In [6]:
def load_images_and_labels(images_dir, annotations_dir, image_size=(224, 224), max_samples=1000):
    images = []
    labels = []
    class_map = {}  # To map breed names to integer labels
    class_index = 0

   # Loop through each breed folder inside the annotations directory
    for breed_folder in os.listdir(annotations_dir):
        breed_folder_path = os.path.join(annotations_dir, breed_folder)

        # Check if it's a directory (breed folder)
        if os.path.isdir(breed_folder_path):
            if breed_folder not in class_map:
                class_map[breed_folder] = class_index
                class_index += 1

            # Now, look for the images in the corresponding breed folder in the 'Images' subfolder
            breed_image_folder = os.path.join(images_dir, 'Images', breed_folder)
            if os.path.isdir(breed_image_folder):
                breed_images = []
                for img_file in os.listdir(breed_image_folder):
                    img_path = os.path.join(breed_image_folder, img_file)  # Full image path
                    breed_images.append(img_path)

                # Shuffle the images and sample a subset if there are too many
                random.shuffle(breed_images)
                breed_images = breed_images[:max_samples // len(class_map)]  # Adjust number based on max_samples

                # Load only the selected images
                for img_path in breed_images:
                    # Check if the image exists
                    if os.path.exists(img_path):
                        print(f"Loading image: {img_path}")  # Print the image path being loaded
                        # Load and preprocess the image
                        img = image.load_img(img_path, target_size=image_size)
                        img_array = image.img_to_array(img)
                        img_array = preprocess_input(img_array)  # Preprocessing for VGG19 (e.g., normalization)

                        images.append(img_array)
                        labels.append(class_map[breed_folder])

    return np.array(images), np.array(labels), class_map

In [7]:
# Load images and labels with the new function
X, y, class_map = load_images_and_labels(images_dir, annotations_dir, image_size=(128, 128), max_samples=500)


Loading image: /root/.cache/kagglehub/datasets/jessicali9530/stanford-dogs-dataset/versions/2/images/Images/n02102040-English_springer/n02102040_3225.jpg
Loading image: /root/.cache/kagglehub/datasets/jessicali9530/stanford-dogs-dataset/versions/2/images/Images/n02102040-English_springer/n02102040_6248.jpg
Loading image: /root/.cache/kagglehub/datasets/jessicali9530/stanford-dogs-dataset/versions/2/images/Images/n02102040-English_springer/n02102040_539.jpg
Loading image: /root/.cache/kagglehub/datasets/jessicali9530/stanford-dogs-dataset/versions/2/images/Images/n02102040-English_springer/n02102040_2986.jpg
Loading image: /root/.cache/kagglehub/datasets/jessicali9530/stanford-dogs-dataset/versions/2/images/Images/n02102040-English_springer/n02102040_7484.jpg
Loading image: /root/.cache/kagglehub/datasets/jessicali9530/stanford-dogs-dataset/versions/2/images/Images/n02102040-English_springer/n02102040_1515.jpg
Loading image: /root/.cache/kagglehub/datasets/jessicali9530/stanford-dogs-da

In [8]:
# Convert labels to one-hot encoding
y = to_categorical(y, num_classes=len(class_map))

In [9]:
# Split the data into training and validation sets (80-20 split)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [17]:
# Build the model with the correct Dense layer input size
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),  # Add more pooling layers if needed

    # Flatten the output from the convolutional layers before passing it to Dense layers
    Flatten(),  # Flatten the 3D output to 1D

    # Adjusted Dense layer based on the new flattened size
    Dense(128, activation='relu'),  # Fully connected layer
    Dense(len(class_map), activation='softmax')  # Output layer for the number of classes
])

In [18]:
# Print the model summary to check the output shapes at each layer
model.summary()

In [19]:
# Compile the model
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

In [20]:
# Set smaller batch size to reduce memory usage during training
batch_size = 16  # Use a smaller batch size to avoid memory overload

In [21]:
# Train the model on the dataset
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10, batch_size=batch_size)


Epoch 1/10
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 526ms/step - accuracy: 0.0476 - loss: 17.7048 - val_accuracy: 0.1230 - val_loss: 4.2959
Epoch 2/10
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 526ms/step - accuracy: 0.1330 - loss: 4.2497 - val_accuracy: 0.1253 - val_loss: 4.1720
Epoch 3/10
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 503ms/step - accuracy: 0.2370 - loss: 3.4449 - val_accuracy: 0.1163 - val_loss: 4.3611
Epoch 4/10
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 494ms/step - accuracy: 0.4437 - loss: 2.2966 - val_accuracy: 0.1074 - val_loss: 5.2650
Epoch 5/10
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 515ms/step - accuracy: 0.7946 - loss: 0.8642 - val_accuracy: 0.1051 - val_loss: 6.9736
Epoch 6/10
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 512ms/step - accuracy: 0.9487 - loss: 0.2284 - val_accuracy: 0.0805 - val_loss: 11.1984
Epoch 7/

In [23]:
# Evaluate the model on the test data
model_loss, model_accuracy = model.evaluate(X_train, y_train, verbose=2)

# Print the loss and accuracy
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

56/56 - 14s - 244ms/step - accuracy: 0.9978 - loss: 0.0296
Loss: 0.029620390385389328, Accuracy: 0.997761607170105
