In [6]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("jessicali9530/stanford-dogs-dataset")

print("Path to dataset files:", path)

Using Colab cache for faster access to the 'stanford-dogs-dataset' dataset.
Path to dataset files: /kaggle/input/stanford-dogs-dataset


In [7]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split

# 1. Define the root directory where the dataset was downloaded.
root_dir = '/kaggle/input/stanford-dogs-dataset'
images_dir = os.path.join(root_dir, 'images', 'Images')

# 2. Create a list of all image file paths by traversing the directory structure.
filepaths = []
labels = []
for breed_folder in os.listdir(images_dir):
    breed_path = os.path.join(images_dir, breed_folder)
    if os.path.isdir(breed_path):
        # 3. Extract the breed label for each image from its directory path.
        label = breed_folder.split('-')[-1] # Assuming the label is the last part after splitting by '-'
        for image_file in os.listdir(breed_path):
            if image_file.lower().endswith(('.jpg', '.jpeg', '.png')):
                filepaths.append(os.path.join(breed_path, image_file))
                labels.append(label)

# 4. Create a pandas DataFrame with two columns: 'filepath' and 'label'
df = pd.DataFrame({'filepath': filepaths, 'label': labels})

# 5. Split the DataFrame into training and validation sets
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['label'])

print("Training set size:", len(train_df))
print("Validation set size:", len(val_df))
display(train_df.head())
display(val_df.head())

Training set size: 16464
Validation set size: 4116


Unnamed: 0,filepath,label
2584,/kaggle/input/stanford-dogs-dataset/images/Ima...,pug
19862,/kaggle/input/stanford-dogs-dataset/images/Ima...,Chesapeake_Bay_retriever
15847,/kaggle/input/stanford-dogs-dataset/images/Ima...,Great_Pyrenees
17497,/kaggle/input/stanford-dogs-dataset/images/Ima...,Irish_wolfhound
5157,/kaggle/input/stanford-dogs-dataset/images/Ima...,beagle


Unnamed: 0,filepath,label
5760,/kaggle/input/stanford-dogs-dataset/images/Ima...,toy_terrier
925,/kaggle/input/stanford-dogs-dataset/images/Ima...,Maltese_dog
11838,/kaggle/input/stanford-dogs-dataset/images/Ima...,silky_terrier
2142,/kaggle/input/stanford-dogs-dataset/images/Ima...,Border_collie
5736,/kaggle/input/stanford-dogs-dataset/images/Ima...,toy_terrier


In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# 2. Define the input shape
img_height, img_width = 128, 128
input_shape = (img_height, img_width, 3)

# 3. Create a sequential model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    # 4. Add the final dense layer for classification
    Dense(len(df['label'].unique()), activation='softmax')
])

# 5. Compile the model
model.compile(optimizer=Adam(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# 6. Print a summary of the model architecture
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [9]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# 2. Create an ImageDataGenerator for training data with data augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    zoom_range=0.2,
    shear_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# 3. Create a separate ImageDataGenerator for validation data without augmentation
val_datagen = ImageDataGenerator(rescale=1./255)

# 4. Create data generators for the training and validation sets
batch_size = 32
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='filepath',
    y_col='label',
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True
)

val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    x_col='filepath',
    y_col='label',
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)

# 5. Train the compiled model
epochs = 10
steps_per_epoch = train_generator.n // train_generator.batch_size
validation_steps = val_generator.n // val_generator.batch_size

# Adjust steps to include the last partial batch
if train_generator.n % batch_size != 0:
    steps_per_epoch += 1
if val_generator.n % batch_size != 0:
    validation_steps += 1

history = model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    validation_data=val_generator,
    validation_steps=validation_steps
)

Found 16464 validated image filenames belonging to 119 classes.
Found 4116 validated image filenames belonging to 119 classes.


  self._warn_if_super_not_called()


Epoch 1/10
[1m515/515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 196ms/step - accuracy: 0.0140 - loss: 4.7718 - val_accuracy: 0.0313 - val_loss: 4.5763
Epoch 2/10
[1m515/515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 197ms/step - accuracy: 0.0298 - loss: 4.5554 - val_accuracy: 0.0505 - val_loss: 4.3706
Epoch 3/10
[1m515/515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 193ms/step - accuracy: 0.0507 - loss: 4.3406 - val_accuracy: 0.0663 - val_loss: 4.1752
Epoch 4/10
[1m515/515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 193ms/step - accuracy: 0.0682 - loss: 4.1868 - val_accuracy: 0.0758 - val_loss: 4.0738
Epoch 5/10
[1m515/515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 193ms/step - accuracy: 0.0853 - loss: 4.0601 - val_accuracy: 0.0887 - val_loss: 3.9868
Epoch 6/10
[1m515/515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 196ms/step - accuracy: 0.0949 - loss: 3.9604 - val_accuracy: 0.1081 - val_loss: 3.9454
Epoch

In [10]:
# 1. Evaluate the model on the validation set
loss, accuracy = model.evaluate(val_generator)

print(f"Validation Loss: {loss:.4f}")
print(f"Validation Accuracy: {accuracy:.4f}")

[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 69ms/step - accuracy: 0.1329 - loss: 3.7563
Validation Loss: 3.7468
Validation Accuracy: 0.1370
