# Dog Breed Identification - Model Training

This notebook contains the code for training a deep learning model to identify dog breeds from images.

## 1. Import Libraries

In [26]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG19
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

## Data Augmentation and Dataset Loading

**IMPORTANT:** Populate the dataset folders with dog breed images before running this section:
- Place training images in `dataset/train/[breed_name]/` 
- Place test images in `dataset/test/[breed_name]/`
- Supported formats: .jpg, .png

In [27]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

test_datagen = ImageDataGenerator(rescale=1./255)

## Verify Dataset Structure

In [28]:
import os
import glob

# Check dataset structure
train_dir = "dataset/train"
test_dir = "dataset/test"

# Count total images
# Support common image extensions (case variations included)
extensions = ['jpg','jpeg','png','JPG','JPEG','PNG']
train_images = []
test_images = []
for ext in extensions:
    train_images += glob.glob(os.path.join(train_dir, f'*/*.{ext}'))
    test_images += glob.glob(os.path.join(test_dir, f'*/*.{ext}'))

print(f"Training images found: {len(train_images)}")
print(f"Test images found: {len(test_images)}")

if len(train_images) == 0 or len(test_images) == 0:
    print("\n⚠️  WARNING: Dataset appears empty or no supported image files were found.")
    print("Please add image files to the dataset folders, e.g.")
    print("  - dataset/train/[breed_name]/*.jpg (or .jpeg/.png)")
    print("  - dataset/test/[breed_name]/*.jpg (or .jpeg/.png)")
    print("Also check file name case (JPG vs jpg) and that subfolders exist for each class.")
else:
    print("\n✅ Dataset looks ready for training!")
    print(f"Found {len(train_images)} training images and {len(test_images)} test images.")

Training images found: 12
Test images found: 12

✅ Dataset looks ready for training!
Found 12 training images and 12 test images.


In [29]:
# Create small dummy dataset (3 images per class) if you don't have real images yet
try:
    from PIL import Image
except Exception:
    import sys, subprocess
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'pillow'])
    from PIL import Image
import os
train_dir = 'dataset/train'
test_dir = 'dataset/test'
classes = ['pug', 'beagle']  # change or extend as needed
for base in (train_dir, test_dir):
    for cls in classes:
        d = os.path.join(base, cls)
        os.makedirs(d, exist_ok=True)
        for i in range(3):
            img = Image.new('RGB', (128,128), (50 + i*50, 100 + i*30, 150 + i*20))
            img.save(os.path.join(d, f'sample_{i}.png'))
print('Created dummy images for classes:', classes)

Created dummy images for classes: ['pug', 'beagle']


In [30]:
train_data = train_datagen.flow_from_directory(
    "dataset/train",
    target_size=(128,128),
    batch_size=32,
    class_mode="categorical",
)

test_data = test_datagen.flow_from_directory(
    "dataset/test",
    target_size=(128,128),
    batch_size=32,
    class_mode="categorical",
)

# Print diagnostics so we can see why training might fail
print(f"train_data.samples = {train_data.samples}")
print(f"test_data.samples = {test_data.samples}")
print(f"train_data.class_indices = {train_data.class_indices}")

# Flag indicating whether data is present for training
data_ready = (train_data.samples > 0 and test_data.samples > 0)

Found 6 images belonging to 2 classes.
Found 6 images belonging to 3 classes.
train_data.samples = 6
test_data.samples = 6
train_data.class_indices = {'beagle': 0, 'pug': 1}


## Build VGG19 Model

In [31]:
base_model = VGG19(
    weights="imagenet",
    include_top=False,
    input_shape=(128,128,3)
)

for layer in base_model.layers:
    layer.trainable = False

x = base_model.output
x = Flatten()(x)
output = Dense(20, activation="softmax")(x)

model = Model(inputs=base_model.input, outputs=output)

## Compile Model

In [32]:
model.compile(
    optimizer=Adam(),
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

## Train Model

In [33]:
if 'data_ready' in globals() and data_ready:
    model.fit(
        train_data,
        epochs=6,
        validation_data=test_data
    )
else:
    print("Dataset not ready for training. Add images to dataset/train and dataset/test, then re-run verification.")

Epoch 1/6


ValueError: Arguments `target` and `output` must have the same shape. Received: target.shape=(None, 2), output.shape=(None, 20)

## Save Model

In [34]:
model.save("dogbreed.h5")

