In [22]:
import pandas as pd
import os
from sklearn.model_selection import train_test_split


In [23]:
# Set the directory paths
data_dir = "file location"
labels_file = "file location"


In [24]:
# Load labels from the CSV file
labels_df = pd.read_csv(labels_file)

# Create a dictionary mapping image filenames to their respective breed labels
labels_dict = dict(zip(labels_df.id, labels_df.breed))


In [25]:
# Get the list of image filenames
image_files = os.listdir(data_dir)

# Prepare lists to store image paths and corresponding labels
image_paths = []
labels = []

# Iterate through image files and populate image paths and labels lists
for image_file in image_files:
    image_path = os.path.join(data_dir, image_file)
    image_paths.append(image_path)
    image_id = os.path.splitext(image_file)[0]
    labels.append(labels_dict[image_id])


In [26]:
# Split the data into training and validation sets
train_image_paths, val_image_paths, train_labels, val_labels = train_test_split(image_paths, labels, test_size=0.2, random_state=42)


In [27]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Data augmentation and normalization for training set
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

# Data normalization for validation set (no augmentation)
val_datagen = ImageDataGenerator(rescale=1./255)

# Create data generators for training and validation sets
batch_size = 32
train_generator = train_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({"filename": train_image_paths, "class": train_labels}),
    directory=None,
    x_col="filename",
    y_col="class",
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode="categorical",
    shuffle=True
)

val_generator = val_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({"filename": val_image_paths, "class": val_labels}),
    directory=None,
    x_col="filename",
    y_col="class",
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode="categorical",
    shuffle=False
)

# Load the pre-trained VGG16 model
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the layers in the base model
for layer in base_model.layers:
    layer.trainable = False

# Add custom classification head
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)  # Add dropout for regularization
predictions = Dense(len(labels_df['breed'].unique()), activation='softmax')(x)

# Combine the base model with custom classification head
model = Model(inputs=base_model.input, outputs=predictions)

# Adjust learning rate
optimizer = Adam(learning_rate=0.0001)

# Compile the model
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])


Found 8177 validated image filenames belonging to 120 classes.
Found 2045 validated image filenames belonging to 120 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [28]:
# Train the model
epochs = 2
history = model.fit(train_generator, epochs=epochs, validation_data=val_generator)


Epoch 1/2
Epoch 2/2


In [29]:
# Evaluate the model on the validation set
loss, accuracy = model.evaluate(val_generator)
print("Validation Loss:", loss)
print("Validation Accuracy:", accuracy)


Validation Loss: 4.723873138427734
Validation Accuracy: 0.024449877440929413


In [30]:
# Save the model
model.save('dog_breed_model_vgg16.h5')
