In [1]:
# import tensorflow as tf
# from tensorflow.compat.v1 import ConfigProto
# from tensorflow.compat.v1 import InteractiveSession
# config = ConfigProto()
# config.gpu_options.allow_growth = True
# session = InteractiveSession(config=config)

In [2]:
import pandas as pd
import numpy as np
import os 
from pathlib import Path
from PIL import Image
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator

#TEST_IMAGE_DIR = "./dog-breed-identification/test/"
TRAIN_IMAGE_DIR = "./dog-breed-identification/train/"
LABELS = './dog-breed-identification/labels.csv'

data = pd.read_csv(LABELS)
class_names = data['breed'].unique()

print(data)



                                     id                     breed
0      000bec180eb18c7604dcecc8fe0dba07               boston_bull
1      001513dfcb2ffafc82cccf4d8bbaba97                     dingo
2      001cdf01b096e06d78e9e5112d419397                  pekinese
3      00214f311d5d2247d5dfe4fe24b2303d                  bluetick
4      0021f9ceb3235effd7fcde7f7538ed62          golden_retriever
...                                 ...                       ...
10217  ffd25009d635cfd16e793503ac5edef0                    borzoi
10218  ffd3f636f7f379c51ba3648a9ff8254f            dandie_dinmont
10219  ffe2ca6c940cddfee68fa3cc6c63213f                  airedale
10220  ffe5f6d8e2bff356e9482a80a6e29aac        miniature_pinscher
10221  fff43b07992508bc822f33d8ffd902ae  chesapeake_bay_retriever

[10222 rows x 2 columns]


In [3]:
#Data from the csv only contans the ids of the photos and not the photos itself
#Iterate through the data and check whether the photo is in test or train
#Then fetch it and store it in its proper variable
import tensorflow as tf
from os.path import join
from tensorflow.keras.utils import load_img, img_to_array
from tensorflow.keras.applications.imagenet_utils import preprocess_input
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

def load_and_preprocess_images(image_dir, image_ids, target_size=(224, 224)):
    image_data = []
    for img_id in image_ids:
        img_path = join(image_dir, img_id + ".jpg")
        img = load_img(img_path, target_size=target_size)
        img_array = img_to_array(img)
        preprocessed_img = preprocess_input(img_array) #I dont flatten the image here
        image_data.append(preprocessed_img)
    return np.array(image_data)

# Get image IDs for train and test
train_image_ids = data['id'].values
labels = data['breed'].values
# Encode labels
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

train_image_ids, unfinished_test_image_ids, train_labels, unfinished_test_labels = train_test_split(train_image_ids, encoded_labels, test_size=0.3, random_state=42, stratify=encoded_labels)

test_image_ids, val_image_ids, test_labels, val_labels = train_test_split(unfinished_test_image_ids, unfinished_test_labels, test_size=0.5, random_state=42, stratify=unfinished_test_labels)

train_images = load_and_preprocess_images(TRAIN_IMAGE_DIR, train_image_ids)
test_images = load_and_preprocess_images(TRAIN_IMAGE_DIR, test_image_ids)
val_images = load_and_preprocess_images(TRAIN_IMAGE_DIR, val_image_ids)

In [4]:
train_images = train_images / 250.0
test_images = test_images / 250.0
val_images = val_images / 250.0

In [5]:


datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    zoom_range=0.2
)

datagen.fit(train_images)

In [6]:
from tensorflow.keras.applications import VGG16

# Load the pre-trained VGG16 model
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Set the layers in the base model to non-trainable
for layer in base_model.layers:
    layer.trainable = False

# Build the custom classification head
x = base_model.output
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(512, activation='relu')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Dropout(0.5)(x)
x = tf.keras.layers.Dense(256, activation='relu')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Dropout(0.5)(x)
predictions = tf.keras.layers.Dense(120, activation='softmax')(x)

In [7]:
model = tf.keras.Model(inputs=base_model.input, outputs=predictions)


In [8]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [9]:
import os
import tensorflow as tf
from keras.callbacks import ModelCheckpoint
#from keras.callbacks import Callback

filepath = './models/best_model.epoch{epoch:02d}-loss{val_loss:.2f}.hdf5'
checkpoint = ModelCheckpoint(filepath=filepath, 
                             monitor='val_loss',
                             verbose=1, 
                             save_best_only=True,
                             mode='min')


callbacks = [checkpoint]

In [10]:
batch_size = 80  # Choose a batch size that fits your memory constraints
epochs = 500

history = model.fit(datagen.flow(train_images, train_labels, batch_size=batch_size),
                    steps_per_epoch=len(train_images) // batch_size,
                    epochs=epochs,
                    validation_data=(val_images, val_labels),
                    callbacks=[callbacks])
#)

Epoch 1/500
Epoch 1: val_loss improved from inf to 4.53881, saving model to ./models\best_model.epoch01-loss4.54.hdf5
Epoch 2/500
Epoch 2: val_loss improved from 4.53881 to 4.17532, saving model to ./models\best_model.epoch02-loss4.18.hdf5
Epoch 3/500
Epoch 3: val_loss improved from 4.17532 to 3.65549, saving model to ./models\best_model.epoch03-loss3.66.hdf5
Epoch 4/500
Epoch 4: val_loss improved from 3.65549 to 3.16132, saving model to ./models\best_model.epoch04-loss3.16.hdf5
Epoch 5/500
Epoch 5: val_loss improved from 3.16132 to 2.86922, saving model to ./models\best_model.epoch05-loss2.87.hdf5
Epoch 6/500
Epoch 6: val_loss improved from 2.86922 to 2.72459, saving model to ./models\best_model.epoch06-loss2.72.hdf5
Epoch 7/500
Epoch 7: val_loss improved from 2.72459 to 2.60198, saving model to ./models\best_model.epoch07-loss2.60.hdf5
Epoch 8/500
Epoch 8: val_loss did not improve from 2.60198
Epoch 9/500
Epoch 9: val_loss did not improve from 2.60198
Epoch 10/500
Epoch 10: val_loss 

KeyboardInterrupt: 

In [None]:
#test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
#print('\nTest accuracy:', test_acc)

In [None]:
model.save('pre_trained_38.h5')