In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay
import os
import shutil
from sklearn.model_selection import train_test_split

2024-11-20 23:11:16.132135: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1732165876.144436    2601 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1732165876.147916    2601 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-20 23:11:16.160182: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Set Image directories
dataset_dir = '../dataset/images'
train_dir = '../dataset/train'
validation_dir = '../dataset/val'
test_dir = '../dataset/test'

In [3]:
# Function to delete and recreate directories
def reset_directory(directory):
    if os.path.exists(directory):
        shutil.rmtree(directory)
    os.makedirs(directory, exist_ok=True)

# Reset train, validation, and test directories
reset_directory(train_dir)
reset_directory(validation_dir)
reset_directory(test_dir)

# Get all subdirectories (each representing a dog breed)
breed_dirs = [d for d in os.listdir(dataset_dir) if os.path.isdir(os.path.join(dataset_dir, d))]

print(f'Number of breeds: {len(breed_dirs)}')

# Iterate through each breed directory
for breed in breed_dirs:
    breed_path = os.path.join(dataset_dir, breed)
    images = [os.path.join(breed_path, img) for img in os.listdir(breed_path) if img.endswith(('jpg', 'jpeg', 'png'))]

    # Split the images into train, validation, and test sets
    train_images, temp_images = train_test_split(images, test_size=0.3, random_state=42)
    val_images, test_images = train_test_split(temp_images, test_size=0.5, random_state=42)

    # Create breed directories in train, validation, and test directories
    os.makedirs(os.path.join(train_dir, breed), exist_ok=True)
    os.makedirs(os.path.join(validation_dir, breed), exist_ok=True)
    os.makedirs(os.path.join(test_dir, breed), exist_ok=True)

    # Function to copy images to respective directories
    def copy_images(image_list, target_dir):
        for image_path in image_list:
            shutil.copy(image_path, os.path.join(target_dir, breed))

    # Copy images to train, validation, and test directories
    copy_images(train_images, train_dir)
    copy_images(val_images, validation_dir)
    copy_images(test_images, test_dir)

Number of breeds: 120


In [4]:
# Data generator functions
def create_datagen(preprocessing_function =""):
    return ImageDataGenerator(
        preprocessing_function=preprocessing_function,
        rescale=1./255  # Normalize pixel values
        #rotation_range=20,
        #width_shift_range=0.2,
        #height_shift_range=0.2,
        #zoom_range=0.2,
        #horizontal_flip=True,
    )
def create_generator(datagen, directory, shuffle):
    return datagen.flow_from_directory(
        directory,
        target_size=(300,300),
        batch_size=16,
        class_mode='categorical',
        shuffle=shuffle,
    )

In [5]:
# Create data generators
datagen_nasnet = create_datagen(tf.keras.applications.vgg16.preprocess_input)
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)
train_generator_nasnet = create_generator(datagen_nasnet, train_dir, True)
val_generator = create_generator(val_datagen, validation_dir, True)
test_generator = create_generator(test_datagen, test_dir, False)

Found 14357 images belonging to 120 classes.
Found 3082 images belonging to 120 classes.
Found 3141 images belonging to 120 classes.


In [6]:
# Model Building
nasnet_model = tf.keras.applications.NASNetLarge(weights='imagenet', include_top=False, input_shape=(300,300,3))
x = nasnet_model.output
# Freeze base model layers
nasnet_model.trainable = False
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(1024, activation='relu')(x)
x_preds = tf.keras.layers.Dense(120, activation='softmax')(x)
nasnet_model = tf.keras.Model(inputs=nasnet_model.input, outputs=x_preds)

I0000 00:00:1732166442.417085    2601 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6073 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2070 SUPER, pci bus id: 0000:01:00.0, compute capability: 7.5


In [7]:
nasnet_model.summary()

In [8]:
nasnet_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [9]:
# Model Training
history = nasnet_model.fit(train_generator_nasnet, validation_data=val_generator, epochs=5)

  self._warn_if_super_not_called()


Epoch 1/5


I0000 00:00:1732166484.192121    4403 service.cc:148] XLA service 0x7f3e18040d70 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1732166484.192148    4403 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce RTX 2070 SUPER, Compute Capability 7.5
2024-11-20 23:21:24.691712: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1732166488.026730    4403 cuda_dnn.cc:529] Loaded cuDNN version 90300
2024-11-20 23:21:30.682468: W external/local_xla/xla/tsl/framework/bfc_allocator.cc:306] Allocator (GPU_0_bfc) ran out of memory trying to allocate 5.85GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-11-20 23:21:30.878544: W external/local_xla/xla/tsl/framework/bfc_allocator.cc:306] Allocator (GPU_0_bfc) r

[1m 29/898[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m30:35[0m 2s/step - accuracy: 0.2516 - loss: 4.1015

KeyboardInterrupt: 

In [None]:
# Evaluate the model on the test data
test_loss, test_acc = nasnet_model.evaluate(test_generator, steps=test_generator.classes.size)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_acc}")

# Reset the test generator
test_generator.reset()

# Get the predictions
predictions = model.predict(test_generator, steps=test_generator.classes.size)

# Get true labels
y_true = test_generator.classes

# Convert predicted probabilities to class labels
y_pred = predictions.argmax(axis=1)

# Print classification report
print("Classification Report:")
print(classification_report(y_true, y_pred, zero_division=1))

[1m 196/3141[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m7:03[0m 144ms/step - accuracy: 0.8632 - loss: 0.7117

2024-11-20 22:59:21.193782: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-11-20 22:59:21.195150: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]
  self.gen.throw(value)


[1m3141/3141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 11ms/step - accuracy: 0.8539 - loss: 0.7679  
Test Loss: 0.7715925574302673
Test Accuracy: 0.8532314300537109
[1m3141/3141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 13ms/step   
Classification Report:


ValueError: Found input variables with inconsistent numbers of samples: [3141, 376920]

In [13]:
# Plot training and validation accuracy
plt.figure(figsize=(12, 6))
plt.plot(history.history['accuracy'], label='Train Accuracy', marker='o')
plt.plot(history.history['val_accuracy'], label='Val Accuracy', marker='o')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend(loc='best')
plt.grid(True)
plt.show()

# Plot training and validation loss
plt.figure(figsize=(12, 6))
plt.plot(history.history['loss'], label='Train Loss', marker='o')
plt.plot(history.history['val_loss'], label='Val Loss', marker='o')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend(loc='best')
plt.grid(True)
plt.show()

NameError: name 'history' is not defined

<Figure size 1200x600 with 0 Axes>