Notebook by Zara

**``CNN using merged dataset``**

In [1]:
import pandas as pd
import cv2
import numpy as np
import os

from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.applications.vgg16 import VGG16
from tensorflow.keras.optimizers import Adam

import tensorflow as tf
from tensorflow.keras import layers, models


In [2]:
data = pd.read_csv('/Users/zaravanthoff/Desktop/MasterProject/Datasets/full_dataset/full_dataset.csv')

LITTLE CLEANING

In [3]:
# data.drop(columns=['width', 'height'], inplace=True)
data

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax,class_index
0,3151369135636477165_jpg.rf.521cb85f2777fe14c53...,480.0,600.0,NIKE,352,507,370,538,1
1,youtube-44_jpg.rf.6e4b9a30c5c74280ee120b18193d...,1920.0,1080.0,NIKE,877,441,963,469,1
2,youtube-44_jpg.rf.6e4b9a30c5c74280ee120b18193d...,1920.0,1080.0,NIKE,316,661,337,680,1
3,2746419871990892444_jpg.rf.76034c0a0150b72a5a2...,480.0,853.0,NIKE,223,510,232,515,1
4,2746419871990892444_jpg.rf.76034c0a0150b72a5a2...,480.0,853.0,NIKE,160,649,166,655,1
...,...,...,...,...,...,...,...,...,...
18254,2126991906.jpg,,,Yahoo,15,6,253,54,76
18255,217288720.jpg,,,Yahoo,136,161,304,222,76
18256,2472817996.jpg,,,Yahoo,2,4,499,106,76
18257,2514220918.jpg,,,Yahoo,1,69,342,157,76


In [14]:
# Folder path containing the images
folder_path = "/Users/zaravanthoff/Desktop/MasterProject/Datasets/full_dataset/full_images(2)"

In [15]:
# Iterate through each row in the dataset
for index, row in data.iterrows():
    # Extract image filename from the dataset
    image_filename = row.iloc[0]  # Accessing by position using iloc
                            # THis ensures that values are accessed by position rather than by integer index labels.
    
    # Construct the full path to the image
    image_path = os.path.join(folder_path, image_filename)
    
    # Read the image using OpenCV
    image = cv2.imread(image_path)
    
    # # Display the image (you can perform any processing here)
    # cv2.imshow("Image", image)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()

In [17]:
num_classes = data['class'].nunique()
print("Number of logo classes:", num_classes)

Number of logo classes: 76


### Data preprocessing

RESIZE

In [18]:
# Define the target size for resizing
target_height = 100
target_width = 100

In [19]:
# List to store resized images and corresponding labels
resized_images = []
labels = []

In [21]:
# Iterate through each row in the dataset
for index, row in data.iterrows():
    # Extract image filename from the dataset
    image_filename = row.iloc[0]  # Accessing by integer index 0
    
    # Construct the full path to the image
    image_path = os.path.join(folder_path, image_filename)
    
    # Read the image using OpenCV
    image = cv2.imread(image_path)
    
    # Resize the image to the target size
    resized_image = cv2.resize(image, (target_width, target_height))
    
    # Append resized image to the list
    resized_images.append(resized_image)
    
    # Append label to the list (assuming label is in the second column of the dataframe)
    label = row[1]
    labels.append(label)

  label = row[1]


In [22]:
# Convert lists to numpy arrays
resized_images = np.array(resized_images)
labels = np.array(labels)

In [23]:
# Check the shape of resized images and labels
print("Resized Images Shape:", resized_images.shape)
print("Labels Shape:", labels.shape)

Resized Images Shape: (36518, 100, 100, 3)
Labels Shape: (36518,)


NORMALIZATION OF PIXEL VALUES

In [24]:
# Normalize pixel values
resized_images = resized_images.astype('float32') / 255.0

# Check the range of pixel values after normalization
print("Minimum Pixel Value After Normalization:", np.min(resized_images))
print("Maximum Pixel Value After Normalization:", np.max(resized_images))

Minimum Pixel Value After Normalization: 0.0
Maximum Pixel Value After Normalization: 1.0


MODEL ARCHITECTURE

In [25]:
# Randomly sample a subset of the dataset
num_samples_to_keep = 10000  # Adjust this number based on your requirements
num_instances = len(resized_images)
sampled_indices = np.random.choice(num_instances, num_samples_to_keep, replace=False)
sampled_images = resized_images[sampled_indices]
sampled_labels = labels[sampled_indices]

In [26]:
# Split the sampled subset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(sampled_images, sampled_labels, test_size=0.2, random_state=42)


In [27]:
# Print the shapes of the training and testing sets
print("Training set shape (X_train):", X_train.shape)
print("Training set shape (y_train):", y_train.shape)
print("Testing set shape (X_test):", X_test.shape)
print("Testing set shape (y_test):", y_test.shape)

Training set shape (X_train): (8000, 100, 100, 3)
Training set shape (y_train): (8000,)
Testing set shape (X_test): (2000, 100, 100, 3)
Testing set shape (y_test): (2000,)


In [28]:
# Number of logo classes in your dataset
num_classes = len(data['class'].unique())

# Ensure num_classes is an integer
if not isinstance(num_classes, int):
    num_classes = int(num_classes)

num_classes

76

In [29]:
# Define the CNN model
model = models.Sequential([
    # Convolutional layers
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(target_height, target_width, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    
    # Flatten layer to convert 3D feature maps to 1D feature vectors
    layers.Flatten(),
    
    # Fully connected layers
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),  # Dropout layer to reduce overfitting
    layers.Dense(num_classes, activation='softmax')  # Output layer with softmax activation for multi-class classification
])

  super().__init__(
2024-03-21 16:41:18.629505: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2024-03-21 16:41:18.629638: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2024-03-21 16:41:18.629649: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2024-03-21 16:41:18.630262: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-03-21 16:41:18.630840: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [30]:
# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [31]:
# Initialize LabelEncoder
label_encoder = LabelEncoder()

# Fit and transform labels in training set
y_train_encoded = label_encoder.fit_transform(y_train)

# Transform labels in testing set (using the same encoder)
# Ensure consistency of labels in testing set
y_test_unique = set(y_test)
unseen_labels = y_test_unique - set(label_encoder.classes_)
if unseen_labels:
    raise ValueError(f"Unseen labels in testing set: {unseen_labels}")
y_test_encoded = label_encoder.transform(y_test)

In [32]:
from sklearn.preprocessing import LabelEncoder

# Initialize LabelEncoder
label_encoder = LabelEncoder()

# Fit and transform labels in training set
y_train_encoded = label_encoder.fit_transform(y_train)

# Transform labels in testing set (using the same encoder)
y_test_encoded = label_encoder.transform(y_test)

In [33]:
# Train the model
history = model.fit(X_train, y_train_encoded, epochs=10, validation_data=(X_test, y_test_encoded))


Epoch 1/10


2024-03-21 16:42:13.729102: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 61ms/step - accuracy: 0.7051 - loss: 1.8001 - val_accuracy: 0.7830 - val_loss: 0.9313
Epoch 2/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 56ms/step - accuracy: 0.8091 - loss: 0.8387 - val_accuracy: 0.8550 - val_loss: 0.5737
Epoch 3/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 58ms/step - accuracy: 0.8870 - loss: 0.4670 - val_accuracy: 0.9135 - val_loss: 0.3912
Epoch 4/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 54ms/step - accuracy: 0.9388 - loss: 0.2603 - val_accuracy: 0.9315 - val_loss: 0.3448
Epoch 5/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 54ms/step - accuracy: 0.9487 - loss: 0.1833 - val_accuracy: 0.9345 - val_loss: 0.3645
Epoch 6/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 54ms/step - accuracy: 0.9605 - loss: 0.1438 - val_accuracy: 0.9455 - val_loss: 0.2936
Epoch 7/10
[1m250/250[0m 

In [34]:
# Evaluate the model on the testing set
test_loss, test_acc = model.evaluate(X_test, y_test_encoded)

# Print the test accuracy
print('Test accuracy:', test_acc)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9473 - loss: 0.3891
Test accuracy: 0.9505000114440918


-------------

FINAL MODEL

In [41]:
from tensorflow.keras.preprocessing import image

def predict_logo(image_path, model, label_encoder):
    # Load and preprocess the image
    img = image.load_img(image_path, target_size=(target_height, target_width))
    img_array = image.img_to_array(img)
    img_array = img_array.reshape((1, target_height, target_width, 3))
    img_array = img_array / 255.0  # Normalize pixel values
    
    # Make prediction
    prediction = model.predict(img_array)
    
    # Decode the prediction
    predicted_label = label_encoder.inverse_transform([np.argmax(prediction)])
    
    return predicted_label[0]

# Example usage:
image_path = "/Users/zaravanthoff/Desktop/MasterProject/Datasets/full_dataset/full_images(2)/35_jpg.rf.73689f20496c4b1f1245e24e88b18a3e.jpg"  # Replace with the path to your image
predicted_logo = predict_logo(image_path, model, label_encoder)
print("Predicted logo:", predicted_logo)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Predicted logo: Google


In [37]:
target_size = (100, 100)

def predict_logo(image_path, model, label_encoder, target_size):
    # Load and preprocess the image
    img = image.load_img(image_path, target_size=target_size)
    img_array = image.img_to_array(img)
    img_array = img_array.reshape((1, *target_size, 3))
    img_array = img_array / 255.0  # Normalize pixel values
    
    # Make prediction
    prediction = model.predict(img_array)
    
    # Decode the prediction
    predicted_label = label_encoder.inverse_transform([np.argmax(prediction)])
    
    return predicted_label[0]

# Example usage:
image_path = "/Users/zaravanthoff/Desktop/MasterProject/Datasets/full_dataset/full_images(2)/22_png.rf.722260fc209cbf978ada11b31cf7b58c.jpg"  # Replace with the path to your image
predicted_logo = predict_logo(image_path, model, label_encoder, target_size)
print("Predicted logo:", predicted_logo)


AttributeError: 'numpy.ndarray' object has no attribute 'load_img'