Notebook by Zara

**``CNN using flickr_logos_27_dataset``**

In [6]:
# Import libraries

import pandas as pd
import cv2
import numpy as np
import os
import tensorflow as tf


from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.applications.vgg16 import VGG16
from tensorflow.keras.optimizers import Adam

import tensorflow as tf
from tensorflow.keras import layers, models



In [7]:
# Option 1
data = pd.read_csv('flickr_logos_27_dataset_training_set_annotation.txt', sep='\s+',header=None)

# # Option 2
# data = pd.read_csv('/Users/zaravanthoff/Desktop/MasterProject/Datasets/full_dataset/full_dataset.csv')
# data.drop(columns=['width', 'height'], inplace=True)
# data

In [8]:
# Print out column names
print(data.columns)

Index([0, 1, 2, 3, 4, 5, 6], dtype='int64')


In [9]:
data

Unnamed: 0,0,1,2,3,4,5,6
0,144503924.jpg,Adidas,1,38,12,234,142
1,2451569770.jpg,Adidas,1,242,208,413,331
2,390321909.jpg,Adidas,1,13,5,89,60
3,4761260517.jpg,Adidas,1,43,122,358,354
4,4763210295.jpg,Adidas,1,83,63,130,93
...,...,...,...,...,...,...,...
4531,2126991906.jpg,Yahoo,6,15,6,253,54
4532,217288720.jpg,Yahoo,6,136,161,304,222
4533,2472817996.jpg,Yahoo,6,2,4,499,106
4534,2514220918.jpg,Yahoo,6,1,69,342,157


Explanation dataset:

- Column 3: x-coordinate of the top-left corner. (xmin)
- Column 4: y-coordinate of the top-left corner. (ymin)
- Column 5: x-coordinate of the bottom-right corner. (xmax)
- Column 6: y-coordinate of the bottom-right corner. (ymax)

For example, in the first row:

- Image: "144503924.jpg"
- Brand: "Adidas"
- Class label: 1
- Bounding box coordinates: (38, 12, 234, 142)

In [10]:
# Option 1
# Folder path containing the images
folder_path = "/Users/zaravanthoff/Desktop/MasterProject/Datasets/flickr_logos_27_dataset/flickr_logos_27_dataset_images/"

# # Option 2
# # Folder path containing the images
# folder_path = "/Users/zaravanthoff/Desktop/MasterProject/Datasets/full_dataset/full_images(2)"

In [11]:
# Iterate through each row in the dataset
for index, row in data.iterrows():
    # Extract image filename from the dataset
    image_filename = row[0]  # Accessing by integer index 0
    
    # Construct the full path to the image
    image_path = os.path.join(folder_path, image_filename)
    
    # Read the image using OpenCV
    image = cv2.imread(image_path)
    
    # # Display the image (you can perform any processing here)
    # cv2.imshow("Image", image)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()

[ WARN:0@166.714] global loadsave.cpp:248 findDecoder imread_('/Users/zaravanthoff/Desktop/MasterProject/Datasets/flickr_logos_27_dataset/flickr_logos_27_dataset_images/144503924.jpg'): can't open/read file: check file path/integrity
[ WARN:0@166.716] global loadsave.cpp:248 findDecoder imread_('/Users/zaravanthoff/Desktop/MasterProject/Datasets/flickr_logos_27_dataset/flickr_logos_27_dataset_images/2451569770.jpg'): can't open/read file: check file path/integrity
[ WARN:0@166.716] global loadsave.cpp:248 findDecoder imread_('/Users/zaravanthoff/Desktop/MasterProject/Datasets/flickr_logos_27_dataset/flickr_logos_27_dataset_images/390321909.jpg'): can't open/read file: check file path/integrity
[ WARN:0@166.716] global loadsave.cpp:248 findDecoder imread_('/Users/zaravanthoff/Desktop/MasterProject/Datasets/flickr_logos_27_dataset/flickr_logos_27_dataset_images/4761260517.jpg'): can't open/read file: check file path/integrity
[ WARN:0@166.716] global loadsave.cpp:248 findDecoder imread_(

#### **Augmentation (optional)**

Augmentation refers to the technqiue of artificially increasing the size of a dataset by applying various transformations to the existing data samples. These transformations introduce variations in the data while preserving its original meaning, thereby making the model more robust and improving its generalization ability. These operations (roation, flipping, scaling, shifting, zooming, changing brightness/contrast) mimic real-world variations that can occur in the data and help the model learn to generalize better to unseen examples.

#### **Data preprocessing (prepare the dataset)**

- Resizing images to a consistent size
- Normalizing pixel values
- Splitting the dataset into training and testing sets

``Resizing images to a consistent size``

In [12]:
# Define the target size for resizing
target_height = 100
target_width = 100

In [13]:
# List to store resized images and corresponding labels
resized_images = []
labels = []

In [14]:
# Iterate through each row in the dataset
for index, row in data.iterrows():
    # Extract image filename from the dataset
    image_filename = row[0]  # Accessing by integer index 0
    
    # Construct the full path to the image
    image_path = os.path.join(folder_path, image_filename)
    
    # Read the image using OpenCV
    image = cv2.imread(image_path)
    
    # Resize the image to the target size
    resized_image = cv2.resize(image, (target_width, target_height))
    
    # Append resized image to the list
    resized_images.append(resized_image)
    
    # Append label to the list (assuming label is in the second column of the dataframe)
    label = row[1]
    labels.append(label)

[ WARN:0@166.907] global loadsave.cpp:248 findDecoder imread_('/Users/zaravanthoff/Desktop/MasterProject/Datasets/flickr_logos_27_dataset/flickr_logos_27_dataset_images/144503924.jpg'): can't open/read file: check file path/integrity


error: OpenCV(4.9.0) /Users/xperience/GHA-OpenCV-Python2/_work/opencv-python/opencv-python/opencv/modules/imgproc/src/resize.cpp:4152: error: (-215:Assertion failed) !ssize.empty() in function 'resize'


In [None]:
# Convert lists to numpy arrays
resized_images = np.array(resized_images)
labels = np.array(labels)

In [None]:
# Check the shape of resized images and labels
print("Resized Images Shape:", resized_images.shape)
print("Labels Shape:", labels.shape)

Resized Images Shape: (4536, 100, 100, 3)
Labels Shape: (4536,)


``Normalizization of pixel values``

Scaling the pixel values to be within a certain range, such as [0. 1]. This can help the neural network converge faster during training. Dividing by 255.0 normalizes the pixel values to be within the range [0, 1]. 
- Grayscale images: For grayscale images, each pixel value represents the intensity of that pixel, and it typically ranges from 0 (black) and 255 (white). Dividing by 255 scales these values to the range [0, 1], where 0 represents black and 1 represents white.
- Color images: For color images, each pixel has three channels (red, green, and blue). Each channel has pixel values ranging from 0 to 255. By dividing by 255, we normalize each channel independelty, ensuring that each channel's values fall within the [0, 1] range. 

In [None]:
# Normalize pixel values
resized_images = resized_images.astype('float32') / 255.0

# Check the range of pixel values after normalization
print("Minimum Pixel Value After Normalization:", np.min(resized_images))
print("Maximum Pixel Value After Normalization:", np.max(resized_images))

Minimum Pixel Value After Normalization: 0.0
Maximum Pixel Value After Normalization: 1.0


-----------------------------------

#### WORKING MODEL 

#### **Model Architecture**

This involves stacking convolutional layers, pooling layers, and fully connected layers.

In [None]:
# Randomly sample a subset of the dataset
num_samples_to_keep = 4000  # Adjust this number based on your requirements
num_instances = len(resized_images)
sampled_indices = np.random.choice(num_instances, num_samples_to_keep, replace=False)
sampled_images = resized_images[sampled_indices]
sampled_labels = labels[sampled_indices]

random_state = 42 ensures reproducibility, meaning that the same split will be generated each time you run the code

In [None]:
# Split the sampled subset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(sampled_images, sampled_labels, test_size=0.2, random_state=42)

In [None]:
# Print the shapes of the training and testing sets
print("Training set shape (X_train):", X_train.shape)
print("Training set shape (y_train):", y_train.shape)
print("Testing set shape (X_test):", X_test.shape)
print("Testing set shape (y_test):", y_test.shape)

Training set shape (X_train): (3200, 100, 100, 3)
Training set shape (y_train): (3200,)
Testing set shape (X_test): (800, 100, 100, 3)
Testing set shape (y_test): (800,)


In [None]:
# Number of logo classes in your dataset
num_classes = 10  # Replace 10 with the actual number of logo classes

In [None]:
model = tf.keras.Sequential([
    # Convolutional layers
    tf.keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=(target_height, target_width, 3)),
    tf.keras.layers.MaxPooling2D((6, 6), strides=2, padding='same'),
    tf.keras.layers.BatchNormalization(),
        
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((6, 6), strides=2, padding='same'),
    tf.keras.layers.BatchNormalization(),
        
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((6, 6), strides=2, padding='same'),
    tf.keras.layers.BatchNormalization(),
        
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((6, 6), strides=2, padding='same'),
    tf.keras.layers.BatchNormalization(),
        
    # Flatten layer
    tf.keras.layers.Flatten(),
        
    # Fully connected layers
    tf.keras.layers.Dense(33, activation='relu'),
    tf.keras.layers.Dropout(0.5),
        
    tf.keras.layers.Dense(33, activation='relu'),
    tf.keras.layers.Dropout(0.5),
        
    # Output layer
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

  super().__init__(
2024-03-21 16:19:31.555271: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2024-03-21 16:19:31.555333: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2024-03-21 16:19:31.555353: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2024-03-21 16:19:31.555601: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-03-21 16:19:31.555636: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [None]:
# # Define the CNN model
# model = models.Sequential([
#     # Convolutional layers
#     layers.Conv2D(32, (3, 3), activation='relu', input_shape=(target_height, target_width, 3)),
#     layers.MaxPooling2D((2, 2)),
#     layers.Conv2D(64, (3, 3), activation='relu'),
#     layers.MaxPooling2D((2, 2)),
#     layers.Conv2D(128, (3, 3), activation='relu'),
#     layers.MaxPooling2D((2, 2)),
    
#     # Flatten layer to convert 3D feature maps to 1D feature vectors
#     layers.Flatten(),
    
#     # Fully connected layers
#     layers.Dense(512, activation='relu'),
#     layers.Dropout(0.5),  # Dropout layer to reduce overfitting
#     layers.Dense(num_classes, activation='softmax')  # Output layer with softmax activation for multi-class classification
# ])

In [None]:
# Print model summary
model.summary()

#### **Training**

This involves feeding the training images through the network, computing the loss, and adjusting the network's weights using optimization algorithms like Stochastic Gradient Descnet (SGD) or Adam.

In [None]:
# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
from sklearn.preprocessing import LabelEncoder

# Initialize LabelEncoder
label_encoder = LabelEncoder()

# Fit and transform labels in training set
y_train_encoded = label_encoder.fit_transform(y_train)

# Transform labels in testing set (using the same encoder)
y_test_encoded = label_encoder.transform(y_test)

In [None]:
# Train the model
history = model.fit(X_train, y_train_encoded, epochs=10, validation_data=(X_test, y_test_encoded))

Epoch 1/10


2024-03-21 16:19:57.770751: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 70ms/step - accuracy: 0.0755 - loss: 0.9060 - val_accuracy: 0.0662 - val_loss: 0.8677
Epoch 2/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 64ms/step - accuracy: 0.0884 - loss: 2.0180 - val_accuracy: 0.0425 - val_loss: 18.6037
Epoch 3/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 64ms/step - accuracy: 0.0495 - loss: 67.1693 - val_accuracy: 0.0500 - val_loss: 550.3566
Epoch 4/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 64ms/step - accuracy: 0.0424 - loss: 638.2719 - val_accuracy: 0.0500 - val_loss: 1548.8213
Epoch 5/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 66ms/step - accuracy: 0.0400 - loss: 1796.2828 - val_accuracy: 0.0437 - val_loss: 1326.0061
Epoch 6/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 64ms/step - accuracy: 0.0490 - loss: 3199.3257 - val_accuracy: 0.0500 - val_loss: 1841.2544
Epoch 7/10


In [None]:
# Evaluate the model on the testing set
test_loss, test_acc = model.evaluate(X_test, y_test_encoded)

# Print the test accuracy
print('Test accuracy:', test_acc)

[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.0389 - loss: 3397.5798
Test accuracy: 0.03999999910593033


Versus the standard CNN architecture I used before, the Patch-CNN performs way better. The accuracy of the standard CNN was 0.028 and the and the Patch-CNN is now 0.04!

-------------------------

#### **Final model**

In [None]:
from tensorflow.keras.preprocessing import image

def predict_logo(image_path, model, label_encoder):
    # Load and preprocess the image
    img = image.load_img(image_path, target_size=(target_height, target_width))
    img_array = image.img_to_array(img)
    img_array = img_array.reshape((1, target_height, target_width, 3))
    img_array = img_array / 255.0  # Normalize pixel values
    
    # Make prediction
    prediction = model.predict(img_array)
    
    # Decode the prediction
    predicted_label = label_encoder.inverse_transform([np.argmax(prediction)])
    
    return predicted_label[0]

# Example usage:
image_path = "/Users/zaravanthoff/Desktop/MasterProject/Datasets/full_dataset/full_images(2)/35_jpg.rf.73689f20496c4b1f1245e24e88b18a3e.jpg"  # Replace with the path to your image
predicted_logo = predict_logo(image_path, model, label_encoder)
print("Predicted logo:", predicted_logo)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
Predicted logo: Fedex
