Notebook by Zara 

#### ``Data preparation``

- Resizing images to a consistent size
- Normalizing pixel values
- Splitting the dataset into training and testing sets

In [1]:
# import libraries 
import pandas as pd
import numpy as np
import os
import cv2
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
# from tensorflow.keras.losses import binary_crossentropy


# Set to CPU for now
# tf.config.set_visible_devices([], 'GPU')

In [2]:
data = pd.read_csv('/Users/zaravanthoff/Desktop/MasterProject/Datasets/PublicDataset/flickr_logos_27_dataset/flickr_logos_27_dataset_training_set_annotation.txt', sep='\s+',header=None)
folder_path = "/Users/zaravanthoff/Desktop/MasterProject/Datasets/PublicDataset/flickr_logos_27_dataset/flickr_logos_27_dataset_images"

In [3]:
# Rename the columns
data.columns = ['Image', 'Class', 'Label', 'Xmin', 'Ymin', 'Xmax', 'Ymax']
data

Unnamed: 0,Image,Class,Label,Xmin,Ymin,Xmax,Ymax
0,144503924.jpg,Adidas,1,38,12,234,142
1,2451569770.jpg,Adidas,1,242,208,413,331
2,390321909.jpg,Adidas,1,13,5,89,60
3,4761260517.jpg,Adidas,1,43,122,358,354
4,4763210295.jpg,Adidas,1,83,63,130,93
...,...,...,...,...,...,...,...
4531,2126991906.jpg,Yahoo,6,15,6,253,54
4532,217288720.jpg,Yahoo,6,136,161,304,222
4533,2472817996.jpg,Yahoo,6,2,4,499,106
4534,2514220918.jpg,Yahoo,6,1,69,342,157


In [4]:
print(data.dtypes)

Image    object
Class    object
Label     int64
Xmin      int64
Ymin      int64
Xmax      int64
Ymax      int64
dtype: object


``Resizing images to a consistent size``

In [5]:
import cv2
import os

# Define the target width and height for resizing
target_width = 416
target_height = 416

resized_images = []
labels = []

# Iterate through each row in the dataset
for index, row in data.iterrows():
    # Extract image filename from the dataset
    image_filename = row[0]  # Accessing by integer index 0
    
    # Construct the full path to the image
    image_path = os.path.join(folder_path, image_filename)
    
    try:
        # Read the image using OpenCV
        image = cv2.imread(image_path)
        
        # Check if the image was read successfully
        if image is None:
            raise FileNotFoundError(f"Unable to read image at path: {image_path}")
        
        # Resize the image to the target size
        resized_image = cv2.resize(image, (target_width, target_height))
        
        # Append resized image to the list
        resized_images.append(resized_image)
        
        # Append label to the list (assuming label is in the second column of the dataframe)
        label = row[2]
        labels.append(label)
        
        print(f"Image processed successfully: {image_filename}")
    except Exception as e:
        print(f"Error processing image at path {image_path}: {str(e)}")

  image_filename = row[0]  # Accessing by integer index 0
  label = row[2]


Image processed successfully: 144503924.jpg
Image processed successfully: 2451569770.jpg
Image processed successfully: 390321909.jpg
Image processed successfully: 4761260517.jpg
Image processed successfully: 4763210295.jpg
Image processed successfully: 4763210295.jpg
Image processed successfully: 4763210295.jpg
Image processed successfully: 4763210295.jpg
Image processed successfully: 4763210295.jpg
Image processed successfully: 4763210295.jpg
Image processed successfully: 4763210295.jpg
Image processed successfully: 4763210295.jpg
Image processed successfully: 1230939811.jpg
Image processed successfully: 1230939811.jpg
Image processed successfully: 2550056374.jpg
Image processed successfully: 3294282629.jpg
Image processed successfully: 3294282629.jpg
Image processed successfully: 3294282629.jpg
Image processed successfully: 3294282629.jpg
Image processed successfully: 3294282629.jpg
Image processed successfully: 3294282629.jpg
Image processed successfully: 3294282629.jpg
Image proces

In [6]:
# Convert lists to numpy arrays
resized_images = np.array(resized_images)
labels = np.array(labels)

# Check the shape of resized images and labels
print("Resized Images Shape:", resized_images.shape)
print("Labels Shape:", labels.shape)

Resized Images Shape: (4536, 416, 416, 3)
Labels Shape: (4536,)


``Normalization of pixel values``

In [7]:
# # Normalize pixel values
# resized_images = resized_images.astype('float32') / 255.0

# # Check the range of pixel values after normalization
# print("Minimum Pixel Value After Normalization:", np.min(resized_images))
# print("Maximum Pixel Value After Normalization:", np.max(resized_images))

``Split data into train, test, validation``

In [8]:
from sklearn.model_selection import train_test_split

# Define the percentage of data for training, validation, and testing
train_percent = 0.8  # 80% of data for training
val_percent = 0.1    # 10% of data for validation
test_percent = 0.1   # 10% of data for testing

# Split the data into training and the rest
train_data, temp_data = train_test_split(data, train_size=train_percent, random_state=42)

# Split the rest into validation and testing
val_data, test_data = train_test_split(temp_data, train_size=val_percent/(val_percent + test_percent), random_state=42)

# Display the number of samples in each set
print("Number of samples in training set:", len(train_data))
print("Number of samples in validation set:", len(val_data))
print("Number of samples in test set:", len(test_data))

# Display the number of unique images in each set
print("Number of unique images in training set:", len(train_data['Image'].unique()))
print("Number of unique images in validation set:", len(val_data['Image'].unique()))
print("Number of unique images in test set:", len(test_data['Image'].unique()))

Number of samples in training set: 3628
Number of samples in validation set: 454
Number of samples in test set: 454
Number of unique images in training set: 776
Number of unique images in validation set: 284
Number of unique images in test set: 284


``YOLO architecture``

In [9]:
def yolo_model(input_shape, num_classes):
    # Input layer
    inputs = tf.keras.Input(shape=input_shape)

    # YOLO backbone
    backbone = tf.keras.applications.MobileNetV2(input_tensor=inputs, include_top=False, weights='imagenet')
    
    # Freeze the backbone layers
    backbone.trainable = False

    # YOLO head
    x = layers.GlobalAveragePooling2D()(backbone.output)
    x = layers.Dense(256, activation='relu')(x)
    x = layers.Dropout(0.5)(x)
    
    # Output layer for bounding box coordinates
    bbox_output = layers.Dense(4, activation='linear', name='bbox_output')(x)  # Linear activation for regression
    
    # Output layer for class prediction
    class_output = layers.Dense(num_classes, activation='softmax', name='class_output')(x)
    
    # Create the model
    model = tf.keras.Model(inputs=inputs, outputs=[bbox_output, class_output])

    return model

def bounding_box_loss(y_true, y_pred):
    # Compute the Mean Squared Error (MSE) loss for bounding box regression
    mse_loss = tf.keras.losses.mean_squared_error(y_true, y_pred)
    loss = tf.reduce_mean(mse_loss)
    return loss

# Define input shape and number of classes
input_shape = (416, 416, 3)  # Example input shape (adjust as needed)
num_classes = 27  # Example number of classes (adjust as needed)

# Create the YOLO model
model = yolo_model(input_shape, num_classes)

# Compile the model with custom loss function
model.compile(optimizer='adam', loss={'bbox_output': bounding_box_loss, 'class_output': 'categorical_crossentropy'}, metrics={'class_output': 'accuracy'})

# Display the model summary
# model.summary()


  backbone = tf.keras.applications.MobileNetV2(input_tensor=inputs, include_top=False, weights='imagenet')
2024-04-09 16:52:13.754806: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2024-04-09 16:52:13.754850: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2024-04-09 16:52:13.754864: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2024-04-09 16:52:13.755078: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-04-09 16:52:13.755667: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


``Define data generators``

In [20]:
from tensorflow.keras.preprocessing import image
import numpy as np

batch_size = 32  # Example batch size
target_size = (416, 416)  # Example target size for YOLO model


# Define a custom data generator
def custom_data_generator(data, batch_size, target_size):
    num_samples = len(data)
    while True:
        # Shuffle the data
        data = data.sample(frac=1).reset_index(drop=True)
        
        # Generate batches
        for offset in range(0, num_samples, batch_size):
            batch_data = data[offset:offset+batch_size]
            batch_images = np.zeros((batch_size, *target_size, 3))
            batch_bboxes = np.zeros((batch_size, 4))
        for i, (_, row) in enumerate(batch_data.iterrows()):
            # Load and preprocess image
            image_path = os.path.join(folder_path, row['Image'])
            image = cv2.imread(image_path)
            image = cv2.resize(image, target_size)
            image = image.astype('float32') / 255.0
            
            # Extract bounding box coordinates and normalize
            xmin = row['Xmin'] / target_size[1]
            ymin = row['Ymin'] / target_size[0]
            xmax = row['Xmax'] / target_size[1]
            ymax = row['Ymax'] / target_size[0]
            bbox = [xmin, ymin, xmax, ymax]
            
            # Append image and bounding box to batch
            batch_images[i] = image
            batch_bboxes[i] = bbox

        yield batch_images, batch_bboxes


# # Create instances of custom data generator
# train_generator = custom_data_generator(train_data, batch_size, target_size)
# val_generator = custom_data_generator(val_data, batch_size, target_size)
# test_generator = custom_data_generator(test_data, batch_size, target_size)


In [21]:
# Define batch size
batch_size = 32

# Create TensorFlow Datasets from generator functions
train_dataset = tf.data.Dataset.from_generator(
    lambda: custom_data_generator(train_data, batch_size, target_size),
    output_types=(tf.float32, tf.float32),
    output_shapes=([None, 416, 416, 3], [None, 4])
).batch(batch_size)

val_dataset = tf.data.Dataset.from_generator(
    lambda: custom_data_generator(val_data, batch_size, target_size),
    output_types=(tf.float32, tf.float32),
    output_shapes=([None, 416, 416, 3], [None, 4])
).batch(batch_size)

test_dataset = tf.data.Dataset.from_generator(
    lambda: custom_data_generator(test_data, batch_size, target_size),
    output_types=(tf.float32, tf.float32),
    output_shapes=([None, 416, 416, 3], [None, 4])
).batch(batch_size)


``Training``

In [22]:
# Train the model
history = model.fit(train_dataset,
                    epochs=10,
                    validation_data=val_dataset)

Epoch 1/10


ValueError: Input 0 of layer "functional_1" is incompatible with the layer: expected shape=(None, 416, 416, 3), found shape=(None, None, 416, 416, 3)

In [None]:
# Evaluate the model on the testing set
test_loss = model.evaluate(test_generator)
print("Test Loss:", test_loss)

``Prediction``

In [12]:
import cv2
import numpy as np

# Load the image
image_path = '/Users/zaravanthoff/Desktop/MasterProject/Datasets/PrintAds/17329368.png'
image = cv2.imread(image_path)

# Preprocess the image (resize and normalize)
resized_image = cv2.resize(image, (416, 416))
resized_image = resized_image.astype('float32') / 255.0

# Make predictions
predictions = model.predict(np.expand_dims(resized_image, axis=0))

# Post-process predictions to extract bounding box coordinates
predicted_bbox = predictions[0][0]  # Assuming only one prediction for simplicity
xmin, ymin, xmax, ymax = predicted_bbox

# Convert coordinates from normalized values to actual image coordinates
height, width, _ = image.shape
xmin = int(xmin * width)
xmax = int(xmax * width)
ymin = int(ymin * height)
ymax = int(ymax * height)

2024-04-09 15:55:01.965330: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


**Print coordinates xmin, ymin, xmax, ymax**

In [13]:
# Print predicted bounding box coordinates
print("Predicted Bounding Box Coordinates:")
print("xmin:", xmin)
print("ymin:", ymin)
print("xmax:", xmax)
print("ymax:", ymax)

Predicted Bounding Box Coordinates:
xmin: 193
ymin: 279
xmax: 200
ymax: 297


**Print actual bounding box**

In [14]:
# # Draw the bounding box on the original image
# color = (0, 255, 0)  # Green color for the bounding box
# thickness = 2  # Thickness of the bounding box
# cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, thickness)

# # Display the image with the bounding box
# cv2.imshow('Image with Bounding Box', image)
# cv2.waitKey(0)
# cv2.destroyAllWindows()  # Close the OpenCV window