In [1]:
# import libraries 
import pandas as pd
import numpy as np
import os
import cv2
import tensorflow as tf
from tensorflow.keras import layers, models  # Import models from TensorFlow
from tensorflow.keras.losses import binary_crossentropy
from sklearn.model_selection import train_test_split

In [2]:
data = pd.read_csv('/Users/zaravanthoff/Desktop/MasterProject/Datasets/PublicDataset/flickr_logos_27_dataset/flickr_logos_27_dataset_training_set_annotation.txt', sep='\s+',header=None)
# Rename the columns
data.columns = ['Image', 'Class', 'Label', 'Xmin', 'Ymin', 'Xmax', 'Ymax']

folder_path = "/Users/zaravanthoff/Desktop/MasterProject/Datasets/PublicDataset/flickr_logos_27_dataset/flickr_logos_27_dataset_images"

In [3]:
data

Unnamed: 0,Image,Class,Label,Xmin,Ymin,Xmax,Ymax
0,144503924.jpg,Adidas,1,38,12,234,142
1,2451569770.jpg,Adidas,1,242,208,413,331
2,390321909.jpg,Adidas,1,13,5,89,60
3,4761260517.jpg,Adidas,1,43,122,358,354
4,4763210295.jpg,Adidas,1,83,63,130,93
...,...,...,...,...,...,...,...
4531,2126991906.jpg,Yahoo,6,15,6,253,54
4532,217288720.jpg,Yahoo,6,136,161,304,222
4533,2472817996.jpg,Yahoo,6,2,4,499,106
4534,2514220918.jpg,Yahoo,6,1,69,342,157


In [4]:
# Unique class labels
unique_classes = data['Label'].unique()

``Resizing images to a consistent size``

In [5]:
# Define the target width and height for resizing
target_width = 416
target_height = 416

resized_images = []
labels = []

# Iterate through each row in the dataset
for index, row in data.iterrows():
    # Extract image filename from the dataset
    image_filename = row[0]  # Accessing by integer index 0
    
    # Construct the full path to the image
    image_path = os.path.join(folder_path, image_filename)
    
    try:
        # Read the image using OpenCV
        image = cv2.imread(image_path)
        
        # Check if the image was read successfully
        if image is None:
            raise FileNotFoundError(f"Unable to read image at path: {image_path}")
        
        # Resize the image to the target size
        resized_image = cv2.resize(image, (target_width, target_height))
        
        # Append resized image to the list
        resized_images.append(resized_image)
        
        # Append label to the list (assuming label is in the second column of the dataframe)
        label = row[2]
        labels.append(label)
        
        print(f"Image processed successfully: {image_filename}")
    except Exception as e:
        print(f"Error processing image at path {image_path}: {str(e)}")

  image_filename = row[0]  # Accessing by integer index 0
  label = row[2]


Image processed successfully: 144503924.jpg
Image processed successfully: 2451569770.jpg
Image processed successfully: 390321909.jpg
Image processed successfully: 4761260517.jpg
Image processed successfully: 4763210295.jpg
Image processed successfully: 4763210295.jpg
Image processed successfully: 4763210295.jpg
Image processed successfully: 4763210295.jpg
Image processed successfully: 4763210295.jpg
Image processed successfully: 4763210295.jpg
Image processed successfully: 4763210295.jpg
Image processed successfully: 4763210295.jpg
Image processed successfully: 1230939811.jpg
Image processed successfully: 1230939811.jpg
Image processed successfully: 2550056374.jpg
Image processed successfully: 3294282629.jpg
Image processed successfully: 3294282629.jpg
Image processed successfully: 3294282629.jpg
Image processed successfully: 3294282629.jpg
Image processed successfully: 3294282629.jpg
Image processed successfully: 3294282629.jpg
Image processed successfully: 3294282629.jpg
Image proces

In [6]:
# Convert lists to numpy arrays
resized_images = np.array(resized_images)
labels = np.array(labels)

# Check the shape of resized images and labels
print("Resized Images Shape:", resized_images.shape)
print("Labels Shape:", labels.shape)

Resized Images Shape: (4536, 416, 416, 3)
Labels Shape: (4536,)


``Normalization of pixel values``

In [7]:
# Normalize pixel values
resized_images = resized_images.astype('float32') / 255.0

# Check the range of pixel values after normalization
print("Minimum Pixel Value After Normalization:", np.min(resized_images))
print("Maximum Pixel Value After Normalization:", np.max(resized_images))

Minimum Pixel Value After Normalization: 0.0
Maximum Pixel Value After Normalization: 1.0


``Split data``

In [8]:
# Randomly sample a subset of the dataset
num_samples_to_keep = 1500  # Adjust this number based on your requirements
num_instances = len(resized_images)
sampled_indices = np.random.choice(num_instances, num_samples_to_keep, replace=False)
sampled_images = resized_images[sampled_indices]
sampled_labels = labels[sampled_indices]

In [9]:
# Split the sampled subset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(sampled_images, sampled_labels, test_size=0.2, random_state=42)

# Further split the training set into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42)

# Display the number of samples in each set
print("Number of samples in training set:", len(X_train))
print("Number of samples in validation set:", len(X_val))
print("Number of samples in test set:", len(X_test))

Number of samples in training set: 1080
Number of samples in validation set: 120
Number of samples in test set: 300


In [10]:
# Print the shapes of the training and testing sets
print("Training set shape (X_train):", X_train.shape)
print("Training set shape (y_train):", y_train.shape)
print("Testing set shape (X_test):", X_test.shape)
print("Testing set shape (y_test):", y_test.shape)

Training set shape (X_train): (1080, 416, 416, 3)
Training set shape (y_train): (1080,)
Testing set shape (X_test): (300, 416, 416, 3)
Testing set shape (y_test): (300,)


``Encode``

In [11]:
from sklearn.preprocessing import LabelEncoder

# Initialize LabelEncoder
label_encoder = LabelEncoder()

# Fit and transform labels in training set
y_train = label_encoder.fit_transform(y_train)

# Transform labels in testing set (using the same encoder)
y_test = label_encoder.transform(y_test)

``Model``

In [12]:
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.utils import plot_model

# Define input shape
input_shape = (target_height, target_width, 3)

# Define backbone network (pre-trained ResNet50)
backbone = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)

# Define Region Proposal Network (RPN)
def rpn(base_layers, num_anchors):
    x = Conv2D(512, (3, 3), padding='same', activation='relu', kernel_initializer='normal', name='rpn_conv1')(base_layers)
    x_class = Conv2D(num_anchors, (1, 1), activation='sigmoid', kernel_initializer='uniform', name='rpn_out_class')(x)
    x_reg = Conv2D(num_anchors * 4, (1, 1), activation='linear', kernel_initializer='zero', name='rpn_out_regress')(x)
    return [x_class, x_reg]

# Define ROI Pooling layer
def roi_pooling(conv, rois, pool_size):
    return tf.image.crop_and_resize(conv, rois, tf.range(tf.shape(rois)[0]), pool_size)

# Define classification and regression heads
def classifier(base_layers, input_rois, num_rois, nb_classes):
    pooling_regions = 7
    out_roi_pool = roi_pooling(base_layers, input_rois, pooling_regions)
    out = Flatten()(out_roi_pool)
    out = Dense(4096, activation='relu')(out)
    out = Dense(4096, activation='relu')(out)
    out_class = Dense(nb_classes, activation='softmax', kernel_initializer='zero')(out)
    out_regr = Dense(4 * (nb_classes - 1), activation='linear', kernel_initializer='zero')(out)
    return [out_class, out_regr]

# Define loss function
def rpn_loss_regr(num_anchors):
    def rpn_loss(y_true, y_pred):
        x = y_true[:, :, :, 4 * num_anchors:] - y_pred
        x_abs = K.abs(x)
        x_bool = K.cast(K.less_equal(x_abs, 1.0), 'float32')
        return K.sum(y_true[:, :, :, :4 * num_anchors] * (x_bool * (0.5 * x * x) + (1 - x_bool)))


In [14]:
# Define smooth L1 loss function
def smooth_l1_loss(y_true, y_pred):
    diff = K.abs(y_true - y_pred)
    less_than_one = K.cast(K.less(diff, 1.0), "float32")
    loss = (less_than_one * 0.5 * diff ** 2) + (1 - less_than_one) * (diff - 0.5)
    return loss

# Compile the model
num_anchors = 9  # Number of anchors per position
num_classes = len(unique_classes) + 1  # Number of classes (including background)

# Define inputs
input_image = Input(shape=input_shape)
input_roi = Input(shape=(None, 4))  # Input shape for ROI coordinates

# Generate feature maps from the backbone network
feature_map = backbone(input_image)

# Define Region Proposal Network (RPN)
rpn_output = rpn(feature_map, num_anchors)

# Normalize ROI coordinates
input_roi_normalized = tf.concat([
    input_roi[:, :, :2] / target_height,  # Normalize x_min and y_min
    input_roi[:, :, 2:] / target_width    # Normalize x_max and y_max
], axis=-1)

# Use normalized coordinates for ROI pooling
roi_pool_output = roi_pooling(feature_map, input_roi_normalized, pool_size=(7, 7))

# Define classifier and regression heads
classifier_output = classifier(roi_pool_output, input_roi, num_rois=100, nb_classes=num_classes)

# Define model
model = Model(inputs=[input_image, input_roi], outputs=rpn_output + classifier_output)


ValueError: A KerasTensor cannot be used as input to a TensorFlow function. A KerasTensor is a symbolic placeholder for a shape and dtype, used when constructing Keras Functional models or Keras Functions. You can only use it as input to a Keras layer or a Keras operation (from the namespaces `keras.layers` and `keras.operations`). You are likely doing something like:

```
x = Input(...)
...
tf_fn(x)  # Invalid.
```

What you should do instead is wrap `tf_fn` in a layer:

```
class MyLayer(Layer):
    def call(self, x):
        return tf_fn(x)

x = MyLayer()(x)
```


In [None]:
# Compile the model
model.compile(optimizer='adam',
              loss=[smooth_l1_loss, smooth_l1_loss, 'sparse_categorical_crossentropy', 'sparse_categorical_crossentropy'])

``Training``

In [None]:
# Train the model
history = model.fit([X_train, np.zeros_like(X_train)],  # Passing dummy zeros for ROI input during training
                    [np.zeros((len(X_train), 1, 1, 2 * num_anchors)), np.zeros((len(X_train), 1, 1, 4 * num_anchors)),
                     y_train, y_train],  # Passing y_train twice for RPN and classifier outputs
                    batch_size=32,
                    epochs=10,
                    validation_data=([X_val, np.zeros_like(X_val)],  # Passing dummy zeros for ROI input during validation
                                     [np.zeros((len(X_val), 1, 1, 2 * num_anchors)), np.zeros((len(X_val), 1, 1, 4 * num_anchors)),
                                      y_val, y_val]))

Epoch 1/10


2024-04-10 15:53:10.923929: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 1s/step - accuracy: 0.1553 - loss: 7.3724 - val_accuracy: 0.0500 - val_loss: 2.3153
Epoch 2/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 2s/step - accuracy: 0.2107 - loss: 4.2112 - val_accuracy: 0.2333 - val_loss: 2.7818
Epoch 3/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 2s/step - accuracy: 0.2018 - loss: 3.8128 - val_accuracy: 0.2333 - val_loss: 2.6239
Epoch 4/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 2s/step - accuracy: 0.2119 - loss: 3.0659 - val_accuracy: 0.2250 - val_loss: 2.8696
Epoch 5/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 2s/step - accuracy: 0.2330 - loss: 3.1091 - val_accuracy: 0.2083 - val_loss: 2.2911
Epoch 6/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 2s/step - accuracy: 0.2176 - loss: 2.5336 - val_accuracy: 0.1417 - val_loss: 2.2479
Epoch 7/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━

``Evaluation``

In [None]:
# Evaluate the model
test_loss = model.evaluate([X_test, np.zeros_like(X_test)],  # Passing dummy zeros for ROI input during evaluation
                           [np.zeros((len(X_test), 1, 1, 2 * num_anchors)), np.zeros((len(X_test), 1, 1, 4 * num_anchors)),
                            y_test, y_test])

print('Test Loss:', test_loss)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 253ms/step - accuracy: 0.1578 - loss: 1.9742
Test accuracy: 0.17666666209697723


``Prediction``

In [None]:
# Predictions can be obtained using model.predict()

# Plot the model architecture
plot_model(model, to_file='faster_rcnn_model.png', show_shapes=True)