In [None]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


Access files and folders stored in your Google Drive within your Colab notebook by referencing the path '/content/drive/'. This allows you to read, write, or manipulate these files using Python code in the Colab environment.

In [None]:
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import os
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator

imported the necessary libraries and modules required for handling image data, creating the neural network architecture, and setting up data augmentation techniques for training model.

In [None]:
# Assuming these are the image dimensions
image_height = 224
image_width = 224

# Load data
base_path_images = '/content/drive/My Drive/dataset/My Game Pics.v5i.tensorflow/train'
path_to_csv = '/content/drive/My Drive/dataset/My Game Pics.v5i.tensorflow/train/_annotations.csv'
df = pd.read_csv(path_to_csv)

df will contain the data from the CSV file, which can be further process and use to link annotations or labels to the corresponding images in dataset.

In [None]:
# Split the data into Train, Validation, and Test sets
train_data, test_data = train_test_split(df, test_size=0.15, random_state=42)
train_data, val_data = train_test_split(train_data, test_size=0.15, random_state=42)

 splitting the dataset into training, validation, and test sets using the train_test_split function twice.

In [None]:
def load_images_and_labels(data):
    images = []
    labels = []
    for _, row in data.iterrows():
        img_filename = row['filename']
        img_path = os.path.join(base_path_images, img_filename)
        image = cv2.imread(img_path)
        if image is not None:
            image = cv2.resize(image, (image_width, image_height))
            images.append(image)
            labels.append([row['xmin'], row['ymin'], row['xmax'], row['ymax']])
    return np.array(images), np.array(labels)


This function essentially reads images from the file paths provided in the dataset, resizes them to a standardized dimension, and collects their corresponding bounding box coordinates for further processing in machine learning pipeline.

In [None]:
# Data augmentation setup
train_datagen = ImageDataGenerator(
    rescale=1./255,  # Normalizing pixel values between 0 and 1
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest')

val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

These generators are useful for training deep learning models by augmenting and normalizing the input images. They facilitate better generalization and learning by exposing the model to various transformations and ensuring consistent input data scaling.

In [None]:
train_images, train_labels = load_images_and_labels(train_data)
val_images, val_labels = load_images_and_labels(val_data)
test_images, test_labels = load_images_and_labels(test_data)

Each of sets of variables (_images and _labels) holds the image data and their respective annotations or labels, which will be used during the model training, validation, and evaluation stages.

In [9]:
# Define your CNN model
model = Sequential()

# Add the feature extractor (backbone architecture)
model.add(Conv2D(filters=32, kernel_size=(7, 7), activation='relu', input_shape=(image_height, image_width, 3)))
model.add(Conv2D(filters=32, kernel_size=(9, 9), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))  # Adding MaxPooling layer
model.add(Conv2D(filters=64, kernel_size=(7, 7), activation='relu'))
model.add(Conv2D(filters=64, kernel_size=(9, 9), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))  # Adding MaxPooling layer
model.add(Conv2D(filters=64, kernel_size=(9, 9), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))  # Adding MaxPooling layer
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))


The defined layers form the backbone architecture of the CNN for feature extraction.

In [10]:
model.add(BatchNormalization())  # Batch normalization

# Flatten and add Dense layers
model.add(Flatten())
model.add(Dense(units=128, activation='relu'))
model.add(Dense(units=4, activation='linear'))  # Output layer for bounding box coordinates
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 218, 218, 32)      4736      
                                                                 
 conv2d_1 (Conv2D)           (None, 210, 210, 32)      82976     
                                                                 
 max_pooling2d (MaxPooling2  (None, 105, 105, 32)      0         
 D)                                                              
                                                                 
 conv2d_2 (Conv2D)           (None, 99, 99, 64)        100416    
                                                                 
 conv2d_3 (Conv2D)           (None, 91, 91, 64)        331840    
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 45, 45, 64)        0         
 g2D)                                                   

In [11]:
# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error',metrics=['accuracy'])  # Use appropriate loss function for bounding box regression

In [None]:
# Train the model using images and labels with data augmentation
history = model.fit(train_images, train_labels, epochs=100, validation_data=(test_images, test_labels))


Epoch 1/100

In [None]:
# Evaluate the model on test data
loss = model.evaluate(test_datagen.flow(test_images, test_labels))

print(f"Test Loss: {loss}")


Test Loss: 63975.765625


In [None]:
# Perform predictions using the trained model
batch_size = 32
num_test_samples = len(test_images)
predictions = []

for i in range(0, num_test_samples, batch_size):
    batch_images = test_images[i:i+batch_size]
    batch_predictions = model.predict(test_datagen.flow(batch_images, batch_size=batch_size))
    predictions.extend(batch_predictions)

# Convert predictions to a numpy array
predictions = np.array(predictions)

In [None]:
# Compute IoU (Intersection over Union) for each predicted bounding box
def calculate_iou(y_true, y_pred):
    # Extract coordinates
    true_xmin, true_ymin, true_xmax, true_ymax = y_true
    pred_xmin, pred_ymin, pred_xmax, pred_ymax = y_pred

    # Calculate intersection coordinates
    xmin = max(true_xmin, pred_xmin)
    ymin = max(true_ymin, pred_ymin)
    xmax = min(true_xmax, pred_xmax)
    ymax = min(true_ymax, pred_ymax)

    # Calculate area of intersection rectangle
    intersection_area = max(0, xmax - xmin) * max(0, ymax - ymin)

    # Calculate areas of true and predicted rectangles
    true_area = (true_xmax - true_xmin) * (true_ymax - true_ymin)
    pred_area = (pred_xmax - pred_xmin) * (pred_ymax - pred_ymin)

    # Calculate Union area
    union_area = true_area + pred_area - intersection_area

    # Avoid division by zero
    epsilon = 1e-5

    # Calculate IoU
    iou = intersection_area / (union_area + epsilon)
    return iou

he function returns the computed IoU, which quantifies the overlap between the predicted and true bounding boxes. Higher IoU values indicate better overlap and accuracy of the predicted bounding box concerning the true one.

In [None]:
iou_scores = []
for i in range(len(test_images)):
    true_bbox = test_labels[i]
    pred_bbox = predictions[i]
    iou = calculate_iou(true_bbox, pred_bbox)
    iou_scores.append(iou)

# Calculate mean IoU
mean_iou = np.mean(iou_scores)
print(f"Mean IoU: {mean_iou}")

# Plotting accuracy
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label = 'val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0, 1])
plt.legend(loc='lower right')
plt.show()

# Plotting loss
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['val_loss'], label = 'val_loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc='upper right')
plt.show()


The mean IoU gives an overall measure of the model's accuracy in predicting bounding boxes