### Setup environment

In [None]:
import sys
import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
import os

import tensorflow as tf

#### User defined function

In [None]:
from util_func import *

### Preprocessing of data
The data was obtained from http://www.nlpr.ia.ac.cn/pal/trafficdata/recognition.html

#### Step 1: Data loading

##### The annotation meaning

Example: 000_0001.png;134;128;19;7;120;117;0;

- 000_0001.png: This is the name or identifier of the image associated with the traffic sign.

The remaining values represent information about the traffic sign in the following order:
- 134: width of image
- 128: height of image
- 19: X-coordinate of the top-left corner of the traffic sign bounding box.
- 7: Y-coordinate of the top-left corner of the traffic sign bounding box.
- 120: X-coordinate of the bottom right corner
- 117: Y-coordinate of the bottom right corner
- 0: Class label or identifier of the traffic sign.

In [None]:
def load_img_annotation(img_path, annotation_path):
    """Load the images of a folder together with their annotation lines.

    Args:
        img_path: Directory that contains the image files.
        annotation_path: Text file with one semicolon-separated annotation
            per line; the first field of a line is the image file name.

    Returns:
        A tuple ``(img_list, sorted_annotation_list)``. Both lists have the
        same length and the same order: ``sorted_annotation_list[i]`` is the
        annotation line whose first field names the file ``img_list[i]`` was
        read from. Files that ``cv.imread`` cannot decode, and files without
        an annotation line, are left out of both lists.
    """
    # step 1: read the annotation file and index its lines by image name
    with open(annotation_path, 'r') as file:
        annotation_by_name = {}
        for line in file.read().splitlines():
            # keep the first line seen for a name so one image maps to one annotation
            annotation_by_name.setdefault(line.split(";")[0], line)

    img_list = []
    sorted_annotation_list = []

    # step 2: load each image and pair it with its annotation
    for image in os.listdir(img_path):
        annotation = annotation_by_name.get(image)
        if annotation is None:
            # no annotation for this file: keeping it would shift every later pair
            continue

        img = cv.imread(os.path.join(img_path, image))
        if img is None:
            # cv.imread returns None for files it cannot read as an image
            continue

        img_list.append(img)
        sorted_annotation_list.append(annotation)

    return img_list, sorted_annotation_list

In [None]:
# load dataset (train set and test set)
# The annotation paths are built with os.path.join so that no backslash sits
# inside a string literal ("\T" is an invalid escape sequence) and the paths
# resolve on every operating system.
train_img_list, train_annotation_list = load_img_annotation(
    "data/tsrd-train",
    os.path.join("data", "TSRD-Train Annotation", "TsignRecgTrain4170Annotation.txt"),
)
test_img_list, test_annotation_list = load_img_annotation(
    "data/TSRD-Test",
    os.path.join("data", "TSRD-Test Annotation", "TsignRecgTest1994Annotation.txt"),
)

#### Step 2: Crop image + Resizing

Crop each image to the traffic-sign bounding box given by its annotation.

In [None]:
# cut a rectangular region out of an image
def crop(img, top_left, bottom_right):
    """Return the region of ``img`` spanned by two corner points.

    Args:
        img: Array indexed as ``img[row, column]``.
        top_left: ``(x, y)`` of the first corner; ``x`` selects the column
            and ``y`` the row.
        bottom_right: ``(x, y)`` of the opposite corner (exclusive bound).

    Returns:
        ``img`` sliced from the first corner up to the second one.
    """
    row_span = slice(top_left[1], bottom_right[1])
    column_span = slice(top_left[0], bottom_right[0])
    return img[row_span, column_span]

In [None]:
# crop the image based on the coordinate given
def img_crop(img_list, annotation_list):
    """Crop every image to the bounding box stored in its annotation line.

    Args:
        img_list: Images, indexable as ``img[row, column]``.
        annotation_list: Semicolon-separated annotation lines, where
            ``annotation_list[i]`` belongs to ``img_list[i]``. Fields 3 to 6
            hold the box as x1, y1 (top-left corner) and x2, y2
            (bottom-right corner).

    Returns:
        A list with one cropped image per entry of ``img_list``.

    Raises:
        IndexError: If ``annotation_list`` is shorter than ``img_list``.
        ValueError: If a line has fewer than seven fields or a coordinate
            field is not an integer.
    """
    crop_img = []
    # iterate over every image; stopping at len - 1 would drop the last one
    for i, img in enumerate(img_list):
        x1, y1, x2, y2 = (int(value) for value in annotation_list[i].split(";")[3:7])
        crop_img.append(crop(img, (x1, y1), (x2, y2)))

    return crop_img

In [None]:
# apply the bounding-box crop to the training split ...
cropped_img_train = img_crop(
    img_list=train_img_list,
    annotation_list=train_annotation_list,
)
# ... and to the test split
cropped_img_test = img_crop(
    img_list=test_img_list,
    annotation_list=test_annotation_list,
)

##### Resizing

In [None]:
# resize every cropped sign to 224 x 224
resize_shape = (224, 224)

# for train set
rs_train = [
    cv.resize(img, resize_shape, interpolation=cv.INTER_LINEAR)
    for img in cropped_img_train
]

# for test set
rs_test = [
    cv.resize(img, resize_shape, interpolation=cv.INTER_LINEAR)
    for img in cropped_img_test
]


# check: pass up to the first 20 resized test images to show_img
# (slicing avoids an IndexError when fewer than 20 images are available)
for preview in rs_test[:20]:
    show_img("resized", preview)

#####  Image Normalization





In [None]:
def normalize_img(image_list):
    """Stack images into one array and divide every value by 255.

    Args:
        image_list: Images to stack with ``np.array``.

    Returns:
        A ``float32`` array holding the stacked values divided by 255.0.
    """
    # astype returns a fresh float32 copy, so dividing it in place is safe
    scaled = np.array(image_list).astype('float32')
    scaled /= 255.0
    return scaled

In [None]:
# scale the resized images of both splits with normalize_img
rs_train_normalized = normalize_img(image_list=rs_train)
rs_test_normalized = normalize_img(image_list=rs_test)

Data augmentation

In [17]:
def augment_data(images, labels, augment_size=10000):
    """Generate randomly transformed copies of each image.

    Args:
        images: Iterable of image arrays; each one is reshaped to a batch of
            one before it is passed to the generator.
        labels: Iterable of labels, paired with ``images`` by position.
        augment_size: Number of augmented samples generated for EACH input
            image, so the result holds ``len(images) * augment_size``
            samples. Choose it with the input size in mind; the default
            grows very quickly.

    Returns:
        A tuple ``(augmented_images, augmented_labels)`` of NumPy arrays in
        which every augmented image is paired with the label of the image it
        was generated from.
    """
    # Reached through the module-level `tf` import so the function does not
    # depend on a separate ImageDataGenerator import.
    # NOTE(review): horizontal_flip mirrors the signs; if some classes are
    # direction-dependent this may not match their labels - confirm.
    datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        rotation_range=20,  # Random rotation between -20 and 20 degrees
        width_shift_range=0.1,  # Random horizontal shift by 10% of the image width
        height_shift_range=0.1,  # Random vertical shift by 10% of the image height
        shear_range=0.2,  # Random shear transformation
        zoom_range=0.2,  # Random zoom between 80% and 120%
        horizontal_flip=True,  # Randomly flip images horizontally
        vertical_flip=False,  # No vertical
        fill_mode='nearest'  # Fill mode for filling in newly created pixels
    )
    augmented_images = []
    augmented_labels = []
    for image, label in zip(images, labels):
        image = image.reshape((1,) + image.shape)  # Expand dimensions for flow method
        batches = datagen.flow(image, batch_size=1)
        # flow() yields batches endlessly, so draw exactly augment_size of them
        for _ in range(augment_size):
            augmented_images.append(next(batches)[0])
            augmented_labels.append(label)
    return np.array(augmented_images), np.array(augmented_labels)

# Example usage:
# NOTE(review): load_dataset, resize_images, normalize_images and
# preprocess_labels are not defined in the part of this notebook shown here;
# running this cell stopped with "NameError: name 'load_dataset' is not
# defined". They look like placeholders for this notebook's own loading,
# resizing, normalization and label-encoding steps - TODO confirm and wire in.
dataset_path = 'path/to/your/dataset'  # placeholder path, must be replaced
images, labels = load_dataset(dataset_path)
resized_images = resize_images(images)
normalized_images = normalize_images(resized_images)
encoded_labels = preprocess_labels(labels)
# augment_size is left at its default here, which applies per input image
augmented_images, augmented_labels = augment_data(normalized_images, encoded_labels)

# Now you have the augmented data ready for training your CNN model.
# You can use augmented_images, augmented_labels for training.

NameError: name 'load_dataset' is not defined

: 

Data Labeling

In [None]:
def read_annotation_file(file_path):
    """Read image file names and class labels from an annotation file.

    Each non-blank line is split on ``;``. The first field is taken as the
    image file name and the eighth field as the integer class label.

    Args:
        file_path: Path of the annotation text file.

    Returns:
        A tuple ``(img_filenames, class_labels)`` of two lists in file order.

    Raises:
        IndexError: If a non-blank line has fewer than eight fields.
        ValueError: If the eighth field is not an integer.
    """
    img_filenames = []
    class_labels = []

    # open the file in read mode and process it line by line
    with open(file_path, 'r') as file:
        for line in file:
            line = line.strip()
            if not line:
                # tolerate blank lines, e.g. an empty line at the end of the file
                continue

            info = line.split(';')
            # file name: 1st field, class label: 8th field
            img_filenames.append(info[0])
            class_labels.append(int(info[7]))

    return img_filenames, class_labels

In [None]:
# os.path.join keeps the path portable and avoids the invalid "\T" escape
# sequence that backslashes inside a normal string literal would create
img_filenames, class_labels = read_annotation_file(
    os.path.join("data", "TSRD-Train Annotation", "TsignRecgTrain4170Annotation.txt")
)

Data Encoding

In [None]:
def one_hot_encode(labels, num_classes):
    """Turn integer class labels into one-hot rows.

    Args:
        labels: Sized sequence of integer class indices.
        num_classes: Width of each encoded row.

    Returns:
        A ``float32`` array of shape ``(len(labels), num_classes)`` that is
        zero everywhere except for a 1.0 at each row's label index.
    """
    one_hot = np.zeros((len(labels), num_classes), dtype=np.float32)

    # each `row` is a view into `one_hot`, so writing to it fills the matrix
    for row, class_index in zip(one_hot, labels):
        row[class_index] = 1.0

    return one_hot

In [None]:
# one_hot_encode writes to column `label`, so the encoding must be wider than
# the largest label. Keep at least 57 columns, and widen automatically when a
# label of 57 or above is present (a fixed width of 57 would raise IndexError).
# NOTE(review): confirm the class count against the dataset description.
num_classes = max(57, max(class_labels, default=-1) + 1)
encoded_class_labels = one_hot_encode(class_labels, num_classes)