In [106]:
import pandas as pd
import os

In [107]:
# Dataset locations (machine-specific absolute paths; edit these for your setup).
# Raw strings are used so the Windows backslashes are taken literally instead of
# being parsed as (invalid) escape sequences such as "\D", "\c" or "\l".
OUTPUT_DATA_DIR = r'D:\Data\Datasets\custom_traffic_light'  # root folder the per-class crops are written to
DATA_DIR = r'D:\Data\Datasets\lisa_traffic_light'           # root folder of the source dataset

In [108]:
# Sequences to load, keyed by top-level sequence name. Each value lists the
# sub-sequence folders; an empty string contributes no extra path component.
sequences = {
    'dayTrain': [f'dayClip{n}' for n in range(1, 14)],
    'daySequence1': [''],
}

# Every annotation file lives below this folder
annotation_root = os.path.join(DATA_DIR, 'Annotations', 'Annotations')

# Read one CSV per (sequence, sub-sequence) pair and tag its rows with their origin
dfs = []
for seq, subseqs in sequences.items():
    for subseq in subseqs:
        csv_path = os.path.join(annotation_root, seq, subseq, 'frameAnnotationsBOX.csv')
        tagged = pd.read_csv(csv_path, sep=';').assign(sequence=seq, subsequence=subseq)
        dfs.append(tagged)

# Stack all per-clip frames into a single table with a fresh 0..n-1 index
df = pd.concat(dfs, ignore_index=True)

# Bounding-box extents derived from the corner coordinates
df['width'] = df['Lower right corner X'].sub(df['Upper left corner X'])
df['height'] = df['Lower right corner Y'].sub(df['Upper left corner Y'])

In [109]:
from PIL import Image
from tqdm import tqdm
import re

# Parameters
image_size = 80                     # side length (pixels) of every saved crop
height_thresh = 32                  # annotations shorter than this (pixels) are skipped
scale = image_size / height_thresh  # crop window side = largest box side * scale (2.5 with these values)

# Ensure one output directory exists per annotation class (all classes in df,
# including any that end up with no crops after the height filter)
classes = df['Annotation tag'].unique()
for cls in classes:
    os.makedirs(os.path.join(OUTPUT_DATA_DIR, cls), exist_ok=True)

# Keep only the tall-enough traffic lights and group them by source frame
filtered_df = df[df['height'] >= height_thresh]
grouped = filtered_df.groupby('Filename')

box_columns = ['Upper left corner X', 'Upper left corner Y',
               'Lower right corner X', 'Lower right corner Y']

# Each group holds every qualifying annotation of one frame
for filepath, group in tqdm(grouped, desc="Processing images"):
    # Sequence of the frame. Assumes it is the same for all rows in the group.
    seq = group['sequence'].iloc[0]
    subseq = group['subsequence'].iloc[0]

    # Locate the frame on disk. The frame number is the run of digits right
    # before ".jpg"; a name that does not match makes re.search return None,
    # so .group(1) raises AttributeError.
    basename = os.path.basename(filepath)
    filenum = re.search(r'(\d+)\.jpg$', basename).group(1)
    image_path = os.path.join(DATA_DIR, seq, seq, subseq, 'frames', basename)

    # Bounding boxes as [left, upper, right, lower] and their class labels
    bounding_boxes = group[box_columns].values.tolist()
    group_classes = group['Annotation tag'].values.tolist()

    # The context manager closes the underlying file once all crops of this
    # frame are written, instead of leaving one open handle per frame.
    with Image.open(image_path) as img:
        for i, (box, cls) in enumerate(zip(bounding_boxes, group_classes)):
            left, upper, right, lower = box

            # Center of the annotated box
            center_x = (left + right) / 2
            center_y = (upper + lower) / 2

            # Square window around the center whose side is the larger box
            # dimension multiplied by `scale`
            half_size = max(right - left, lower - upper) * scale / 2

            # Clamp the window to the image. Near a border this makes the
            # window non-square, so the resize below stretches the crop.
            new_box = [
                max(center_x - half_size, 0),           # left
                max(center_y - half_size, 0),           # upper
                min(center_x + half_size, img.width),   # right
                min(center_y + half_size, img.height),  # lower
            ]

            # Crop, then resize to image_size x image_size
            cropped_img = img.crop(new_box).resize((image_size, image_size))

            # <sequence>_<subsequence>_<frame number>_<index of the box within the frame>.jpg
            out_name = f"{seq}_{subseq}_{filenum}_{i}.jpg"
            out_path = os.path.join(OUTPUT_DATA_DIR, cls, out_name)

            # Save the crop into the folder of its class
            cropped_img.save(out_path)

Processing images: 100%|██████████| 13294/13294 [04:04<00:00, 54.27it/s]


In [110]:
# Number of annotations per class among the rows that passed the height filter
tag_counts = filtered_df['Annotation tag'].value_counts()
tag_counts

stop           14061
go             11347
stopLeft        6595
goLeft           686
Name: Annotation tag, dtype: int64