In [1]:
# Imports

import os
import torch
from torch.utils.data import Dataset
from torchvision.io import read_image
from torchvision import transforms

from torchvision.transforms import ToPILImage
import json
import numpy as np
import random
import cv2

# Seed every random number generator the notebook imports (stdlib, NumPy, torch)
# so that results can be reproduced from a fresh kernel.
SEED = 100
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

  warn(f"Failed to load image Python extension: {e}")


In [2]:
# Get path directories for clips and annotations for the TUSimple dataset + ground truth dictionary

root_dir = os.path.dirname(os.getcwd())
annotated_dir = os.path.join(root_dir,'datasets/tusimple/train_set/annotations')
clips_dir = os.path.join(root_dir,'datasets/tusimple/train_set/')

# os.listdir returns entries in arbitrary order; sorting keeps the order of
# `annotations` (and therefore any index-based or seeded selection) stable
# across machines and runs.
annotated = sorted(os.listdir(annotated_dir))

# Each annotation file holds one JSON object per line. Collect them all into a
# single flat list, closing every file handle as soon as it has been read.
annotations = list()
for gt_file in annotated:
    path = os.path.join(annotated_dir,gt_file)
    with open(path) as gt_handle:
        # Skip blank lines (e.g. a trailing newline) instead of failing in json.loads
        annotations.extend(json.loads(line) for line in gt_handle if line.strip())

print(len(annotations))

3626


In [20]:
# Segmentation-mask generation; also useful for resizing ground-truth masks. NOTE: image dims = (H, W, C)
img_path = annotations[11]['raw_file']
image = cv2.imread(os.path.join(clips_dir, img_path))
# cv2.imread reports an unreadable/missing file by returning None instead of
# raising, so fail here with a clear message rather than later on.
if image is None:
    raise FileNotFoundError(f"Could not read image: {os.path.join(clips_dir, img_path)}")


# Generate segmentation mask for a given image
def generate_seg_mask(ground_truth: dict, image : np.ndarray):
    """Rasterise the annotated lane points of one sample into a mask image.

    Args:
        ground_truth: annotation dict with the keys 'h_samples' (list of y
            coordinates) and 'lanes' (one list of x coordinates per lane,
            aligned element-wise with 'h_samples'; -2 marks "no lane point
            at this height").
        image: array whose first two axes are height and width. It is only
            used for its shape and dtype and is not modified.

    Returns:
        A new array with the same shape and dtype as `image` that is 255 at
        every annotated lane point (in all channels) and 0 everywhere else.

    Raises:
        IndexError: if a lane point lies outside the image bounds.
    """
    nolane_token = -2
    lane_val = 255
    h_vals = ground_truth['h_samples']
    lanes = ground_truth['lanes']

    # Mark lane points in a boolean (H, W) grid. Building the mask directly,
    # rather than AND-ing it with the image, keeps the result independent of
    # the pixel values: a lane point on a black pixel is still marked.
    lane_points = np.zeros(image.shape[:2], dtype=bool)
    for lane in lanes:
        # A lane without any valid point simply contributes nothing.
        for x, y in zip(lane, h_vals):
            if x != nolane_token:
                lane_points[y, x] = True

    lane_markings_img = np.zeros_like(image)
    lane_markings_img[lane_points] = lane_val
    return lane_markings_img

# Build the lane mask for the sample frame (annotation 11) loaded above
masked_image = generate_seg_mask(ground_truth=annotations[11], image=image)

# Helper func to display image with OpenCV
def disp_img(image, name = 'Image'):
    """Show `image` in an OpenCV window titled `name` and block until a key is pressed.

    Args:
        image: the image to pass to cv2.imshow.
        name: title of the window.

    All OpenCV windows are destroyed afterwards, including when the wait is
    interrupted by an exception (for example a kernel interrupt), so no
    window is left behind.
    """
    try:
        cv2.imshow(name,image)
        cv2.waitKey(0)
    finally:
        cv2.destroyAllWindows()
    

# disp_img(image,'Original Image')
disp_img(masked_image, name='Original Mask')

# Bring the frame and its mask to the same 640x640 resolution
resized1 = cv2.resize(image, dsize=(640, 640), interpolation=cv2.INTER_LINEAR)
resized_mask = cv2.resize(masked_image, dsize=(640, 640), interpolation=cv2.INTER_LINEAR)

# Thresholding: every non-zero pixel of the resized mask is set back to white
resized_mask[resized_mask != 0] = 255

# disp_img(resized_mask,'Resized')

# Put frame and mask side by side (joined along the width axis)
Hori = np.hstack((resized1, resized_mask))

disp_img(Hori, name='Resized Image/Seg Mask')

In [21]:
# Collect the unique lane-pixel positions of a resized mask, paired with the raw file name of the original gt
def get_resized_gt(resized_mask: np.array, original_gt: dict):
    """Extract the positions of all white (255) pixels from a resized mask.

    Args:
        resized_mask: mask array; entries equal to 255 count as lane pixels.
        original_gt: original annotation dict; only 'raw_file' is read.

    Returns:
        dict with
          'lane_pix': list of tuples holding the first two index components
                      of every entry equal to 255, duplicates removed and
                      first-seen order kept,
          'raw_file': the 'raw_file' value copied from `original_gt`.
    """
    # Keep only the first two index columns; any further axis (e.g. channels)
    # would otherwise yield the same position several times.
    lane_coords = np.argwhere(resized_mask == 255)[:, :2]

    # A dict is used as an insertion-ordered set to drop duplicates.
    unique_pixels = {}
    for coord in lane_coords:
        unique_pixels.setdefault(tuple(coord), None)

    return {'lane_pix': list(unique_pixels), 'raw_file': original_gt['raw_file']}

# Lane pixel positions of the resized mask for the sample annotation (index 11)
resized_gt = get_resized_gt(resized_mask=resized_mask, original_gt=annotations[11])

print(resized_gt)

{'lane_pix': [(222, 331), (223, 331), (231, 305), (231, 326), (231, 341), (231, 360), (240, 288), (240, 320), (240, 347), (240, 386), (248, 271), (248, 315), (248, 353), (248, 413), (249, 271), (249, 315), (249, 353), (249, 413), (257, 254), (257, 309), (257, 359), (257, 439), (258, 254), (258, 309), (258, 359), (258, 439), (266, 237), (266, 304), (266, 365), (266, 465), (267, 237), (267, 304), (267, 365), (267, 465), (275, 219), (275, 298), (275, 371), (275, 492), (276, 219), (276, 298), (276, 371), (276, 492), (284, 202), (284, 293), (284, 377), (284, 518), (285, 202), (285, 293), (285, 377), (285, 518), (293, 185), (293, 287), (293, 382), (293, 544), (294, 185), (294, 287), (294, 382), (294, 544), (302, 168), (302, 282), (302, 388), (302, 571), (303, 168), (303, 282), (303, 388), (303, 571), (311, 151), (311, 276), (311, 394), (311, 597), (320, 134), (320, 270), (320, 400), (320, 623), (328, 117), (328, 265), (328, 406), (329, 117), (329, 265), (329, 406), (337, 100), (337, 259), (3

In [None]:
# Loading/Resizing image test scenario
# Pipeline under test: convert the loaded array to a tensor, then resize it to 640x640
train_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(size=(640,640)),
])

img_path = annotations[0]['raw_file']
image = cv2.imread(os.path.join(clips_dir, img_path))
print(image.shape)

image_tensor = train_transforms(image)
print(image_tensor.shape)


# EZ convert from normalized tensor to np.array (0,255) scale for RGB images
# NOTE(review): the frame comes from cv2.imread, which per the OpenCV docs uses
# BGR channel order - confirm the channel order before treating `array` as RGB.
convert = transforms.Compose([transforms.ToPILImage()])
array = np.array(convert(image_tensor))
print(array.shape)


In [5]:
# TuSimple Dataset loader and pre-processing class
# Full Size: Train(3626 clips/ 20 frames per clip/ 20th only is annotated), Test(2782 clips/ 20 frames per clip/ 20th only annotated)
# Link: https://github.com/TuSimple/tusimple-benchmark/tree/master/doc/lane_detection
class TuSimple(Dataset):
    """Random subset of the TuSimple training annotations with pre-loaded, resized images.

    On construction a random sample of the given annotations is drawn (using
    the stdlib `random` module, so seed it for reproducible subsets), the
    image referenced by each sampled annotation is read, converted to a
    tensor and resized, and everything is kept in memory.

    Args:
        train_annotations: list of annotation dicts; each must provide 'raw_file',
            the image path relative to `train_img_dir`.
        train_img_dir: directory that the 'raw_file' paths are relative to.
        resize_to: target size handed to torchvision's Resize transform.
        subset_size: fraction of the annotations to sample.
        image_size: original image size, only stored and returned by get_image_size().
        val_size: stored as-is; not used by this class yet.

    Raises:
        ValueError: from random.sample if `subset_size` yields a negative
            sample count or one larger than the number of annotations.
        FileNotFoundError: if a sampled image cannot be read.
    """
    def __init__(self, train_annotations : list, train_img_dir: str, resize_to : tuple , subset_size = 0.2, image_size = (1280,720), val_size = 0.15):
        self.image_size = image_size
        self.resize = resize_to
        self.val_size = val_size
        self.subset = subset_size
        self.train_dir = train_img_dir
        self.complete_gt = train_annotations
        self.complete_size = len(train_annotations)
        self.train_dataset, self.train_gt = self.generate_dataset()

    def _is_consistent(self):
        # True when every loaded image tensor has a matching ground truth entry
        return len(self.train_dataset) == len(self.train_gt)

    def __len__(self):
        # __len__ must return an int; signal an inconsistent dataset with an
        # exception instead of returning the message as a string.
        if not self._is_consistent():
            raise RuntimeError("Dataset generation failure: Size of training images does not match the existing ground truths.")
        return len(self.train_gt)

    def __getitem__(self, idx):
        # Returns the (image tensor, ground truth dict) pair stored at `idx`.
        if not self._is_consistent():
            raise RuntimeError("The dataset hasn't been constructed properly. Generate again!")
        return self.train_dataset[idx], self.train_gt[idx]

    # Returns original image size for the dataset
    def get_image_size(self):
        return self.image_size

    def _sample_indices(self):
        # Draw int(complete_size * subset) distinct indices from the valid
        # range [0, complete_size). The upper bound is exclusive so an index
        # equal to complete_size (out of range) can never be selected.
        target_samples = int(self.complete_size * self.subset)
        return random.sample(range(self.complete_size), target_samples)

    # Partition dataset according to input subset size and load the selected images
    def generate_dataset(self):
        """Sample the subset and load its images.

        Returns:
            (train_set, train_gt): list of resized image tensors and the list
            of the corresponding annotation dicts, in the same order.
        """
        train_gt = [self.complete_gt[idx] for idx in self._sample_indices()]
        train_set = []
        if not train_gt:
            return train_set, train_gt

        # The pipeline is identical for every image, so build it once.
        train_transforms = transforms.Compose([transforms.ToTensor(),
                                               transforms.Resize(size = self.resize)])

        # Load images, resize inputs, transform to tensors and generate dataset (or subset)
        for gt in train_gt:
            img_file = os.path.join(self.train_dir, gt['raw_file'])
            image = cv2.imread(img_file)
            # cv2.imread returns None for a missing/unreadable file instead of raising
            if image is None:
                raise FileNotFoundError(f"Could not read image: {img_file}")
            train_set.append(train_transforms(image))

        return train_set, train_gt
        

In [6]:
# Build a 5% subset of the annotated training frames, resized to 640x640
dataset = TuSimple(
    train_annotations=annotations,
    train_img_dir=clips_dir,
    resize_to=(640, 640),
    subset_size=0.05,
)