Virtual KITTI Dataloader

In [None]:
import pandas as pd
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt

# Folder holding the annotation .txt tables and folder holding the RGB frames.
annotation_folder = r'C:\Arbeitsordner\Abgaben_repo\vkitti_2.0.3_textgt\Scene01\30-deg-right'
data_folder = r'C:\Arbeitsordner\Abgaben_repo\vkitti_2.0.3_rgb\Scene01\30-deg-right\frames\rgb\Camera_0'


def _read_annotation(file_name):
    """Load one whitespace-separated annotation table from annotation_folder."""
    # sep=r'\s+' is the supported equivalent of delim_whitespace=True, which
    # is deprecated in recent pandas releases.
    return pd.read_csv(os.path.join(annotation_folder, file_name), sep=r'\s+')


pose_df = _read_annotation('pose.txt')
info_df = _read_annotation('info.txt')
bbox_df = _read_annotation('bbox.txt')
colors_df = _read_annotation('colors.txt')
intrinsic_df = _read_annotation('intrinsic.txt')
extrinsic_df = _read_annotation('extrinsic.txt')

# Mapping from each 'Category' value in colors_df to its (r, g, b) tuple.
label_to_color = {
    row['Category']: (row['r'], row['g'], row['b'])
    for _, row in colors_df.iterrows()
}
# Print the finished mapping once instead of dumping the partially built
# dictionary on every row.
print(label_to_color)

# preprocess images
def preprocess_image(image, target_size=(224, 224)):
    """Resize a BGR image, convert it to RGB and scale it to float32 in [0, 1].

    Args:
        image: Image array as accepted by ``cv2.resize`` (BGR channel order is
            assumed by the colour conversion).
        target_size: Size passed straight to ``cv2.resize``.

    Returns:
        The resized RGB image as ``float32`` with values divided by 255.
    """
    resized = cv2.resize(image, target_size)
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    return rgb.astype(np.float32) / 255.0

# Match RGB images to labels and 3D bounding boxes
def match_data(rgb_folder, pose_df, info_df, bbox_df, label_to_color,
               intrinsic_df=None, extrinsic_df=None):
    """Crop every annotated object out of the ``.jpg`` frames under *rgb_folder*.

    Args:
        rgb_folder: Directory that is walked recursively for ``.jpg`` frames.
        pose_df: Accepted for interface compatibility; currently unused.
        info_df: Table with ``trackID`` and ``label`` columns.
        bbox_df: Table with ``frame``, ``trackID`` and the pixel box columns
            ``left``, ``right``, ``top``, ``bottom`` (names taken from the
            Virtual KITTI 2 ``bbox.txt`` format -- confirm against your
            files). A ``cameraID`` column is used for filtering if present.
        label_to_color: Mapping from label to a colour tuple; labels that are
            missing from it get ``(255, 255, 255)``.
        intrinsic_df: Optional intrinsics table. When given, the values of the
            ``K[0,0]``, ``K[1,1]``, ``K[0,2]``, ``K[1,2]`` columns for the
            frame/camera are stored under ``'K'`` in each entry.
        extrinsic_df: Optional extrinsics table. When given, the 3x3 rotation
            and the ``t1..t3`` values are stored under ``'R'`` and ``'t'``.

    Returns:
        A list of dicts with the keys ``'image'`` (preprocessed crop),
        ``'label'``, ``'color'`` and ``'bbox'`` (``[left, top, right,
        bottom]`` after clipping to the image), plus any calibration keys.
    """
    # One label per track; the first row wins if a trackID is repeated.
    track_labels = (
        info_df.drop_duplicates('trackID').set_index('trackID')['label'].to_dict()
    )
    filter_by_camera = 'cameraID' in bbox_df.columns

    data = []
    for root, _dirs, files in os.walk(rgb_folder):
        # Sorted so the output order does not depend on the file system.
        for file in sorted(files):
            if not file.endswith('.jpg'):
                continue

            # File names are expected to look like "<prefix>_<frame>.jpg";
            # anything else is not a frame and is skipped.
            try:
                frame_id = int(file.split('_')[1].split('.')[0])
            except (IndexError, ValueError):
                continue

            image_path = os.path.join(root, file)
            # The camera name is part of the directory path, not of the file
            # name, so the full path has to be inspected.
            camera_id = 0 if 'Camera_0' in image_path else 1

            # Filter the bounding boxes for the current frame (and camera).
            frame_mask = bbox_df['frame'] == frame_id
            if filter_by_camera:
                frame_mask = frame_mask & (bbox_df['cameraID'] == camera_id)
            frame_bbox = bbox_df[frame_mask]
            if frame_bbox.empty:
                continue  # nothing to crop, so do not decode the image

            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread signals an unreadable file by returning None.
                print(f"Skipping unreadable image: {image_path}")
                continue
            height, width = image.shape[:2]

            calibration = _frame_calibration(
                intrinsic_df, extrinsic_df, frame_id, camera_id
            )

            for _, row in frame_bbox.iterrows():
                label = track_labels.get(row['trackID'])
                if label is None:
                    continue  # track without an entry in info_df

                color = label_to_color.get(label, (255, 255, 255))

                # Clip the box to the image so the crop is never out of range.
                obj_left = max(int(row['left']), 0)
                obj_top = max(int(row['top']), 0)
                obj_right = min(int(row['right']), width)
                obj_bottom = min(int(row['bottom']), height)
                if obj_right <= obj_left or obj_bottom <= obj_top:
                    continue  # an empty crop cannot be resized

                roi = image[obj_top:obj_bottom, obj_left:obj_right]
                entry = {
                    'image': preprocess_image(roi),
                    'label': label,
                    'color': color,
                    'bbox': [obj_left, obj_top, obj_right, obj_bottom],
                }
                entry.update(calibration)
                data.append(entry)

    return data


def _frame_calibration(intrinsic_df, extrinsic_df, frame_id, camera_id):
    """Return the available 'K', 'R' and 't' values for one frame/camera pair."""
    calibration = {}

    if intrinsic_df is not None:
        rows = intrinsic_df[
            (intrinsic_df['frame'] == frame_id)
            & (intrinsic_df['cameraID'] == camera_id)
        ]
        if not rows.empty:
            calibration['K'] = rows[
                ['K[0,0]', 'K[1,1]', 'K[0,2]', 'K[1,2]']
            ].values[0]

    if extrinsic_df is not None:
        rows = extrinsic_df[
            (extrinsic_df['frame'] == frame_id)
            & (extrinsic_df['cameraID'] == camera_id)
        ]
        if not rows.empty:
            rotation_columns = ['r1,1', 'r1,2', 'r1,3',
                                'r2,1', 'r2,2', 'r2,3',
                                'r3,1', 'r3,2', 'r3,3']
            calibration['R'] = rows[rotation_columns].values[0].reshape(3, 3)
            calibration['t'] = rows[['t1', 't2', 't3']].values[0]

    return calibration

# Walk the RGB frame folder: annotation_folder is where the annotation .txt
# tables are read from, so passing it here would never yield a .jpg frame.
data = match_data(data_folder, pose_df, info_df, bbox_df, label_to_color)
print(data)

KITTI Dataloader

In [None]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt  


def load_data(image_dir, label_dir):
    """Pair every ``.png`` image in *image_dir* with its parsed label file.

    An image is only included when a ``.txt`` file with the same base name
    exists in *label_dir*.

    Args:
        image_dir: Directory that is listed (non-recursively) for images.
        label_dir: Directory that holds the matching ``.txt`` label files.

    Returns:
        A list of ``{"image_path": ..., "labels": ...}`` dicts ordered by
        file name, where ``labels`` is the result of ``parse_label``.
    """
    samples = []
    for name in sorted(os.listdir(image_dir)):
        if not name.endswith('.png'):
            continue

        # Drop the four-character ".png" suffix to get the shared base name.
        annotation_path = os.path.join(label_dir, name[:-4] + ".txt")
        if not os.path.isfile(annotation_path):
            continue

        samples.append({
            "image_path": os.path.join(image_dir, name),
            "labels": parse_label(annotation_path),
        })
    return samples

def parse_label(label_file):
    """Parse one space-separated label file into a list of object dicts.

    Only lines whose first field is ``Car``, ``Truck``, ``Pedestrian`` or
    ``Cyclist`` are kept; every other line is ignored.

    Args:
        label_file: Path of the text file to read.

    Returns:
        A list of dicts with the keys ``type``, ``truncated``, ``occluded``,
        ``alpha``, ``bbox`` (fields 4-7), ``dimensions`` (fields 8-10),
        ``location`` (fields 11-13), ``rotation_y`` and ``score`` (``None``
        when the line has no sixteenth field).
    """
    wanted_types = ('Car', 'Truck', 'Pedestrian', 'Cyclist')
    parsed = []

    with open(label_file, 'r') as handle:
        for raw_line in handle:
            fields = raw_line.strip().split(' ')
            if fields[0] not in wanted_types:
                continue

            parsed.append({
                "type": fields[0],
                "truncated": float(fields[1]),
                "occluded": int(fields[2]),
                "alpha": float(fields[3]),
                "bbox": list(map(float, fields[4:8])),
                "dimensions": list(map(float, fields[8:11])),
                "location": list(map(float, fields[11:14])),
                "rotation_y": float(fields[14]),
                "score": None if len(fields) <= 15 else float(fields[15]),
            })

    return parsed

def visualize_data(data):
    """Show every image in *data* with its bounding boxes drawn on top.

    Args:
        data: Iterable of dicts with an ``"image_path"`` and a ``"labels"``
            list, where each label has a ``"bbox"`` of four numbers used as
            the two corner points ``(bbox[0], bbox[1])`` and
            ``(bbox[2], bbox[3])``.
    """
    for entry in data:
        image_path = entry["image_path"]
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            print(f"Skipping unreadable image: {image_path}")
            continue

        # Draw the boxes before the image is handed to matplotlib: imshow
        # receives a converted copy, so later drawing would not be visible.
        for label in entry["labels"]:
            bbox = label["bbox"]
            cv2.rectangle(
                image,
                (int(bbox[0]), int(bbox[1])),
                (int(bbox[2]), int(bbox[3])),
                (0, 255, 0),
                2,
            )

        # Plot the annotated image
        plt.figure(figsize=(10, 5))
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        plt.axis('off')
        plt.show()


# Directories holding the KITTI training images (.png) and label files (.txt).
# NOTE(review): the images come from an `image_3` folder while the labels come
# from a `label_2` folder -- TODO confirm both refer to the same camera,
# otherwise the boxes will not line up with the images.
image_dir = r"C:\Arbeitsordner\Abgaben_repo\Datasets\KITTI\data_object_image_3\training\image_3"
label_dir = r"C:\Arbeitsordner\Abgaben_repo\Datasets\KITTI\data_object_label_2\training\label_2"
# One {"image_path", "labels"} entry per image that has a label file.
data = load_data(image_dir, label_dir)
# visualize_data(data)


In [None]:
# Notebook cell: show the current contents of `data`.
data

WAYMO Dataset

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow_datasets as tfds

# Request one split explicitly: without `split`, tfds.load returns a dict of
# datasets keyed by split name, and a dict has no .map().
dataset, info = tfds.load(
    'waymo_open_dataset/v1.0',
    split='train',
    data_dir='gs://waymo_open_dataset_v_1_0_0_individual_files/tensorflow_datasets',
    with_info=True,
)


# preprocess the dataset
def preprocess(example, camera='camera_FRONT'):
    """Return one RGB image tensor from a dataset example.

    Args:
        example: Feature dict of one dataset element.
        camera: Key of the per-camera feature dict to read when the example
            has no top-level ``'image'`` entry. The TFDS catalog lists the
            Waymo images under keys such as ``'camera_FRONT'`` -- confirm
            against ``info.features``.

    Returns:
        The image tensor; string (encoded) images are JPEG-decoded to three
        channels, images that are already decoded are returned unchanged.
    """
    if 'image' in example:
        image = example['image']
    else:
        image = example[camera]['image']

    # Only decode when the feature is still an encoded byte string.
    if image.dtype == tf.string:
        image = tf.image.decode_jpeg(image, channels=3)
    return image


dataset = dataset.map(preprocess)

# Plot an image from the dataset
for example in dataset.take(1):
    plt.imshow(example.numpy())
    plt.title("Image from Waymo dataset")
    plt.axis('off')
    plt.show()
