In [2]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import tensorflow as tf
import os

## Training Dataset
- 1000 samples
    - 500 driving
        - 160/160/180 easy/med/hard
        - 250/250 left/right
    - 500 crashing
        - 160/160/180 easy/med/hard
- Seed: duwojdfkj

In [22]:
def merge_left_right(left_path, right_path, n_samples):
    """Load `n_samples` frames, alternating between a left and a right recording.

    The even positions 0, 2, 4, ..., 2*n_samples - 2 are visited in order.
    Positions divisible by 4 are read from `left_path`, the remaining ones
    from `right_path`. The same position is used as the index into whichever
    folder is selected, so each folder contributes every fourth file
    (left: 0, 4, 8, ...; right: 2, 6, 10, ...).

    Args:
        left_path: Folder containing the frames of the left recording.
        right_path: Folder containing the frames of the right recording.
        n_samples: Total number of frames to load across both folders.

    Returns:
        A list of `n_samples` arrays, each holding the pixel values of one
        frame divided by 255.

    Raises:
        IndexError: If the selected folder has no file at a required position.
    """
    # os.listdir returns names in arbitrary order. Sorting makes the position of
    # a file follow its name, so stepping through positions is reproducible.
    # NOTE(review): this is lexicographic order; it matches recording order only
    # if the frame file names sort that way (e.g. zero-padded) - confirm.
    left_images = sorted(os.listdir(left_path))
    right_images = sorted(os.listdir(right_path))

    out = []

    # Step by 2 to skip frames (increases sample diversity and utilizes more of each recording)
    for i in range(0, n_samples*2, 2):
        use_left = i%4 == 0
        path = left_path if use_left else right_path
        image_names = left_images if use_left else right_images

        if i >= len(image_names):
            raise IndexError(
                f"'{path}' contains {len(image_names)} files, but position {i} "
                f"is required to collect {n_samples} samples"
            )

        # The context manager closes the file as soon as the pixels are copied out.
        with Image.open(f"{path}/{image_names[i]}") as frame:
            image = np.array(frame)/255

        out.append(image)

    return out

In [23]:
# Load the driving frames per difficulty tier (160 easy, 160 medium, 180 hard),
# each tier drawn alternately from its left and right recording folders.
drive_easy, drive_med, drive_hard = (
    merge_left_right(left_dir, right_dir, count)
    for left_dir, right_dir, count in [
        ("slowroads dataset/drive easy left", "slowroads dataset/drive easy right", 160),
        ("slowroads dataset/drive med left", "slowroads dataset/drive med right", 160),
        ("slowroads dataset/drive hard left", "slowroads dataset/drive hard right", 180),
    ]
)

In [34]:
def select_random(path, n_samples):
    """Load up to `n_samples` randomly chosen images from a folder.

    The file names in `path` are shuffled with NumPy's global random state and
    the first `n_samples` of the shuffled names are loaded.

    Args:
        path: Folder containing the image files.
        n_samples: Maximum number of images to load. If the folder holds fewer
            files, all of them are returned.

    Returns:
        A list of arrays, each holding the pixel values of one image divided
        by 255.
    """
    # os.listdir returns names in arbitrary order; sorting first means the
    # selection depends only on the random state, not on the file system.
    image_names = sorted(os.listdir(path))

    # Shuffle the listing itself (in place) so the slice below is a random subset.
    np.random.shuffle(image_names)

    out = []

    for image_name in image_names[:n_samples]:
        # The context manager closes the file as soon as the pixels are copied out.
        with Image.open(f"{path}/{image_name}") as picture:
            image = np.array(picture)/255

        out.append(image)

    return out

In [35]:
# Load randomly selected crash frames per difficulty tier (160 easy, 160 medium, 180 hard).
crash_easy, crash_med, crash_hard = (
    select_random(folder, count)
    for folder, count in [
        ("slowroads dataset/crash easy", 160),
        ("slowroads dataset/crash med", 160),
        ("slowroads dataset/crash hard", 180),
    ]
)

In [36]:
# Concatenate the difficulty tiers of each class in the order easy, med, hard.
train_drive = [*drive_easy, *drive_med, *drive_hard]
train_crash = [*crash_easy, *crash_med, *crash_hard]

# Driving frames come first, so the labels are a run of 0s (drive) followed by a run of 1s (crash).
train_images = [*train_drive, *train_crash]
train_labels = [0 for _ in train_drive] + [1 for _ in train_crash]

In [37]:
# Shuffle the sample positions rather than the data, then apply the same
# permutation to images and labels so every image keeps its own label.
train_indices = np.arange(len(train_images))
np.random.shuffle(train_indices)

train_ds = tf.data.Dataset.from_tensor_slices(
    tuple([column[i] for i in train_indices] for column in (train_images, train_labels))
)

In [39]:
# Write the training dataset to disk at "data/train".
train_ds.save("data/train")

## Validation Dataset
- 400 samples
    - 200 driving
    - 200 crashing
- Seed: validation

In [40]:
# Request 200 randomly selected frames for each class of the validation split.
val_drive, val_crash = (
    select_random(folder, 200)
    for folder in ("slowroads dataset/drive val", "slowroads dataset/crash val")
)

# Driving frames come first, so the labels are a run of 0s (drive) followed by a run of 1s (crash).
val_images = [*val_drive, *val_crash]
val_labels = [0 for _ in val_drive] + [1 for _ in val_crash]

In [41]:
# Shuffle the sample positions rather than the data, then apply the same
# permutation to images and labels so every image keeps its own label.
val_indices = np.arange(len(val_images))
np.random.shuffle(val_indices)

val_ds = tf.data.Dataset.from_tensor_slices(
    tuple([column[i] for i in val_indices] for column in (val_images, val_labels))
)

In [42]:
# Write the validation dataset to disk at "data/val".
val_ds.save("data/val")

## Test Datasets
- 96 samples each
    - 48 driving
    - 48 crashing
- Seeds: validation, testset

In [3]:
# Names of the test sets. Each name selects the input folders
# "test set/crash <name>" and "test set/drive <name>" and the output path "test set/<name>".
set_names = ["easy", "hard", "metal guard", "wood fence"]

In [30]:
for set_name in set_names:
    image_paths, labels = [], []

    # "crash" folders are labelled 1 and "drive" folders 0; each must hold exactly 48 files.
    for cls in ["crash", "drive"]:
        image_names = os.listdir(f"test set/{cls} {set_name}")

        assert len(image_names) == 48

        image_paths.extend(f"test set/{cls} {set_name}/{image_name}" for image_name in image_names)

        label = int(cls == "crash")
        labels.extend([label]*48)

    # One shared permutation of the 96 positions keeps every image paired with its label.
    indices = np.arange(96)
    np.random.shuffle(indices)

    # Images are loaded in shuffled order and their pixel values divided by 255.
    ds = tf.data.Dataset.from_tensor_slices((
        [np.array(Image.open(image_paths[position]))/255 for position in indices],
        [labels[position] for position in indices]
    ))

    ds.save(f"test set/{set_name}")

    print(f"Finished {set_name}")

Finished easy
Finished hard
Finished metal guard
Finished wood fence
