Create (train, val, test) sets from a dataset composed of multiple `.pkl` data files.

In [1]:
import os
import shutil
import dill
import random
from glob import glob
import numpy as np
from tqdm.notebook import tqdm
from collect.generate.scene.trajectron_scene import augment_scene

# Directory holding the per-session dataset pickles, relative to the
# directory the notebook is started from.
DATADIR = 'carla_v2_2_dataset'

# Absolute path of every `*_dataset.pkl` file in DATADIR. Left as the cell's
# last expression so that the list is displayed as the cell output.
list(map(os.path.abspath, glob(f"{ DATADIR }/*_dataset.pkl")))

['/home/fireofearth/code/robotics/carla-collect/carla_v2_2_dataset/20210714_13-22-26_dataset.pkl',
 '/home/fireofearth/code/robotics/carla-collect/carla_v2_2_dataset/20210714_13-37-12_dataset.pkl',
 '/home/fireofearth/code/robotics/carla-collect/carla_v2_2_dataset/20210714_14-21-59_dataset.pkl',
 '/home/fireofearth/code/robotics/carla-collect/carla_v2_2_dataset/20210714_13-52-06_dataset.pkl',
 '/home/fireofearth/code/robotics/carla-collect/carla_v2_2_dataset/20210714_14-07-01_dataset.pkl']

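Copy the list of files shown above into the next cell as `ds`. The order matters: the train, val and test files are later selected from `ds` by index, and `glob` does not return files in a guaranteed order.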

In [2]:
# Dataset files, in the order pinned from the listing produced by the first
# cell. The order matters: the val/test source files are picked from `ds` by
# index further down.
#
# The paths are rebuilt from DATADIR instead of hard-coding one machine's home
# directory, so the notebook also runs from another checkout. As with the glob
# in the first cell, this assumes the notebook is started from the directory
# that contains DATADIR.
dataset_filenames = [
    '20210714_13-22-26_dataset.pkl',
    '20210714_13-37-12_dataset.pkl',
    '20210714_14-21-59_dataset.pkl',
    '20210714_13-52-06_dataset.pkl',
    '20210714_14-07-01_dataset.pkl',
]
ds = [os.path.abspath(os.path.join(DATADIR, name)) for name in dataset_filenames]
len(ds)

5

## Inspect the dataset

In [13]:
# Peek at the first dataset file: unpickle it and display how many scenes
# the loaded environment object holds.
with open(ds[0], mode='rb') as pkl_file:
    env = dill.load(pkl_file, encoding='latin1')
len(env.scenes)

212

## Creating (train, val, test) sets from dataset.

In [14]:
# Split configuration, read by the cell that writes the train/val/test pickles.

# Train set: ds[0] is always used; ds[1:n_pkl_to_train_set] are merged into it.
n_pkl_to_train_set = 1

# Positions in `ds` of the files the val and test sets are taken from.
val_set_idx, test_set_idx = 3, 4

# Number of scenes kept for each held-out set. Setting a size to None copies
# the corresponding source file unchanged instead of sub-sampling it.
val_set_size = test_set_size = 30

# Tag embedded in the names of the output files.
version_label = "v2_2"

In [20]:
# Build the train / val / test pickles from the files listed in `ds`.
#   train    : ds[0] plus ds[1:n_pkl_to_train_set]; every scene gets a list of
#              augmented copies attached as `scene.augmented`
#   val/test : ds[val_set_idx] / ds[test_set_idx], either copied unchanged
#              (size None) or cut down to a random subset; these scenes are
#              not augmented

# Seed the shuffles so that re-running this cell reproduces the same splits.
shuffle_seed = 0
random.seed(shuffle_seed)

# Refuse configurations in which one dataset file would feed more than one
# split (that would leak data between train, val and test). Indexing a range
# normalises negative indices and raises IndexError for out-of-range ones.
file_indices = range(len(ds))
train_file_indices = {0, *file_indices[1:n_pkl_to_train_set]}
val_file_idx = file_indices[val_set_idx]
test_file_idx = file_indices[test_set_idx]
if val_file_idx == test_file_idx or train_file_indices & {val_file_idx, test_file_idx}:
    raise ValueError(
        "train/val/test must come from different dataset files: "
        f"train={sorted(train_file_indices)}, val={val_file_idx}, test={test_file_idx}")

# train set
print("Forming train set")
with open(ds[0], 'rb') as f:
    env = dill.load(f, encoding='latin1')
for dataset in ds[1:n_pkl_to_train_set]:
    with open(dataset, 'rb') as f:
        env2 = dill.load(f, encoding='latin1')
    # merge the extra file's scenes into the first environment
    env.scenes.extend(env2.scenes)
print(f"Got {len(env.scenes)} scenes")

print("Augmenting scenes")
# 0, 15, ..., 345: identical for every scene, so built once outside the loop.
# NOTE(review): presumably rotation angles in degrees; confirm against augment_scene.
angles = np.arange(0, 360, 15)
for scene in tqdm(env.scenes):
    scene.augmented = list()
    for angle in angles:
        scene.augmented.append(augment_scene(scene, angle))

print("Shuffling scenes")
# A single shuffle already yields a uniformly random order.
random.shuffle(env.scenes)

print("Saving train set")
savepath = f"{ DATADIR }/carla_train_{ version_label }_full.pkl"
with open(savepath, 'wb') as f:
    dill.dump(env, f, protocol=dill.HIGHEST_PROTOCOL)

# The val and test sets go through the same steps, so one loop handles both.
held_out_sets = (
    ("val", val_set_idx, val_set_size),
    ("test", test_set_idx, test_set_size),
)
for set_name, set_idx, set_size in held_out_sets:
    print(f"Saving { set_name } set")
    savepath = f"{ DATADIR }/carla_{ set_name }_{ version_label }_full.pkl"
    if set_size is None:
        # No size limit: the source file is used as-is.
        shutil.copyfile(ds[set_idx], os.path.abspath(savepath))
    else:
        with open(ds[set_idx], 'rb') as f:
            env = dill.load(f, encoding='latin1')
        print(f"Has {len(env.scenes)} scenes, selecting {set_size} of them")
        # Random subset: shuffle, then keep the first `set_size` scenes.
        random.shuffle(env.scenes)
        env.scenes = env.scenes[:set_size]
        with open(savepath, 'wb') as f:
            dill.dump(env, f, protocol=dill.HIGHEST_PROTOCOL)

print("Done")

Forming train set
Got 212 scenes
Augmenting scenes


HBox(children=(FloatProgress(value=0.0, max=212.0), HTML(value='')))


Shuffling scenes
Saving train set
Saving val set
Has 220 scenes, selecting 30 of them
Saving test set
Has 227 scenes, selecting 30 of them
Done
