In [1]:
pip install zod

Note: you may need to restart the kernel to use updated packages.


In [2]:
%load_ext autoreload
%autoreload 2
import sys

sys.path.append("../")

# imports for plotting
from matplotlib import pyplot as plt

%matplotlib inline
plt.rcParams["figure.figsize"] = [20, 10]

# import the ZOD DevKit
from zod import ZodFrames
from zod import ZodSequences

# import default constants
import zod.constants as constants
from zod.constants import Camera, Lidar, Anonymization, AnnotationProject

# import useful data classes
from zod.data_classes import LidarData

# NOTE! Set the path to dataset and choose a version
dataset_root = "/mnt/ml-data-storage/jens/zod"
version = "full"  # "mini" or "full"

# initialize ZodFrames
zod_frames = ZodFrames(dataset_root=dataset_root, version=version)

  from .autonotebook import tqdm as notebook_tqdm
Loading infos: 100000it [00:18, 5406.15it/s]


# Train Test Split

In [3]:
# get default training and validation splits 90/10 
training_frames = zod_frames.get_split(constants.TRAIN)
validation_frames = zod_frames.get_split(constants.VAL)

# print the number of training and validation frames
print(f"Number of training frames: {len(training_frames)}")
print(f"Number of validation frames: {len(validation_frames)}")

# print out the first 5 training frames
print("The 5 first training frames have the ids:", sorted(list(training_frames))[:5])


Number of training frames: 89972
Number of validation frames: 10023
The 5 first training frames have the ids: ['000000', '000002', '000003', '000004', '000005']


In [4]:
all_frames = training_frames | validation_frames
len(all_frames) #Why are 4 images missing? 

99995

Remove frames with no dynamic objects

In [5]:
dynamic_classes = ["Car", "Van", "Truck", "Animal", "Trailer", "Bus", "HeavyEquip", "Wheelchair", "TramTrain", "Bicycle", "Motorcycle", "Stroller", "PersonalTransporter", "Pedestrian"] 

training_frames_dynamic = []
test_frames_dynamic = []
for frame in zod_frames: 
    print(frame.metadata.frame_id)
    if frame.metadata.frame_id in training_frames: 
        annotations = frame.get_annotation(AnnotationProject.OBJECT_DETECTION)
        class_names = [annotation.object_type for annotation in annotations]

        # Check if there are any dynamic classes in the frame
        if any(class_name in dynamic_classes for class_name in class_names): 
            # If so, add the frame id to the training set
            training_frames_dynamic.append(frame.metadata.frame_id)

    if frame.metadata.frame_id in validation_frames:
        annotations = frame.get_annotation(AnnotationProject.OBJECT_DETECTION)
        class_names = [annotation.object_type for annotation in annotations]

        # Check if there are any dynamic classes in the frame
        if any(class_name in dynamic_classes for class_name in class_names): 
            # If so, add the frame id to the test set
            test_frames_dynamic.append(frame.metadata.frame_id)

000000
000002
000003
000004
000005
000007
000008
000009
000010
000011
000012
000013
000014
000015
000016
000017
000018
000019
000020
000021
000022
000023
000024
000025
000026
000027
000028
000029
000030
000031
000032
000033
000034
000035
000036
000037
000038
000040
000041
000042
000043
000044
000045
000046
000047
000048
000050
000051
000053
000055
000056
000057
000058
000059
000060
000061
000062
000063
000064
000065
000066
000068
000069
000070
000072
000073
000074
000075
000076
000077
000078
000079
000080
000081
000082
000083
000084
000085
000086
000087
000088
000089
000090
000091
000092
000093
000094
000095
000096
000097
000098
000100
000101
000102
000103
000104
000105
000106
000107
000108
000109
000110
000111
000112
000113
000114
000115
000116
000117
000118
000119
000120
000121
000122
000123
000124
000125
000126
000127
000129
000130
000131
000132
000133
000134
000135
000136
000137
000138
000139
000140
000141
000142
000143
000144
000145
000146
000147
000148
000149
000150
000151
000152

In [6]:
print("Nr of removed training frames: ", len(training_frames) - len(training_frames_dynamic))
print("Fraction of removed training frames: ", (len(training_frames) - len(training_frames_dynamic))/len(training_frames)*100, "%")
print("\n")
print("Nr of removed test frames: ", len(validation_frames) - len(test_frames_dynamic))
print("Fraction of removed test frames: ", (len(validation_frames) - len(test_frames_dynamic))/len(validation_frames)*100, "%")

Nr of removed training frames:  178
Fraction of removed training frames:  0.19783932779086827 %


Nr of removed test frames:  15
Fraction of removed test frames:  0.149655791679138 %


In [35]:
# Saving the train and test frames as txt files

#TRAIN SET
file_path = '/home/hannalukas/PROB/PROB_exp/data/OWOD/ImageSets/ZOD/zod_train.txt'

# Writing the frame IDs to the file
with open(file_path, 'w') as file:
    for frame_id in training_frames_dynamic:
        file.write(f"{frame_id}\n")

#TEST SET
file_path = '/home/hannalukas/PROB/PROB_exp/data/OWOD/ImageSets/ZOD/new_zod_all_task_test.txt'

# Writing the frame IDs to the file
with open(file_path, 'w') as file:
    for frame_id in test_frames_dynamic:
        file.write(f"{frame_id}\n")

In [7]:
#TEST SET
file_path = '/home/hannalukas/PROB/PROB_exp/data/OWOD/ImageSets/ZOD/new_zod_all_task_test.txt'

# Writing the frame IDs to the file
with open(file_path, 'w') as file:
    for frame_id in test_frames_dynamic:
        file.write(f"{frame_id}\n")

Load train and test frames directly from teh project

In [3]:
# Load training frames (so we dont have to create them again)
with open('/home/hannalukas/PROB/PROB_exp/data/OWOD/ImageSets/ZOD/zod_train.txt', 'r') as file:
    training_frames = [line.strip() for line in file]

# Load test frames (so we dont have to create them again)
with open('/home/hannalukas/PROB/PROB_exp/data/OWOD/ImageSets/ZOD/zod_all_task_test.txt', 'r') as file:
    test_frames = [line.strip() for line in file]


# Class frequencies

In [8]:
dynamic_classes = ["Car", "Van", "Truck", "Animal", "Trailer", "Bus", "HeavyEquip", "Wheelchair", "TramTrain", "Bicycle", "Motorcycle", "Stroller", "PersonalTransporter", "Pedestrian"]

# Initialize a dictionary to count the frequency of each class
class_frequency = {class_name: 0 for class_name in dynamic_classes}

for frame in zod_frames: 
    print(frame.metadata.frame_id)
    annotations = frame.get_annotation(AnnotationProject.OBJECT_DETECTION)
    class_names = [annotation.object_type for annotation in annotations]

    #Update the counts 
    for class_name in class_names:
        if class_name in class_frequency:
            class_frequency[class_name] += 1

000000
000002
000003
000004
000005
000007
000008
000009
000010
000011
000012
000013
000014
000015
000016
000017
000018
000019
000020
000021
000022
000023
000024
000025
000026
000027
000028
000029
000030
000031
000032
000033
000034
000035
000036
000037
000038
000040
000041
000042
000043
000044
000045
000046
000047
000048
000050
000051
000053
000055
000056
000057
000058
000059
000060
000061
000062
000063
000064
000065
000066
000068
000069
000070
000072
000073
000074
000075
000076
000077
000078
000079
000080
000081
000082
000083
000084
000085
000086
000087
000088
000089
000090
000091
000092
000093
000094
000095
000096
000097
000098
000100
000101
000102
000103
000104
000105
000106
000107
000108
000109
000110
000111
000112
000113
000114
000115
000116
000117
000118
000119
000120
000121
000122
000123
000124
000125
000126
000127
000129
000130
000131
000132
000133
000134
000135
000136
000137
000138
000139
000140
000141
000142
000143
000144
000145
000146
000147
000148
000149
000150
000151
000152

KeyboardInterrupt: 

Use list of class frequencies to divide the classes into the four tasks. 
* Start with the most common classes
* Try to keep a balance of nr of instances between tasks. 

In [10]:
sorted(class_frequency.items(), key=lambda x: x[1], reverse=True)

[('Car', 1208474),
 ('Pedestrian', 359177),
 ('Bicycle', 124122),
 ('Van', 115042),
 ('Truck', 78795),
 ('Motorcycle', 32197),
 ('PersonalTransporter', 28490),
 ('Bus', 25802),
 ('TramTrain', 13576),
 ('Trailer', 4887),
 ('Stroller', 4199),
 ('HeavyEquip', 4021),
 ('Animal', 3439),
 ('Wheelchair', 467)]

In [5]:
# Method for counting the number of dynamic instances in a set of frames

def count_dynamic_instances(frame_ids): 
    dynamic_classes = ["Car", "Van", "Truck", "Animal", "Trailer", "Bus", "HeavyEquip", "Wheelchair", "TramTrain", "Bicycle", "Motorcycle", "Stroller", "PersonalTransporter", "Pedestrian"]
    class_frequency = {class_name: 0 for class_name in dynamic_classes}
    for frame in zod_frames: 
        print(frame.metadata.frame_id)
        if frame.metadata.frame_id in frame_ids: 
            annotations = frame.get_annotation(AnnotationProject.OBJECT_DETECTION)
            class_names = [annotation.object_type for annotation in annotations]
            for class_name in class_names: 
                if class_name in dynamic_classes: 
                    class_frequency[class_name] += 1
    

    sorted_class_frequency = sorted(class_frequency.items(), key=lambda x: x[1], reverse=True)
    
    return sorted_class_frequency

# Splitting the data

* No overlap of images between tasks 
* No instances of subsequent classes in the task

In [5]:
T1_classes = [
    "Car", "Pedestrian", "Bicycle", "Van"
    ]

T2_classes = [
    "Truck", "Trailer"
]

T3_classes = [
    "TramTrain", "Motorcycle", "Stroller", "HeavyEquip"
]

T4_classes = [
    "Bus", "PersonalTransporter", "Animal", "Wheelchair"
]

t1_train = []
t2_train = []
t3_train = []
t4_train = []

In [6]:
# Load training frames (so we dont have to create them again)
with open('/home/hannalukas/PROB/PROB_exp/data/OWOD/ImageSets/ZOD/zod_t1_train.txt', 'r') as file:
    t1_train = [line.strip() for line in file]

# Load test frames (so we dont have to create them again)
with open('/home/hannalukas/PROB/PROB_exp/data/OWOD/ImageSets/ZOD/zod_t2_train.txt', 'r') as file:
    t2_train = [line.strip() for line in file]

Appending to t1_train
* Append images from the training set 
* Append images that dont conatin classes from T2 T3 or T4 
* Append images that contain at least one object from T1

In [None]:
training_frames = list(training_frames) 

excluded_classes = T2_classes + T3_classes + T4_classes

# For the zod frames that are in the training frames, only append images to t1_train if it does not contain classes T2, T3, T4 but contains at least one class from T1
for frame in zod_frames: 
    print(frame.metadata.frame_id)
    if frame.metadata.frame_id in training_frames:
        annotations = frame.get_annotation(AnnotationProject.OBJECT_DETECTION)
        class_names = [annotation.object_type for annotation in annotations]

        # Check for inclusion of T1_classes and exclusion of excluded_classes
        if any(item in T1_classes for item in class_names) and not any(item in excluded_classes for item in class_names):
            t1_train.append(frame.metadata.frame_id)
            


In [12]:
print("Number of training frames in T1: ", len(t1_train), "\nTotal number of training frames: ", len(training_frames))

Number of training frames in T1:  30130 
Total number of training frames:  89794


In [13]:
# Save t1 frames to txt file

file_path = '/home/hannalukas/PROB/PROB_exp/data/OWOD/ImageSets/ZOD/zod_t1_train.txt'

# Writing the frame IDs to the file
with open(file_path, 'w') as file:
    for frame_id in t1_train:
        file.write(f"{frame_id}\n")

In [None]:
# Count the number of dynamic instances in the T1 training set

freq = count_dynamic_instances(t1_train)

In [15]:
freq

[('Car', 252325),
 ('Pedestrian', 55006),
 ('Bicycle', 25531),
 ('Van', 20328),
 ('Truck', 0),
 ('Animal', 0),
 ('Trailer', 0),
 ('Bus', 0),
 ('HeavyEquip', 0),
 ('Wheelchair', 0),
 ('TramTrain', 0),
 ('Motorcycle', 0),
 ('Stroller', 0),
 ('PersonalTransporter', 0)]

Appending to t2_train
* Exlude images from t1_train
* Append images that dont conatin classes from T3 or T4
* Append images that conatin at least one object from T2 

In [None]:
excluded_classes = T3_classes + T4_classes

for frame in zod_frames: 
    print(frame.metadata.frame_id)
    if frame.metadata.frame_id in training_frames and frame.metadata.frame_id not in t1_train:
        annotations = frame.get_annotation(AnnotationProject.OBJECT_DETECTION)
        class_names = [annotation.object_type for annotation in annotations]

        # Check for inclusion of T2_classes and exclusion of excluded_classes
        if any(item in T2_classes for item in class_names) and not any(item in excluded_classes for item in class_names):
            t2_train.append(frame.metadata.frame_id)

In [17]:
print("Nr of training frames in t2 :", len(t2_train))
print("Remaining training frames: ", len(training_frames) - len(t1_train) - len(t2_train))

Nr of training frames in t2 : 18972
Remaining training frames:  40692


In [18]:
# Find the overlap
overlap = [item for item in t1_train if item in t2_train]

print("Overlap:", overlap)

Overlap: []


In [None]:
freq = count_dynamic_instances(t2_train)

In [20]:
freq

[('Car', 210187),
 ('Truck', 42229),
 ('Pedestrian', 29519),
 ('Van', 23269),
 ('Bicycle', 12648),
 ('Trailer', 2373),
 ('Animal', 0),
 ('Bus', 0),
 ('HeavyEquip', 0),
 ('Wheelchair', 0),
 ('TramTrain', 0),
 ('Motorcycle', 0),
 ('Stroller', 0),
 ('PersonalTransporter', 0)]

In [21]:
# Save t2 frames to txt file

file_path = '/home/hannalukas/PROB/PROB_exp/data/OWOD/ImageSets/ZOD/zod_t2_train.txt'

# Writing the frame IDs to the file
with open(file_path, 'w') as file:
    for frame_id in t2_train:
        file.write(f"{frame_id}\n")

Appending to t3_train
* Exlude images from t1_train & t2_train
* Append images that dont conatin classes from T4
* Append images that conatin at least one object from T3

In [10]:
excluded_classes = T4_classes

for frame in zod_frames: 
    print(frame.metadata.frame_id)
    if frame.metadata.frame_id in training_frames and frame.metadata.frame_id not in (t1_train + t2_train) and frame.metadata.frame_id not in t3_train:
        annotations = frame.get_annotation(AnnotationProject.OBJECT_DETECTION)
        class_names = [annotation.object_type for annotation in annotations]

        # Check for inclusion of T3_classes and exclusion of excluded_classes
        if any(item in T3_classes for item in class_names) and not any(item in excluded_classes for item in class_names):
            t3_train.append(frame.metadata.frame_id)

000000
000002
000003
000004
000005
000007
000008
000009
000010
000011
000012
000013
000014
000015
000016
000017
000018
000019
000020
000021
000022
000023
000024
000025
000026
000027
000028
000029
000030
000031
000032
000033
000034
000035
000036
000037
000038
000040
000041
000042
000043
000044
000045
000046
000047
000048
000050
000051
000053
000055
000056
000057
000058
000059
000060
000061
000062
000063
000064
000065
000066
000068
000069
000070
000072
000073
000074
000075
000076
000077
000078
000079
000080
000081
000082
000083
000084
000085
000086
000087
000088
000089
000090
000091
000092
000093
000094
000095
000096
000097
000098
000100
000101
000102
000103
000104
000105
000106
000107
000108
000109
000110
000111
000112
000113
000114
000115
000116
000117
000118
000119
000120
000121
000122
000123
000124
000125
000126
000127
000129
000130
000131
000132
000133
000134
000135
000136
000137
000138
000139
000140
000141
000142
000143
000144
000145
000146
000147
000148
000149
000150
000151
000152

In [11]:
print("Nr of training frames in t3 :", len(t3_train))
print("Remaining training frames: ", len(training_frames) - len(t1_train) - len(t2_train) -len(t3_train))

Nr of training frames in t3 : 15893
Remaining training frames:  24799


In [12]:
# Find the overlap
overlap = [item for item in (t1_train + t2_train) if item in t3_train]

print("Overlap:", overlap)

Overlap: []


In [13]:
freq = count_dynamic_instances(t3_train)

000000
000002
000003
000004
000005
000007
000008
000009
000010
000011
000012
000013
000014
000015
000016
000017
000018
000019
000020
000021
000022
000023
000024
000025
000026
000027
000028
000029
000030
000031
000032
000033
000034
000035
000036
000037
000038
000040
000041
000042
000043
000044
000045
000046
000047
000048
000050
000051
000053
000055
000056
000057
000058
000059
000060
000061
000062
000063
000064
000065
000066
000068
000069
000070
000072
000073
000074
000075
000076
000077
000078
000079
000080
000081
000082
000083
000084
000085
000086
000087
000088
000089
000090
000091
000092
000093
000094
000095
000096
000097
000098
000100
000101
000102
000103
000104
000105
000106
000107
000108
000109
000110
000111
000112
000113
000114
000115
000116
000117
000118
000119
000120
000121
000122
000123
000124
000125
000126
000127
000129
000130
000131
000132
000133
000134
000135
000136
000137
000138
000139
000140
000141
000142
000143
000144
000145
000146
000147
000148
000149
000150
000151
000152

In [14]:
freq

[('Car', 240818),
 ('Pedestrian', 70227),
 ('Van', 23271),
 ('Bicycle', 22554),
 ('Motorcycle', 18856),
 ('Truck', 12969),
 ('TramTrain', 6171),
 ('HeavyEquip', 2550),
 ('Stroller', 2080),
 ('Trailer', 911),
 ('Animal', 0),
 ('Bus', 0),
 ('Wheelchair', 0),
 ('PersonalTransporter', 0)]

In [15]:
# Save t3 frames to txt file

file_path = '/home/hannalukas/PROB/PROB_exp/data/OWOD/ImageSets/ZOD/zod_t3_train.txt'

# Writing the frame IDs to the file
with open(file_path, 'w') as file:
    for frame_id in t3_train:
        file.write(f"{frame_id}\n")

Appending to t4_train
* The remaining frames

In [16]:
t4_train = [item for item in training_frames if item not in (t1_train + t2_train + t3_train)]

In [17]:
len(t4_train)

24799

In [None]:
freq = count_dynamic_instances(t4_train)

In [19]:
freq

[('Car', 383120),
 ('Pedestrian', 165678),
 ('Bicycle', 49587),
 ('Van', 36903),
 ('PersonalTransporter', 25257),
 ('Bus', 23381),
 ('Truck', 15767),
 ('Motorcycle', 9360),
 ('TramTrain', 6319),
 ('Animal', 3098),
 ('Stroller', 1639),
 ('Trailer', 1174),
 ('HeavyEquip', 1096),
 ('Wheelchair', 412)]

In [20]:
# Save t4 frames to txt file

file_path = '/home/hannalukas/PROB/PROB_exp/data/OWOD/ImageSets/ZOD/zod_t4_train.txt'

# Writing the frame IDs to the file
with open(file_path, 'w') as file:
    for frame_id in t4_train:
        file.write(f"{frame_id}\n")