In [1]:
import gc
import os

import numpy as np

In [2]:
import zarr

import l5kit
from l5kit.data import ChunkedDataset, LocalDataManager

print("l5kit version:", l5kit.__version__)

l5kit version: 1.1.0


In [3]:
os.environ["L5KIT_DATA_FOLDER"] = "/kaggle/input/lyft-motion-prediction-autonomous-vehicles"

In [4]:
import time
from datetime import datetime

In [5]:
# helper to convert a timedelta to a string (dropping milliseconds)
def deltaToString(delta):
    timeObj = time.gmtime(delta.total_seconds())
    return time.strftime('%H:%M:%S', timeObj)

class ProgressBar:
    
    # constructor
    #   maxIterations: maximum number of iterations
    def __init__(self, maxIterations):
        self.maxIterations = maxIterations
        self.granularity = 100 # 1 whole percent
    
    # start the timer
    def start(self):
        self.start = datetime.now()
    
    # check the progress of the current iteration
    #   # currentIteration: the current iteration we are on
    def check(self, currentIteration, chunked=False):
        if currentIteration % round(self.maxIterations / self.granularity) == 0 or chunked:
            
            percentage = round(currentIteration / (self.maxIterations - self.maxIterations / self.granularity) * 100)
            
            current = datetime.now()
            
            # time calculations
            timeElapsed = (current - self.start)
            timePerStep = timeElapsed / (currentIteration + 1)
            totalEstimatedTime = timePerStep * self.maxIterations
            timeRemaining = totalEstimatedTime - timeElapsed
            
            # string formatting
            percentageStr = "{:>3}%  ".format(percentage)
            remainingStr = "Remaining: {}  ".format(deltaToString(timeRemaining))
            elapsedStr = "Elapsed: {}  ".format(deltaToString(timeElapsed))
            totalStr = "Total: {}\r".format(deltaToString(totalEstimatedTime))
            
            print(percentageStr + remainingStr + elapsedStr + totalStr, end="")

    def end(self):
        print()

In [12]:
dm = LocalDataManager()
dataset_path = dm.require('scenes/test.zarr')
zarr_dataset = ChunkedDataset(dataset_path)
zarr_dataset.open()
print(zarr_dataset)

+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
| Num Scenes | Num Frames | Num Agents | Num TR lights | Total Time (hr) | Avg Frames per Scene | Avg Agents per Frame | Avg Scene Time (sec) | Avg Frame frequency |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
|   11314    |  1131400   |  88594921  |    7854144    |      31.43      |        100.00        |        78.31         |        10.00         |        10.00        |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+


In [13]:
print(zarr_dataset.agents)
print(zarr_dataset.agents.shape)
n = zarr_dataset.agents.shape

<zarr.core.Array '/agents' (88594921,) [('centroid', '<f8', (2,)), ('extent', '<f4', (3,)), ('yaw', '<f4'), ('velocity', '<f4', (2,)), ('track_id', '<u8'), ('label_probabilities', '<f4', (17,))] read-only>
(88594921,)


In [14]:
def getAgentsChunked(dataset, subsetPercent=1, chunkSize=10):

    datasetLength = round(len(dataset) * subsetPercent)
    print("datasetLength", datasetLength)
    print("chunkSize", chunkSize)
    agents = {}
    pb = ProgressBar(datasetLength)
    pb.start()
    for i in range(0, datasetLength, chunkSize):

        agentsSubset = dataset[i:i+chunkSize]
        for j in range(0,len(agentsSubset)):

            agent = agentsSubset[j]

            
            centroid = agent[0]
            yaw = agent[2]
            velocity = agent[3]
            track_id = agent[4]

            if track_id not in agents:
                agents[track_id] = []
                
            data = []
            data.append(centroid[0])
            data.append(centroid[1])
            data.append(yaw)
            data.append(velocity[0])
            data.append(velocity[1])
            
            agents[track_id].append(data)
            
            pb.check(i+j)

    return agents

In [16]:
subsetPercent = 1*10**-2
print(subsetPercent)
agents = getAgentsChunked(zarr_dataset.agents, subsetPercent, 1000)

0.01
datasetLength 885949
chunkSize 1000
101%  Remaining: 00:00:00  Elapsed: 00:00:09  Total: 00:00:09

In [17]:
print(len(agents))

lengthOfAgents = []
for key in agents:
    agent = agents[key]
    lengthOfAgents.append(len(agent))

print(len(lengthOfAgents))

1106
1106


In [18]:
mean = np.mean(lengthOfAgents)
std = np.std(lengthOfAgents)
min_ = np.min(lengthOfAgents)
max_ = np.max(lengthOfAgents)
median = np.median(lengthOfAgents)

print("mean",mean)
print("std",std)
print("min_",min_)
print("max_",max_)
print("median",median)

mean 801.0849909584086
std 908.223504931662
min_ 1
max_ 8724
median 543.0
