In [1]:
import gc
import os

import numpy as np

In [2]:
import zarr

import l5kit
from l5kit.data import ChunkedDataset, LocalDataManager

print("l5kit version:", l5kit.__version__)

l5kit version: 1.1.0


In [3]:
os.environ["L5KIT_DATA_FOLDER"] = "/kaggle/input/lyft-motion-prediction-autonomous-vehicles"

In [8]:
import time
from datetime import datetime

In [9]:
# helper to convert a timedelta to a string (dropping milliseconds)
def deltaToString(delta):
    timeObj = time.gmtime(delta.total_seconds())
    return time.strftime('%H:%M:%S', timeObj)

class ProgressBar:
    
    # constructor
    #   maxIterations: maximum number of iterations
    def __init__(self, maxIterations):
        self.maxIterations = maxIterations
        self.granularity = 100 # 1 whole percent
    
    # start the timer
    def start(self):
        self.start = datetime.now()
    
    # check the progress of the current iteration
    #   # currentIteration: the current iteration we are on
    def check(self, currentIteration, chunked=False):
        if currentIteration % round(self.maxIterations / self.granularity) == 0 or chunked:
            
            percentage = round(currentIteration / (self.maxIterations - self.maxIterations / self.granularity) * 100)
            
            current = datetime.now()
            
            # time calculations
            timeElapsed = (current - self.start)
            timePerStep = timeElapsed / (currentIteration + 1)
            totalEstimatedTime = timePerStep * self.maxIterations
            timeRemaining = totalEstimatedTime - timeElapsed
            
            # string formatting
            percentageStr = "{:>3}%  ".format(percentage)
            remainingStr = "Remaining: {}  ".format(deltaToString(timeRemaining))
            elapsedStr = "Elapsed: {}  ".format(deltaToString(timeElapsed))
            totalStr = "Total: {}\r".format(deltaToString(totalEstimatedTime))
            
            print(percentageStr + remainingStr + elapsedStr + totalStr, end="")

    def end(self):
        print()

In [4]:
dm = LocalDataManager()
dataset_path = dm.require('scenes/sample.zarr')
zarr_dataset = ChunkedDataset(dataset_path)
zarr_dataset.open()
print(zarr_dataset)

+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
| Num Scenes | Num Frames | Num Agents | Num TR lights | Total Time (hr) | Avg Frames per Scene | Avg Agents per Frame | Avg Scene Time (sec) | Avg Frame frequency |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
|    100     |   24838    |  1893736   |     316008    |       0.69      |        248.38        |        76.24         |        24.83         |        10.00        |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+


In [5]:
print(zarr_dataset.agents)
print(zarr_dataset.agents.shape)
n = zarr_dataset.agents.shape

<zarr.core.Array '/agents' (1893736,) [('centroid', '<f8', (2,)), ('extent', '<f4', (3,)), ('yaw', '<f4'), ('velocity', '<f4', (2,)), ('track_id', '<u8'), ('label_probabilities', '<f4', (17,))] read-only>
(1893736,)


In [45]:
def getAgentsChunked(dataset, subsetPercent=1, chunkSize=10):

    datasetLength = round(len(dataset) * subsetPercent)
    print("datasetLength", datasetLength)
    print("chunkSize", chunkSize)
    agents = {}
    pb = ProgressBar(datasetLength)
    pb.start()
    for i in range(0, datasetLength, chunkSize):

        agentsSubset = dataset[i:i+chunkSize]
        for j in range(0,len(agentsSubset)):

            agent = agentsSubset[j]

            
            centroid = agent[0]
            yaw = agent[2]
            velocity = agent[3]
            track_id = agent[4]

            if track_id not in agents:
                agents[track_id] = []
                
            data = []
            data.append(centroid[0])
            data.append(centroid[1])
            data.append(yaw)
            data.append(velocity[0])
            data.append(velocity[1])
            
            agents[track_id].append(data)
            
            pb.check(i+j)

    return agents

In [49]:
subsetPercent = 1 #1*10**-1
print(subsetPercent)
agents = getAgentsChunked(zarr_dataset.agents, subsetPercent, 1000)

1
datasetLength 1893736
chunkSize 1000
101%  Remaining: 23:59:59  Elapsed: 00:00:13  Total: 00:00:13

In [52]:
agents[1][0:5]

[[665.0342407226562, -2207.51220703125, 1.0166751, 0.0, 0.0],
 [666.9964599609375, -2204.9560546875, 0.98688865, 0.0, 0.0],
 [667.0374145507812, -2204.88232421875, 1.0078945, 0.23517583, 0.3726723],
 [667.2947387695312, -2204.42431640625, 0.9998908, 1.0126399, 1.7491648],
 [667.902099609375, -2203.4365234375, 0.9961442, 2.2948935, 3.8041594]]

In [50]:
subsetPercent = 1*10**-2
print(subsetPercent)
agents = getAgentsChunked(zarr_dataset.agents, subsetPercent, 2)

0.01
datasetLength 18937
chunkSize 2
101%  Remaining: 00:00:00  Elapsed: 00:00:05  Total: 00:00:05

In [51]:
agents[1][0:5]

[[665.0342407226562, -2207.51220703125, 1.0166751, 0.0, 0.0],
 [666.9964599609375, -2204.9560546875, 0.98688865, 0.0, 0.0],
 [667.0374145507812, -2204.88232421875, 1.0078945, 0.23517583, 0.3726723],
 [667.2947387695312, -2204.42431640625, 0.9998908, 1.0126399, 1.7491648],
 [667.902099609375, -2203.4365234375, 0.9961442, 2.2948935, 3.8041594]]