In [1]:
import gc
import os

import numpy as np

In [2]:
import zarr

import l5kit
from l5kit.data import ChunkedDataset, LocalDataManager

print("l5kit version:", l5kit.__version__)

l5kit version: 1.1.0


In [3]:
os.environ["L5KIT_DATA_FOLDER"] = "/kaggle/input/lyft-motion-prediction-autonomous-vehicles"

In [4]:
import time
from datetime import datetime

In [5]:
# helper to convert a timedelta to a string (dropping milliseconds)
def deltaToString(delta):
    timeObj = time.gmtime(delta.total_seconds())
    return time.strftime('%H:%M:%S', timeObj)

class ProgressBar:
    
    # constructor
    #   maxIterations: maximum number of iterations
    def __init__(self, maxIterations):
        self.maxIterations = maxIterations
        self.granularity = 100 # 1 whole percent
    
    # start the timer
    def start(self):
        self.start = datetime.now()
    
    # check the progress of the current iteration
    #   # currentIteration: the current iteration we are on
    def check(self, currentIteration, chunked=False):
        if currentIteration % round(self.maxIterations / self.granularity) == 0 or chunked:
            
            percentage = round(currentIteration / (self.maxIterations - self.maxIterations / self.granularity) * 100)
            
            current = datetime.now()
            
            # time calculations
            timeElapsed = (current - self.start)
            timePerStep = timeElapsed / (currentIteration + 1)
            totalEstimatedTime = timePerStep * self.maxIterations
            timeRemaining = totalEstimatedTime - timeElapsed
            
            # string formatting
            percentageStr = "{:>3}%  ".format(percentage)
            remainingStr = "Remaining: {}  ".format(deltaToString(timeRemaining))
            elapsedStr = "Elapsed: {}  ".format(deltaToString(timeElapsed))
            totalStr = "Total: {}\r".format(deltaToString(totalEstimatedTime))
            
            print(percentageStr + remainingStr + elapsedStr + totalStr, end="")

    def end(self):
        print()

In [6]:
dm = LocalDataManager()
dataset_path = dm.require('scenes/test.zarr')
zarr_dataset = ChunkedDataset(dataset_path)
zarr_dataset.open()
print(zarr_dataset)

+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
| Num Scenes | Num Frames | Num Agents | Num TR lights | Total Time (hr) | Avg Frames per Scene | Avg Agents per Frame | Avg Scene Time (sec) | Avg Frame frequency |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
|   11314    |  1131400   |  88594921  |    7854144    |      31.43      |        100.00        |        78.31         |        10.00         |        10.00        |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+


In [7]:
print(zarr_dataset.agents)
print(zarr_dataset.agents.shape)
n = zarr_dataset.agents.shape

<zarr.core.Array '/agents' (88594921,) [('centroid', '<f8', (2,)), ('extent', '<f4', (3,)), ('yaw', '<f4'), ('velocity', '<f4', (2,)), ('track_id', '<u8'), ('label_probabilities', '<f4', (17,))] read-only>
(88594921,)


In [12]:
test_mask = np.load('../input/lyft-motion-prediction-autonomous-vehicles/scenes/mask.npz')
for k in test_mask.files:
    print("key:",k)
test_mask = test_mask["arr_0"]
print("test_mask", test_mask)
print("test_mask.shape", test_mask.shape)
print("test_mask[0]", test_mask[0])

key: arr_0
test_mask [False False False ... False False False]
test_mask.shape (88594921,)
test_mask[0] False


In [28]:
def getAgentsChunked(dataset, subsetPercent=1, chunkSize=10, mask_copy=[]):

    datasetLength = round(len(dataset) * subsetPercent)
    print("datasetLength", datasetLength)
    print("chunkSize", chunkSize)
    agents = {}
    pb = ProgressBar(datasetLength)
    pb.start()
    for i in range(0, datasetLength, chunkSize):

        agentsSubset = dataset[i:i+chunkSize]
        for j in range(0,len(agentsSubset)):
            pb.check(i+j)
            if len(mask_copy) > 0 and (j + i < len(mask_copy)) and not(mask_copy[i+j]):
                continue
            
            agent = agentsSubset[j]
            
            centroid = agent[0]
            yaw = agent[2]
            velocity = agent[3]
            track_id = agent[4]

            if track_id not in agents:
                agents[track_id] = []
                
            data = []
            data.append(centroid[0])
            data.append(centroid[1])
            data.append(yaw)
            data.append(velocity[0])
            data.append(velocity[1])
            
            agents[track_id].append(data)
            

    return agents

In [46]:
subsetPercent = 1 # 1*10**-1
subsetLength = round(len(test_mask) * subsetPercent)
print("subsetLength", subsetLength)
count = 0
pb = ProgressBar(subsetLength)
pb.start()
chunkSize = 1000
mask_copy = []
mask_indexes = []
for i in range(0, subsetLength, chunkSize):
    chunkedTestMask = test_mask[i: i + chunkSize]
    for j in range(0, len(chunkedTestMask)):
        mask = chunkedTestMask[j]
        mask_copy.append(mask)
        if mask:
            mask_indexes.append(i + j)
            count = count + 1
        pb.check(i+j)
pb.end()
print("count", count)

subsetLength 88594921
101%  Remaining: 23:59:22  Elapsed: 00:02:05  Total: 00:01:28
count 71122


In [47]:
print(subsetPercent)
agents = getAgentsChunked(zarr_dataset.agents, subsetPercent, 1000, mask_copy)

1
datasetLength 88594921
chunkSize 1000
101%  Remaining: 23:59:16  Elapsed: 00:05:09  Total: 00:04:25

In [48]:
print(len(agents))

lengthOfAgents = []
pb = ProgressBar(len(agents))
count = 0
pb.start()
for key in agents:
    agent = agents[key]
    lengthOfAgents.append(len(agent))
    count += 1
    pb.check(count)
pb.end()

print(len(lengthOfAgents))

1152
  1%  Remaining: 00:00:00  Elapsed: 00:00:00  Total: 00:00:00  2%  Remaining: 00:00:00  Elapsed: 00:00:00  Total: 00:00:00  3%  Remaining: 00:00:00  Elapsed: 00:00:00  Total: 00:00:00  4%  Remaining: 00:00:00  Elapsed: 00:00:00  Total: 00:00:00  5%  Remaining: 00:00:00  Elapsed: 00:00:00  Total: 00:00:00  6%  Remaining: 00:00:00  Elapsed: 00:00:00  Total: 00:00:00  7%  Remaining: 00:00:00  Elapsed: 00:00:00  Total: 00:00:00  8%  Remaining: 00:00:00  Elapsed: 00:00:00  Total: 00:00:00  9%  Remaining: 00:00:00  Elapsed: 00:00:00  Total: 00:00:00 11%  Remaining: 00:00:00  Elapsed: 00:00:00  Total: 00:00:00 12%  Remaining: 00:00:00  Elapsed: 00:00:00  Total: 00:00:00 13%  Remaining: 00:00:00  Elapsed: 00:00:00  Total: 00:00:00 14%  Remaining: 00:00:00  Elapsed: 00:00:00  Total: 00:00:00 15%  Remaining: 00:00:00  Elapsed: 00:00:00  Total: 00:00:00 16%  Remaining: 00:00:00  Elapsed: 00:00:00  Total: 00:00:00 17%  Remaining: 00:00:00  Elapsed: 00:00:00  Total: 00:00:00 18

In [49]:
mean = np.mean(lengthOfAgents)
std = np.std(lengthOfAgents)
min_ = np.min(lengthOfAgents)
max_ = np.max(lengthOfAgents)
median = np.median(lengthOfAgents)

print("mean",mean)
print("std",std)
print("min_",min_)
print("max_",max_)
print("median",median)

mean 61.73784722222222
std 249.62442766490668
min_ 1
max_ 4529
median 33.0
