In [24]:
# Required: Import packages
import json
import pandas as pd

# 0. Load and Clean Data

In [25]:
# Required: Load labels json file containing image object labels

with open('bdd100k/labels/bdd100k_labels_images_train.json') as json_file:
    data = json.load(json_file)

In [26]:
# Required: Normalize semi-structured JSON data into a pd dataframe
data_normalized = pd.json_normalize(data)
type(data_normalized)

pandas.core.frame.DataFrame

In [30]:
# Optional - Testing only -- check number of images 
data_normalized.shape

(69863, 27)

In [None]:
# Optional - Testing only -- check format and contents of df
data_normalized.head()

In [None]:
# Optional - Testing only -- check the df values corresponding to a specific image file name
data_normalized[data_normalized['name'] == '0a0a0b1a-7c39d841.jpg']


In [None]:
# Optional - Testing only -- check what format the "labels" contains
type(data_normalized[data_normalized['name'] == '0a0a0b1a-7c39d841.jpg'].labels)
data_normalized.iloc[3915].labels

**We get a total of 3575 images taken during clear weather, highway, and daytime.**

# 1. Determine number of objects in the image

In [29]:
# Initialize lists of object types. Each list will contain 69863 values, each of which corresponds to an image specified as a row in data_cleaned 
num_objects = []
num_road = []
num_sidewalk = []
num_building = []
num_wall = []
num_fence = []
num_pole = []
num_traffic_light = []
num_traffic_sign = []
num_vegetation = []
num_terrain = []
num_sky = []
num_person = []
num_rider = []
num_car = []
num_truck = []
num_bus = []
num_train = []
num_motorcycle = []
num_bicycle = []
num_lanes = []
num_drivable_area = []

# Iterate through each image in data_normalized, sum up the number of objects corresponding to each object type, and append to
# the corresponding object list
for index, row in data_normalized.iterrows():
    num_objects.append(len(row['labels']))
    num_road.append(sum(x['category'] == 'road' for x in row['labels']))
    num_sidewalk.append(sum(x['category'] == 'sidewalk' for x in row['labels']))
    num_building.append(sum(x['category'] == 'building' for x in row['labels']))
    num_wall.append(sum(x['category'] == 'wall' for x in row['labels']))
    num_fence.append(sum(x['category'] == 'fence' for x in row['labels']))
    num_pole.append(sum(x['category'] == 'pole' for x in row['labels']))
    num_traffic_light.append(sum(x['category'] == 'traffic light' for x in row['labels']))
    num_traffic_sign.append(sum(x['category'] == 'traffic sign' for x in row['labels']))
    num_vegetation.append(sum(x['category'] == 'vegetation' for x in row['labels']))
    num_terrain.append(sum(x['category'] == 'terrain' for x in row['labels']))
    num_sky.append(sum(x['category'] == 'sky' for x in row['labels']))
    num_person.append(sum(x['category'] == 'person' for x in row['labels']))
    num_rider.append(sum(x['category'] == 'rider' for x in row['labels']))
    num_car.append(sum(x['category'] == 'car' for x in row['labels']))
    num_truck.append(sum(x['category'] == 'truck' for x in row['labels']))
    num_bus.append(sum(x['category'] == 'bus' for x in row['labels']))
    num_train.append(sum(x['category'] == 'train' for x in row['labels']))
    num_motorcycle.append(sum(x['category'] == 'motorcycle' for x in row['labels']))
    num_bicycle.append(sum(x['category'] == 'bicycle' for x in row['labels']))
    num_lanes.append(sum(x['category'] == 'lane' for x in row['labels']))
    num_drivable_area.append(sum(x['category'] == 'drivable area' for x in row['labels']))

data_normalized['num_objects'] = num_objects
data_normalized['num_sidewalk'] = num_sidewalk
data_normalized['num_building'] = num_building
data_normalized['num_wall'] = num_wall
data_normalized['num_fence'] = num_fence
data_normalized['num_pole'] = num_pole
data_normalized['num_traffic_light'] = num_traffic_light
data_normalized['num_traffic_sign'] = num_traffic_sign
data_normalized['num_vegetation'] = num_vegetation
data_normalized['num_terrain'] = num_terrain
data_normalized['num_sky'] = num_sky
data_normalized['num_person'] = num_person
data_normalized['num_rider'] = num_rider
data_normalized['num_car'] = num_car
data_normalized['num_truck'] = num_truck
data_normalized['num_bus'] = num_bus
data_normalized['num_train'] = num_train
data_normalized['num_motorcycle'] = num_motorcycle
data_normalized['num_bicycle'] = num_bicycle
data_normalized['num_lanes'] = num_lanes
data_normalized['num_drivable_area'] = num_drivable_area

In [31]:
# Required: Filter only for images where weather == "clear", scene == "highway", and timeofday == "daytime". 
# Note: A better implementation may be to filter the dataframe prior to counting the objects per image, 
# but it may be useful to keep the data_normalized and object counts for each image in case we want to use different filtering values.

data_cleaned = data_normalized[(data_normalized['attributes.weather'] == "clear") & 
               (data_normalized['attributes.scene'] == "highway") & 
               (data_normalized['attributes.timeofday'] == "daytime")]
data_cleaned

Unnamed: 0,name,timestamp,labels,attributes.weather,attributes.scene,attributes.timeofday,num_objects,num_sidewalk,num_building,num_wall,...,num_person,num_rider,num_car,num_truck,num_bus,num_train,num_motorcycle,num_bicycle,num_lanes,num_drivable_area
10,00067cfb-caba8a02.jpg,10000,"[{'category': 'traffic light', 'attributes': {...",clear,highway,daytime,29,0,0,0,...,0,0,3,0,1,0,0,0,11,3
62,002d290d-89f4e5c0.jpg,10000,"[{'category': 'car', 'attributes': {'occluded'...",clear,highway,daytime,23,0,0,0,...,0,0,6,0,0,1,0,0,10,2
88,003baca5-aab2e274.jpg,10000,"[{'category': 'traffic sign', 'attributes': {'...",clear,highway,daytime,6,0,0,0,...,0,0,2,0,0,0,0,0,1,1
89,003baca5-ad660439.jpg,10000,"[{'category': 'car', 'attributes': {'occluded'...",clear,highway,daytime,15,0,0,0,...,0,0,10,1,0,0,0,0,2,1
92,003e23ee-07d32feb.jpg,10000,"[{'category': 'traffic sign', 'attributes': {'...",clear,highway,daytime,28,0,0,0,...,0,0,6,1,1,0,0,0,8,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69675,b18cb922-e3af77af.jpg,10000,"[{'category': 'traffic sign', 'attributes': {'...",clear,highway,daytime,18,0,0,0,...,0,0,5,0,0,0,0,0,9,2
69805,fde2db45-f6e2fbd1.jpg,10000,"[{'category': 'car', 'attributes': {'occluded'...",clear,highway,daytime,11,0,0,0,...,0,0,4,0,0,0,0,0,6,1
69809,fde816b0-1b0f1a85.jpg,10000,"[{'category': 'traffic sign', 'attributes': {'...",clear,highway,daytime,29,0,0,0,...,0,0,8,0,0,0,0,0,9,2
69812,fdebe7ab-8409a734.jpg,10000,"[{'category': 'car', 'attributes': {'occluded'...",clear,highway,daytime,40,0,0,0,...,0,0,12,0,0,0,0,0,8,2


# 2. Testing

# 3. Determine the pixel locations of the objects in the image, specifically drivable area