In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import os
import json
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
from engine import train_one_epoch, evaluate
from torch.utils.data import TensorDataset, DataLoader
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from PIL import Image

In [2]:
train_path = "../bdd100k/images/100k/train"
val_path = "../bdd100k/images/100k/val"

train_json = "../bdd100k/images/100k/labels/det_train.json"
val_json = "../bdd100k/images/100k/labels/det_val.json"

Check JSON files. What is the format? What is provided?

In [3]:
with open(train_json) as file:
    train_labels = json.load(file)

In [4]:
# train labels is a list
train_labels[0].keys()

dict_keys(['name', 'attributes', 'timestamp', 'labels'])

In [5]:
train_labels[0]["name"]

'0000f77c-6257be58.jpg'

In [6]:
train_labels[0]["attributes"]

{'weather': 'clear', 'timeofday': 'daytime', 'scene': 'city street'}

In [7]:
# Each image comes with a list of labels

# "box2d" comtains a dictionary of x1, y1, x2, y2 coordinates which form the bounding box
train_labels[0]["labels"]

[{'id': '0',
  'attributes': {'occluded': False,
   'truncated': False,
   'trafficLightColor': 'G'},
  'category': 'traffic light',
  'box2d': {'x1': 1125.902264,
   'y1': 133.184488,
   'x2': 1156.978645,
   'y2': 210.875445}},
 {'id': '1',
  'attributes': {'occluded': False,
   'truncated': False,
   'trafficLightColor': 'G'},
  'category': 'traffic light',
  'box2d': {'x1': 1156.978645,
   'y1': 136.637417,
   'x2': 1191.50796,
   'y2': 210.875443}},
 {'id': '2',
  'attributes': {'occluded': False,
   'truncated': False,
   'trafficLightColor': 'NA'},
  'category': 'traffic sign',
  'box2d': {'x1': 1105.66915985699,
   'y1': 211.122087,
   'x2': 1170.79037,
   'y2': 233.566141}},
 {'id': '3',
  'attributes': {'occluded': False,
   'truncated': True,
   'trafficLightColor': 'NA'},
  'category': 'traffic sign',
  'box2d': {'x1': 0.0, 'y1': 0.246631, 'x2': 100.381647, 'y2': 122.825696}},
 {'id': '4',
  'attributes': {'occluded': False,
   'truncated': False,
   'trafficLightColor': 'N

In [8]:
for lst in train_labels:
    
    for item in lst["labels"]:
        
        if "crowd" in item["attributes"]:
            
            print(item)

{'id': '86', 'attributes': {'occluded': True, 'truncated': False, 'crowd': False, 'trafficLightColor': 'NA'}, 'category': 'pedestrian', 'box2d': {'x1': 801.6364413044971, 'y1': 366.3120859236561, 'x2': 822.6767678479223, 'y2': 427.329032899589}}
{'id': '274', 'attributes': {'occluded': True, 'truncated': False, 'crowd': False, 'trafficLightColor': 'NA'}, 'category': 'pedestrian', 'box2d': {'x1': 916.306220966164, 'y1': 356.8439389791148, 'x2': 935.2425148552467, 'y2': 434.6931471897878}}
{'id': '2926', 'attributes': {'occluded': True, 'truncated': False, 'crowd': False, 'trafficLightColor': 'NA'}, 'category': 'car', 'box2d': {'x1': 390.298057380536, 'y1': 355.79192265194354, 'x2': 471.30331457272274, 'y2': 393.6645104301088}}
{'id': '3873', 'attributes': {'occluded': True, 'truncated': False, 'crowd': False, 'trafficLightColor': 'NA'}, 'category': 'car', 'box2d': {'x1': 103.0976000627831, 'y1': 311.6072369107508, 'x2': 149.38631845831836, 'y2': 342.1157103987172}}
{'id': '4079', 'attri

KeyError: 'labels'

For object detection, 10 classes are evaluated

In [9]:
label_dict = {
                1: "pedestrian",
                2: "rider",
                3: "car",
                4: "truck",
                5: "bus",
                6: "train",
                7: "motorcycle",
                8: "bicycle",
                9: "traffic light",
                10: "traffic sign"
             }
label_dict = {label_dict[item]:item for item in label_dict}
label_dict

{'pedestrian': 1,
 'rider': 2,
 'car': 3,
 'truck': 4,
 'bus': 5,
 'train': 6,
 'motorcycle': 7,
 'bicycle': 8,
 'traffic light': 9,
 'traffic sign': 10}

Want to train on images whose weather attribute is sunny.

In [10]:
class bdd100k_dataset(torch.utils.data.Dataset):
    
    def __init__(self, filenames, json_labels, path):
        self.filenames = filenames
        self.json_labels = json_labels
        self.path = path
        self.transform = transforms.Compose([transforms.ToTensor()])
                                            
    def __len__(self):
        return len(self.filenames)
    
    def __getitem__(self, idx):
        
        filename = os.path.join(self.path, self.filenames[idx])
        img = Image.open(filename).convert("RGB")
        
        assert(self.filenames[idx] == self.json_labels[idx]["name"])

        labels = self.json_labels[idx]["labels"] # list of dictionaries

        target = {}

        boxes = []
        categories = []
        crowded = []
        areas = []
        for item in labels:
            
            if item["category"] not in label_dict.keys():
                continue
            
            box2d = item["box2d"]
            
            x1 = box2d["x1"]
            y1 = box2d["y1"]
            
            x2 = box2d["x2"]
            y2 = box2d["y2"]
            
            boxes.append([x1, y1, x2, y2])
            categories.append(label_dict[item["category"]])
            crowded.append((False if "crowd" not in item["attributes"] else item["attributes"]["crowd"]))
            areas.append(abs(x1 - x2) * abs(y1 - y2))
        
        target["boxes"] = torch.tensor(boxes, dtype=torch.float32)
        
        target["labels"] = torch.tensor(categories, dtype=torch.int64)
        target["image_id"] = torch.tensor([idx], dtype=torch.int64)
        target["area"] = torch.tensor(areas, dtype=torch.float32)
        target["iscrowd"] = torch.tensor(crowded, dtype=torch.uint8)
        
        return self.transform(img), target

In [11]:
trainX_filenames = []
train_json = []

# get all images whose weather attribute is clear
for item in train_labels:

    # if the weather is clear
    if item["attributes"]["weather"] == "clear":

        trainX_filenames.append(item["name"])
        train_json.append(item)

assert len(trainX_filenames) == len(train_json)
print("Number of `clear` training samples: %s" % len(trainX_filenames))

Number of `clear` training samples: 37344


In [12]:
def collate_fn(batch):
    return tuple(zip(*batch))

dataset = bdd100k_dataset(trainX_filenames, 
                          train_json, 
                          train_path)

loader = DataLoader(dataset, batch_size=64,
                    shuffle=True, 
                    collate_fn=collate_fn)

In [13]:
def train_model(model, dataloader,
                criterion, optimizer, 
                scheduler, num_epochs, device):
    
    # switch to training mode
    model.train()
    
    # send params to device
    model = model.to(device)

    for epoch in range(num_epochs):
        
        print("epoch: %s" % epoch)

        # train for one epoch, printing every 10 iterations
        train_one_epoch(model, optimizer, dataloader, device,
                       epoch, print_freq=10)
        
        # update learning rate
        scheduler.step()

    return model

In [14]:
# Instantiate pretrained model

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# label 0 is reserved for the background class
num_classes = 11

in_features = model.roi_heads.box_predictor.cls_score.in_features

# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [None]:
# Train the model on "clear" images

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

num_epochs = 10

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_model(model, loader,
            criterion, optimizer, 
            scheduler, 
            num_epochs, device)


In [None]:
# save the trained model

# iterate through all of the domains in the validation set

# make a data loader

# evaluate 

# write to 

In [15]:
weather_domains = ["rainy", "snowy", "clear", "overcast", "partly cloudy", "foggy"]

In [16]:
with open(val_json) as file:
    val_labels = json.load(file)

In [None]:
# for each weather domain
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
for weather_attr in weather_domains:
    
    ##################################
    # Get all validation images and  #
    # create dataloader              #
    ##################################
    
    filenames = []
    json = []

    # get all images whose weather attribute is clear
    for item in val_labels:
        
        if "labels" not in item:
            continue

        # if the weather is clear
        if item["attributes"]["weather"] == weather_attr:

            filenames.append(item["name"])
            json.append(item)

    assert len(filenames) == len(json)
    print("Number of %s samples: %s" % (weather_attr, len(filenames)))
    
    dataset = bdd100k_dataset(filenames, 
                              json, 
                              val_path)

    loader = DataLoader(dataset, batch_size=64,
                        shuffle=True, 
                        collate_fn=collate_fn)
    
    #######################
    # evaluate on dataset #
    #######################
    
    evaluate(model, loader, device=device)
    
    

Number of rainy samples: 738
creating index...
index created!


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Test:  [ 0/12]  eta: 2:02:44  model_time: 611.9090 (611.9090)  evaluator_time: 0.3022 (0.3022)  time: 613.6703  data: 1.4485
