In [1]:
import os

import torch
from torch.utils import data
from torchvision.datasets.folder import pil_loader
from torchvision import tv_tensors

from helpers.utils import load_json

# BDD100K detection category name -> integer label id. Ids are assigned
# consecutively from 1 in the order the names are listed below.
class_map = {
    category: label_id
    for label_id, category in enumerate(
        (
            "bus",
            "car",
            "motor",
            "person",
            "rider",
            "traffic light",
            "traffic sign",
            "bike",
            "truck",
        ),
        start=1,
    )
}

class BDDDataset(data.Dataset):
    """BDD100K object-detection dataset yielding ``(image, target)`` pairs.

    Annotations are read once, at construction time, from the label JSON under
    ``root`` and converted into detection targets. Each item's target is a dict
    with the keys ``boxes`` (float tensor of ``[x1, y1, x2, y2]`` rows),
    ``labels`` (int64 ids from ``class_map``), ``area`` (float), ``iscrowd``
    (int64, always 0) and ``image_id`` (1-based position of the annotation in
    the loaded annotation list).

    Args:
        root: Dataset root containing ``labels/`` and ``images/100k/``.
        train: Use the train split when True, otherwise the val split.
        transform: Optional callable applied to the loaded PIL image only;
            boxes are returned untouched, in the annotation's coordinates.

    Raises:
        FileNotFoundError: If an annotated image is missing on disk.
    """

    # Categories dropped from every annotation. The original comment explains
    # that "lane" and "drivable area" carry poly2d instead of box2d; "train" is
    # filtered as well and has no entry in class_map.
    _EXCLUDED_CATEGORIES = frozenset({"lane", "drivable area", "train"})

    def __init__(self, root, train=True, transform=None):
        self.root = root
        self.train = train
        self.transform = transform
        self.samples = None
        self.prepare()

    def prepare(self):
        """Populate ``self.samples`` with ``(image_path, target_dict)`` tuples."""
        self.samples = []

        if self.train:
            annotations = load_json(os.path.join(self.root, "labels/bdd100k_labels_images_train.json"))
            # Only the first 1000 training annotations are used.
            annotations = annotations[:1000]
            image_dir = os.path.join(self.root, "images/100k/train")
        else:
            annotations = load_json(os.path.join(self.root, "labels/bdd100k_labels_images_val.json"))
            # Cap the validation annotations at 10000.
            annotations = annotations[:10000]
            image_dir = os.path.join(self.root, "images/100k/val")

        for idx, ann in enumerate(annotations):
            # Keep only labels that describe a detection box (see
            # _EXCLUDED_CATEGORIES); set membership keeps this linear.
            labels = [
                label
                for label in ann["labels"]
                if label["category"] not in self._EXCLUDED_CATEGORIES
            ]
            if not labels:
                # Nothing left to detect in this image: skip it entirely.
                continue

            boxes = [
                [label["box2d"]["x1"], label["box2d"]["y1"], label["box2d"]["x2"], label["box2d"]["y2"]]
                for label in labels
            ]

            target = {}
            target["boxes"] = boxes
            target["labels"] = [class_map[label["category"]] for label in labels]
            # image_id follows the annotation's position, so skipped
            # annotations leave gaps in the id sequence.
            target["image_id"] = idx + 1
            target["area"] = [(box[3] - box[1]) * (box[2] - box[0]) for box in boxes]
            target["iscrowd"] = [0 for _ in boxes]
            # no mask

            image_path = os.path.join(image_dir, ann["name"])

            if not os.path.exists(image_path):
                # Name the offending file so a broken download is diagnosable.
                raise FileNotFoundError(f"Annotated image not found: {image_path}")
            self.samples.append((image_path, target))

    def __getitem__(self, index):
        """Load the image at ``index`` and return it with its tensor target."""
        image_path, annotation = self.samples[index]

        image = pil_loader(image_path)

        if self.transform is not None:
            image = self.transform(image)

        # torch.tensor() already copies the Python lists into fresh tensors that
        # do not require grad, so no extra clone()/detach() is needed.
        target = {}
        target["boxes"] = torch.tensor(annotation["boxes"], dtype=torch.float)
        target["labels"] = torch.tensor(annotation["labels"], dtype=torch.int64)
        target["area"] = torch.tensor(annotation["area"], dtype=torch.float)
        target["iscrowd"] = torch.tensor(annotation["iscrowd"], dtype=torch.int64)
        target["image_id"] = annotation["image_id"]

        return image, target

    def __len__(self):
        """Number of usable samples (annotations with at least one box)."""
        return len(self.samples)

def custom_collate_fn(batch):
    """Collate ``(image, target)`` samples into ``(stacked_images, [targets])``.

    Images go through the default collate (stacked into one batch tensor), while
    the per-image target dicts are kept as a plain list because each image can
    carry a different number of boxes.
    """
    image_seq, target_seq = zip(*batch)
    return data.dataloader.default_collate(image_seq), list(target_seq)


In [2]:
from torchvision import transforms

# transform = transforms.Compose([transforms.Resize(64), transforms.ToTensor()])
# Images are only converted to tensors here; no resizing is applied.
transform = transforms.Compose([transforms.ToTensor()])

# Shuffled training loader over the train split.
loader_train = data.DataLoader(
    BDDDataset("../data/bdd100k", transform=transform),
    batch_size=32,
    shuffle=True,
    num_workers=10,
    collate_fn=custom_collate_fn,
)

# Deterministic (unshuffled) loader over the val split.
loader_val = data.DataLoader(
    BDDDataset("../data/bdd100k", transform=transform, train=False),
    batch_size=96,
    shuffle=False,
    num_workers=10,
    collate_fn=custom_collate_fn,
)

In [3]:
# images, annotations = next(iter(loader_train))
# model.train()
# output = model(images, annotations)

In [4]:
# ## visualize the bboxes 

# import matplotlib.pyplot as plt
# import matplotlib.patches as patches

# def draw_boxes(image, annotation):
#     """
#     Draws bounding boxes on the image.
#     :param image: PIL image
#     :param annotation: Annotation data for the image
#     :return: Image with bounding boxes
#     """
#     # Convert PIL Image to a matplotlib object
#     fig, ax = plt.subplots(1)
#     ax.imshow(image)

#     for box in annotation['boxes']:
#         rect = patches.Rectangle((box[0], box[1]), box[2] - box[0], box[3] - box[1], linewidth=1, edgecolor='r', facecolor='none')
#         ax.add_patch(rect)

#     return fig


# for i, (images, annotations) in enumerate(loader_val):
#     if i == 0:  # Visualize the first batch
#         for j in range(len(images)):
#             image = transforms.functional.to_pil_image(images[j])
#             annotation = annotations[j]
#             fig = draw_boxes(image, annotation)
#             plt.show()
#         break


In [5]:
# vals = load_json(os.path.join("../bdd100k", "labels/bdd100k_labels_images_val.json"))
# classes = []
# for val in vals:
#     for label in val['labels']:
#         classes.append(label['category'])
        
# from collections import Counter
# counts = Counter(classes)
# # hbar
# import matplotlib.pyplot as plt
# plt.barh(list(counts.keys()), list(counts.values()))

In [6]:
## do a baseline model
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor, FasterRCNN_MobileNet_V3_Large_FPN_Weights

# Start from the COCO-pretrained Faster R-CNN with a MobileNetV3-Large FPN backbone.
model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(
    weights=FasterRCNN_MobileNet_V3_Large_FPN_Weights.COCO_V1
)

# Size the box head from the label map instead of a hard-coded "9 + 1": one
# output per label id in class_map, plus index 0 (torchvision detection models
# reserve class 0 for background).
num_classes = max(class_map.values()) + 1
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)


## num of params
n_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
n_total = sum(p.numel() for p in model.parameters())
print(f"train params / params  = {n_trainable} / {n_total}")

train params / params  = 18912333 / 18971229


In [7]:
# Display the model's repr (its module tree) as the cell output.
model

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): FrozenBatchNorm2d(16, eps=1e-05)
        (2): Hardswish()
      )
      (1): InvertedResidual(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
            (1): FrozenBatchNorm2d(16, eps=1e-05)
            (2): ReLU(inplace=True)
          )
          (1): Conv2dNormActivation(
            (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): FrozenBatchNorm2d(16, eps=1e-05)
          )
        )
      )
      (2): InvertedResidual(
        (block):

In [8]:
from helpers.engine import train_one_epoch, evaluate

In [9]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Move the model to its device BEFORE building the optimizer: the torch.optim
# documentation advises constructing optimizers only after the parameters are
# on their final device.
model = model.to(device)

# Optimise only the parameters that are not frozen.
params = [p for p in model.parameters() if p.requires_grad]

optimizer = torch.optim.Adam(params, lr=0.001)
# Multiply the learning rate by 0.1 every 3 scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [10]:
# model.train()
# for (images, targets) in loader_val:
#     images = list(image.to(device) for image in images)
#     targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()} for t in targets]
#     outputs = model(images, targets)
#     break

In [11]:
# model.eval()
# for (images, annotations) in loader_train:
#     images = list(image.to(device) for image in images)
#     annotations = [{k: v.to(device) for k, v in t.items() if isinstance(v, torch.Tensor)} for t in annotations]
#     outputs = model(images)
#     break

In [12]:
from torchvision.ops import box_iou
from netcal.metrics import ECE


In [13]:
from helpers.coco_eval import CocoEvaluator
from helpers.coco_utils import get_coco_api_from_dataset
import pickle

# with open('bdd_coco_evaluator.pkl', 'rb') as f:
#     coco_evaluator = pickle.load(f)['coco']
    
# Build the COCO-style ground truth from the validation dataset and wrap it in
# an evaluator restricted to bounding-box IoU.
# NOTE: the recorded output shows this iterating over all 10000 dataset items
# (~1m19s), which is presumably why the pickle caching is sketched in the
# commented-out lines around this code.
iou_types = ["bbox"]
coco = get_coco_api_from_dataset(loader_val.dataset)
coco_evaluator = CocoEvaluator(coco, iou_types)

# with open('bdd_coco_evaluator.pkl', 'wb') as f:
#     pickle.dump({"coco": coco}, f)

100%|██████████| 10000/10000 [01:19<00:00, 125.41it/s]


creating index...
index created!


In [14]:
# model.train()
# images, targets = next(iter(loader_train))
# images = list(image.to(device) for image in images)
# targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()} for t in targets]
# loss_dict = model(images, targets)

In [15]:
import wandb
from datetime import datetime

# Name each run "<experiment>-<timestamp>" so repeated runs stay distinguishable.
expt_name = "baseline"
time_stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# NOTE(review): "data_subset" is recorded as "5k", while BDDDataset.prepare()
# in this notebook slices the train annotations to the first 1000 — confirm
# which is intended.
run = wandb.init(
    project='bml-od',
    name=f"{expt_name}-{time_stamp}",
    config=dict(
        model="fasterrcnn_mobilenet_v3_large_fpn",
        dataset="bdd100k",
        data_subset="5k",
        model_subset="all",
    ),
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mnura-ortap[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [16]:
# Forward the whole training loader with gradients disabled, keeping the model
# in train mode, and collect the per-batch loss dicts and calibration data.
# NOTE(review): the two-value unpacking assumes a modified detection model whose
# training-mode forward returns (loss_dict, ece_data) — confirm.
model.train()

full_loss_dict, full_ece_data = [], []
for images, targets in loader_train:
    images = [image.to(device) for image in images]
    # Move tensor fields to the device; non-tensor fields (e.g. image_id) pass through.
    targets = [
        {k: (v.to(device) if isinstance(v, torch.Tensor) else v) for k, v in t.items()}
        for t in targets
    ]
    with torch.cuda.amp.autocast(False), torch.inference_mode():
        loss_dict, ece_data = model(images, targets)
    full_loss_dict.append(loss_dict)
    full_ece_data.append(ece_data)
    # break

In [23]:
# Average every loss term over the collected batches.
final_loss_dict = {
    loss_name: torch.stack([batch_losses[loss_name] for batch_losses in full_loss_dict]).mean()
    for loss_name in full_loss_dict[0].keys()
}

# Concatenate the per-batch probabilities and labels for each layer.
final_ece_data = {}
for layer in ['prop', 'det']:
    final_ece_data[layer] = {
        field: torch.hstack([batch_ece[layer][field] for batch_ece in full_ece_data])
        for field in ('probs', 'labels')
    }

In [32]:
from netcal.metrics import ECE
from helpers.engine import get_metrics

In [29]:
# 10-bin ECE for the 'prop' and 'det' layers, computed in that order on the
# aggregated probabilities and labels.
prop_ece, det_ece = (
    ECE(bins=10).measure(
        final_ece_data[layer_name]['probs'].detach().cpu().numpy(),
        final_ece_data[layer_name]['labels'].detach().cpu().numpy(),
    )
    for layer_name in ('prop', 'det')
)

In [36]:
from tqdm import tqdm
def get_metrics(model, data_loader, device):
    """Compute mean losses and pooled calibration data over a data loader.

    The model is switched to train mode for the pass and run under
    ``torch.inference_mode()``, so no gradients are recorded. The model's
    previous train/eval mode is restored afterwards.

    NOTE(review): assumes ``model(images, targets)`` returns a
    ``(loss_dict, ece_data)`` pair where ``ece_data[layer]`` has ``'probs'`` and
    ``'labels'`` tensors for ``layer`` in ``('prop', 'det')`` — confirm against
    the model implementation.

    Args:
        model: Detection model called as ``model(images, targets)``.
        data_loader: Iterable of ``(images, targets)`` batches, with ``targets``
            a sequence of dicts.
        device: Device the images and tensor target fields are moved to.

    Returns:
        dict with ``"losses"`` (each loss term averaged over batches) and
        ``"ece_data"`` (per layer, ``probs`` and ``labels`` concatenated over
        all batches).

    Raises:
        ValueError: If ``data_loader`` yields no batches.
    """
    was_training = model.training
    model.train()

    full_loss_dict = []
    full_ece_data = []
    try:
        for images, targets in tqdm(data_loader):
            images = [image.to(device) for image in images]
            # Move tensor fields to the device; other fields pass through unchanged.
            targets = [
                {
                    k: v.to(device) if isinstance(v, torch.Tensor) else v
                    for k, v in t.items()
                }
                for t in targets
            ]
            with torch.cuda.amp.autocast(False), torch.inference_mode():
                loss_dict, ece_data = model(images, targets)
            full_loss_dict.append(loss_dict)
            full_ece_data.append(ece_data)
    finally:
        # Do not leak the forced train mode to the caller.
        model.train(was_training)

    if not full_loss_dict:
        # Fail with a clear message instead of an obscure stacking/indexing error.
        raise ValueError("data_loader yielded no batches; cannot compute metrics")

    final_ece_data = {}
    for layer in ['prop', 'det']:
        final_ece_data[layer] = {
            'probs': torch.hstack([ece_data[layer]['probs'] for ece_data in full_ece_data]),
            'labels': torch.hstack([ece_data[layer]['labels'] for ece_data in full_ece_data]),
        }

    metrics = {
        "losses": {
            k: torch.stack([loss_dict[k] for loss_dict in full_loss_dict]).mean()
            for k in full_loss_dict[0].keys()
        },
        "ece_data": final_ece_data,
    }

    return metrics

In [39]:
def _layer_ece(split_metrics, layer):
    """10-bin ECE for one layer ('prop' or 'det') of a get_metrics() result."""
    layer_data = split_metrics['ece_data'][layer]
    return ECE(bins=10).measure(
        layer_data['probs'].detach().cpu().numpy(),
        layer_data['labels'].detach().cpu().numpy(),
    )


# Gather losses and calibration data on both splits.
t_metrics = get_metrics(model, loader_train, device)
v_metrics = get_metrics(model, loader_val, device)

train_prop_ece = _layer_ece(t_metrics, 'prop')
train_det_ece = _layer_ece(t_metrics, 'det')

val_prop_ece = _layer_ece(v_metrics, 'prop')
val_det_ece = _layer_ece(v_metrics, 'det')

# Push everything to the active wandb run in one step.
run.log(dict(
    train_losses=t_metrics["losses"],
    val_losses=v_metrics["losses"],
    train_prop_ece=train_prop_ece,
    train_det_ece=train_det_ece,
    val_prop_ece=val_prop_ece,
    val_det_ece=val_det_ece,
))

100%|██████████| 105/105 [01:52<00:00,  1.07s/it]


In [41]:
# Inspect the batch-averaged training losses returned by get_metrics().
t_metrics["losses"]

{'loss_classifier': tensor(2.5136, device='cuda:0'),
 'loss_box_reg': tensor(0.7167, device='cuda:0'),
 'loss_objectness': tensor(0.1477, device='cuda:0'),
 'loss_rpn_box_reg': tensor(0.1049, device='cuda:0')}

In [45]:
# Log the same metrics payload as the metrics cell above to whatever `run`
# currently refers to.
# NOTE(review): the execution counts suggest this was run after `run` had been
# re-initialised (In [44] precedes In [45]); in file order it comes first.
run.log(dict(
    train_losses=t_metrics["losses"],
    val_losses=v_metrics["losses"],
    train_prop_ece=train_prop_ece,
    train_det_ece=train_det_ece,
    val_prop_ece=val_prop_ece,
    val_det_ece=val_det_ece,
))

In [44]:
# Start a second wandb run; the name reuses the earlier experiment name and
# timestamp with a "2" suffix, and the config is unchanged.
run = wandb.init(
    project='bml-od',
    name=f"{expt_name}-{time_stamp}2",
    config=dict(
        model="fasterrcnn_mobilenet_v3_large_fpn",
        dataset="bdd100k",
        data_subset="5k",
        model_subset="all",
    ),
)

In [60]:
from diagla.curvatures import Diagonal

In [61]:
# Wrap the model in a diagla `Diagonal` curvature object.
# NOTE(review): layer_types=['Linear'] presumably restricts the curvature to
# Linear layers — confirm against the diagla API.
diag = Diagonal(model, layer_types=['Linear'])

In [65]:
from helpers import utils
model.train()
# Optional per-iteration scheduler (disabled here). It has its own name so the
# notebook-level `lr_scheduler` (the StepLR built with the optimizer) is NOT
# overwritten with None — the epoch loop further down calls lr_scheduler.step().
iter_lr_scheduler = None
for images, targets in tqdm(loader_train):
    images = [image.to(device) for image in images]
    # Move tensor fields to the device; non-tensor fields pass through unchanged.
    targets = [
        {
            k: v.to(device) if isinstance(v, torch.Tensor) else v
            for k, v in t.items()
        }
        for t in targets
    ]
    with torch.cuda.amp.autocast(False):
        loss_dict, ece_data = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

    # reduce losses over all GPUs for logging purposes
    loss_dict_reduced = utils.reduce_dict(loss_dict)
    losses_reduced = sum(loss for loss in loss_dict_reduced.values())
    loss_value = losses_reduced.item()

    optimizer.zero_grad()
    losses.backward()
    optimizer.step()

    # Accumulate the curvature estimate after each optimisation step.
    # NOTE(review): presumably diag.update() reads the gradients left on the
    # parameters by backward() — confirm against the diagla API.
    diag.update(batch_size=len(images))

    if iter_lr_scheduler is not None:
        iter_lr_scheduler.step()
        
    


100%|██████████| 157/157 [01:15<00:00,  2.07it/s]


In [75]:
# Invert the accumulated curvature.
# NOTE(review): `add` / `multiply` look like regularisation hyper-parameters of
# the inversion — confirm their meaning against the diagla API.
diag.invert(add=0.1, multiply=1)

In [76]:
# NOTE(review): the recorded output of this cell is `KeyError: 1000`, so the
# argument is being used as a lookup key rather than a sample count — check the
# expected argument of Diagonal.sample() and fix or remove this cell.
diag.sample(1000)

KeyError: 1000

In [58]:
# for name, param in model.named_parameters():
#     print(name, param.shape)

In [47]:
# NOTE(review): `la` is not assigned in any visible cell; the recorded output
# shows a laplace.lllaplace.KronLLLaplace object left over from earlier kernel
# state. This cell fails on a fresh Restart & Run All.
la

<laplace.lllaplace.KronLLLaplace at 0x7f066866abc0>

In [None]:


# NOTE(review): depends on `la`, which no visible cell defines (hidden kernel state).
la.optimize_prior_precision(method="marglik")

In [25]:
# Display the averaged losses and the concatenated calibration data.
final_loss_dict, final_ece_data

({'loss_classifier': tensor(2.5136, device='cuda:0'),
  'loss_box_reg': tensor(0.7163, device='cuda:0'),
  'loss_objectness': tensor(0.1479, device='cuda:0'),
  'loss_rpn_box_reg': tensor(0.1048, device='cuda:0')},
 {'prop': {'probs': tensor([0.5536, 0.6188, 0.5925,  ..., 0.0371, 0.9870, 0.2780], device='cuda:0'),
   'labels': tensor([1., 1., 1.,  ..., 0., 0., 0.], device='cuda:0')},
  'det': {'probs': tensor([0.1404, 0.1438, 0.1657,  ..., 0.1734, 0.1506, 0.1387], device='cuda:0'),
   'labels': tensor([2, 2, 2,  ..., 2, 7, 7], device='cuda:0')}})

In [None]:
# Train/evaluate for up to `num_epochs` epochs.
# NOTE: the `break` at the end of the loop body stops after the FIRST epoch
# (quick smoke run); remove it to actually train for `num_epochs` epochs.
num_epochs = 5

for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_metrics = train_one_epoch(model, optimizer, loader_train, device, epoch, print_freq=10, wandbrun=run)
    # update the learning rate; another cell in this notebook can set
    # `lr_scheduler` to None, so guard against that instead of crashing
    if lr_scheduler is not None:
        lr_scheduler.step()
    # evaluate on the test dataset
    eval_metrics = evaluate(model, loader_val, device, coco_evaluator)
    break


In [None]:
# Scratch: metrics over the training loader. The result type depends on which
# get_metrics definition is currently bound (this notebook defines two).
t1 = get_metrics(model, loader_train, device)

In [None]:
# NOTE(review): this unpacking matches the get_metrics variant that returns
# `(full_loss_dict, full_ece_data)`. With the variant that returns a
# {"losses", "ece_data"} dict, it would bind the two dict KEYS instead.
full_loss_dict, full_ece_data = get_metrics(model, loader_val, device)

In [None]:
# Scratch: shape of the first 'det' labels entry of the second collected batch.
full_ece_data[1]['det']['labels'][0].shape

In [None]:
# Scratch: vstack the 'prop' probs of each collected batch (one result per batch).
[torch.vstack(full_ece_data[i]['prop']['probs']) for i in range(len(full_ece_data))]

In [None]:
# Scratch: concatenate the first 'probs' / 'labels' entry of every batch.
# NOTE: `probs` and `labels` are overwritten on each iteration, so after the
# loop they hold only the values for the last key ('det').
for k in ['prop', 'det']:
    probs = torch.hstack([full_ece_data[i][k]['probs'][0] for i in range(len(full_ece_data))])
    labels = torch.hstack([full_ece_data[i][k]['labels'][0] for i in range(len(full_ece_data))])
    # print(probs)
    

In [None]:
# Scratch: shapes of the tensors left over from the loop in the previous cell.
probs.shape, labels.shape

In [None]:
# Scratch: for the FIRST collected batch only, stack the per-entry 'probs'
# (vstack) and 'labels' (hstack) under each top-level key.
# NOTE(review): this indexes full_ece_data[0][k][i]["probs"], a different layout
# from the full_ece_data[i][k]['probs'] used by the other cells — confirm which
# structure the model actually returns.
{
    k: {
         "probs": torch.vstack([full_ece_data[0][k][i]["probs"] for i in range(len(full_ece_data[0][k]))]),
         "labels": torch.hstack([full_ece_data[0][k][i]["labels"] for i in range(len(full_ece_data[0][k]))]),
    }
    for k in full_ece_data[0].keys()
}

In [None]:
from tqdm import tqdm
def get_metrics(model, data_loader, device):
    """Run the model over ``data_loader`` and return the raw per-batch outputs.

    The model is put in train mode and each batch is forwarded under
    ``torch.inference_mode()`` with CUDA autocast disabled. Nothing is
    aggregated here; the caller receives one entry per batch.

    NOTE(review): this rebinds the name ``get_metrics``; another cell in this
    notebook defines a variant that returns an aggregated dict instead.

    Returns:
        ``(loss_dicts, ece_entries)``: two lists holding, per batch, the first
        and second value returned by ``model(images, targets)``.
    """
    model.train()

    loss_history, ece_history = [], []
    for images, targets in tqdm(data_loader):
        images = [image.to(device) for image in images]
        # Tensor fields follow the images to the device; anything else is kept as is.
        targets = [
            {key: (val.to(device) if isinstance(val, torch.Tensor) else val) for key, val in target.items()}
            for target in targets
        ]
        with torch.cuda.amp.autocast(False), torch.inference_mode():
            batch_losses, batch_ece = model(images, targets)
        loss_history.append(batch_losses)
        ece_history.append(batch_ece)

    return loss_history, ece_history

In [None]:
# Scratch: forward one validation batch in train mode.
# NOTE(review): `images` and `targets` are passed exactly as the loader returns
# them, without the `.to(device)` moves used elsewhere in this notebook.
images, targets = next(iter(loader_val))

model.train()
outputs = model(images, targets)

In [None]:
# Scratch: inspect the 'loss' meter of the object returned by train_one_epoch().
train_metrics.meters['loss']

In [None]:
# NOTE(review): `eval0` is not assigned in any visible cell (hidden kernel state).
eval0.summarize()

In [None]:
# Scratch: forward `images` without targets, using whatever mode the model is in.
outputs = model(images)

In [None]:
# Scratch: pull a single batch from the training loader.
images, targets = next(iter(loader_train))


In [None]:
# Load the raw validation label JSON (unfiltered) for the category count below.
annotations = load_json(os.path.join("../data/bdd100k/", "labels/bdd100k_labels_images_val.json"))



In [None]:
from collections import Counter

# Flatten the category of every label across all annotations, then print how
# often each category occurs.
categories = [label['category'] for ann in annotations for label in ann['labels']]
print(Counter(categories))