## Train, evaluate ssd on uadetrac and see how it performs in terms of precision and recall (unlabeled boxes)

In [2]:
## first let's load the data and corresponding boxes / labels for training

%pylab inline
%load_ext autoreload
%autoreload 2

import os
import sys
sys.argv=['']
sys.path.append('../../')


import numpy as np
import utils.helpers as helpers
from loaders.uadetrac_loader import UADetracLoader


Populating the interactive namespace from numpy and matplotlib


In [3]:
# load the data  
loader = UADetracLoader()
images = loader.load_cached_images()
labels = loader.load_cached_labels()
video_start_indices = loader.get_video_start_indices()

In [4]:
print(images.shape)

(10421, 300, 300, 3)


In [5]:
print(video_start_indices)

[ 664  936  437  784  800  800  906  694  800  800  800  800 1200]


In [11]:
## Starting modification of train_ssd.py for ua-detrac
## importing all relevant files
import argparse
import os
import logging
import sys
import itertools
import torch
import config as jaeho_config
from torch.utils.data import DataLoader, ConcatDataset
from torch.optim.lr_scheduler import CosineAnnealingLR, MultiStepLR


from eva_storage.external.ssd.vision.utils.misc import str2bool, Timer, freeze_net_layers, store_labels
from eva_storage.external.ssd.vision.ssd.ssd import MatchPrior
from eva_storage.external.ssd.vision.ssd.vgg_ssd import create_vgg_ssd
from eva_storage.external.ssd.vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd
from eva_storage.external.ssd.vision.ssd.mobilenetv1_ssd_lite import create_mobilenetv1_ssd_lite
from eva_storage.external.ssd.vision.ssd.mobilenet_v2_ssd_lite import create_mobilenetv2_ssd_lite
from eva_storage.external.ssd.vision.ssd.squeezenet_ssd_lite import create_squeezenet_ssd_lite
#from eva_storage.external.ssd.vision.datasets.voc_dataset import VOCDataset
#from eva_storage.external.ssd.vision.datasets.open_images import OpenImagesDataset
from eva_storage.external.ssd.vision.nn.multibox_loss import MultiboxLoss
from eva_storage.external.ssd.vision.ssd.config import vgg_ssd_config
from eva_storage.external.ssd.vision.ssd.config import mobilenetv1_ssd_config
from eva_storage.external.ssd.vision.ssd.config import squeezenet_ssd_config
from eva_storage.external.ssd.vision.ssd.data_preprocessing import TrainAugmentation, TestTransform



DEVICE = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")


if torch.cuda.is_available():
    torch.backends.cudnn.benchmark = True
    logging.info("Use Cuda.")



In [10]:
#dataset_path = "/home/jbang36/data/VOCdevkit/VOC2007"
#validation_path = "/home/jbang36/data/VOCdevkit/VOC2007"
base_net = "models/vgg16_reducedfc.pth"
batch_size = 24
num_workers = 4
num_epochs = 200
checkpoint_folder = 'models/'
lr = 1e-3
momentum = 0.9
weight_decay =5e-4
validation_epochs = 5
debug_steps = 100

timer = Timer()

In [12]:
## Load the model

num_classes = 4
net = create_net(num_classes)
min_loss = -10000.0
last_epoch = -1
base_net_lr = lr
extra_layers_lr = lr


print("Base net is frozen..")
freeze_net_layers(net.base_net)
params = itertools.chain(net.source_layer_add_ons.parameters(), net.extras.parameters(),
                         net.regression_headers.parameters(), net.classification_headers.parameters())
params = [
    {'params': itertools.chain(
        net.source_layer_add_ons.parameters(),
        net.extras.parameters()
    ), 'lr': extra_layers_lr},
    {'params': itertools.chain(
        net.regression_headers.parameters(),
        net.classification_headers.parameters()
    )}
]


#net.init_from_base_net(base_net)
## loading from pretrained model!!
pretrained_ssd_dir = '/nethome/jbang36/eva/eva_storage/external/ssd/models/vgg16-ssd-Epoch-149-Loss-3.3744568502269505.pth'

net.init_from_pretrained_ssd(pretrained_ssd_dir)


net.to(DEVICE)

print("Done loading to GPU")


NameError: name 'create_net' is not defined

In [None]:
## Load the rest (optimizer, loss function, etc)

criterion = MultiboxLoss(config.priors, iou_threshold=0.5, neg_pos_ratio=3,
                         center_variance=0.1, size_variance=0.2, device=DEVICE)
optimizer = torch.optim.SGD(params, lr=lr, momentum=0.9,
                            weight_decay=5e-4)


milestones = [int(v.strip()) for v in "80,100".split(",")]
scheduler = MultiStepLR(optimizer, milestones=milestones,
                        gamma=0.1, last_epoch=last_epoch)

In [None]:
filter_path = os.path.join(home_dir, 'filters')
loader_path = os.path.join(home_dir, 'loaders')


sys.path.append(home_dir)
sys.path.append(loader_path)
sys.path.append(filter_path)
util_path = os.path.join(home_dir, 'others', 'jupyter', 'core')

sys.path.append(util_path)

from utils import *
## I don't think you need to normalize....

X_train_norm, X_test_norm, Y_train_dict, Y_test_dict = get_uadetrac()
anno_dir = os.path.join(root,'data', 'ua_detrac','small-annotations')
boxes_dataset = get_boxes(anno_dir, width = 300, height = 300)
y_train = Y_train_dict['vehicle_type']
y_test = Y_test_dict['vehicle_type']

division = len(X_train_norm)
y_train_boxes = boxes_dataset[:division]
y_test_boxes = boxes_dataset[division:]



In [None]:
## We need to make val dataset...

val_division = int(0.8 * len(X_train_norm))
X_val_norm = X_train_norm[val_division:]
X_train_norm = X_train_norm[:val_division]
y_val_boxes = y_train_boxes[val_division:]
y_train_boxes = y_train_boxes[:val_division]
y_val = y_train[val_division:]
y_train = y_train[:val_division]

In [None]:
train_dataset = UADataset_lite(transform=train_transform, target_transform=target_transform)
X_train = X_train_norm * 255.0
X_train = X_train.astype(np.uint8)

train_dataset.set_x(X_train)
train_dataset.set_y(y_train)
train_dataset.set_y_boxes(y_train_boxes)


In [None]:
X_val = X_val_norm * 255.0
X_val = X_val.astype(np.uint8)
val_dataset = UADataset_lite(transform=train_transform, target_transform=target_transform)
val_dataset.set_x(X_val_norm)
val_dataset.set_y(y_val)
val_dataset.set_y_boxes(y_val_boxes)

X_test = X_test_norm * 255.0
X_test = X_test.astype(np.uint8)
test_dataset = UADataset_lite(transform=train_transform, target_transform=target_transform)
test_dataset.set_x(X_test)
test_dataset.set_y(y_test)
test_dataset.set_y_boxes(y_test_boxes)

In [None]:
## convert to loader 
batch_size = 24
train_loader = DataLoader(train_dataset, batch_size,
                          num_workers= 4,
                          shuffle=True)

val_loader = DataLoader(val_dataset, batch_size,
                          num_workers= 4,
                          shuffle=True)

In [None]:
## defining train / test functions

def train(loader, net, criterion, optimizer, device, debug_steps=100, epoch=-1):
    net.train(True)
    running_loss = 0.0
    running_regression_loss = 0.0
    running_classification_loss = 0.0
    for i, data in enumerate(loader):
        images, boxes, labels = data
        images = images.to(device)
        boxes = boxes.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        confidence, locations = net(images)
        regression_loss, classification_loss = criterion(confidence, locations, labels, boxes)  # TODO CHANGE BOXES
        loss = regression_loss + classification_loss
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        running_regression_loss += regression_loss.item()
        running_classification_loss += classification_loss.item()
        if i and i % debug_steps == 0:
            avg_loss = running_loss / debug_steps
            avg_reg_loss = running_regression_loss / debug_steps
            avg_clf_loss = running_classification_loss / debug_steps
            logging.info(
                f"Epoch: {epoch}, Step: {i}, " +
                f"Average Loss: {avg_loss:.4f}, " +
                f"Average Regression Loss {avg_reg_loss:.4f}, " +
                f"Average Classification Loss: {avg_clf_loss:.4f}"
            )
            running_loss = 0.0
            running_regression_loss = 0.0
            running_classification_loss = 0.0


def test(loader, net, criterion, device):
    net.eval()
    running_loss = 0.0
    running_regression_loss = 0.0
    running_classification_loss = 0.0
    num = 0
    for _, data in enumerate(loader):
        images, boxes, labels = data
        images = images.to(device)
        boxes = boxes.to(device)
        labels = labels.to(device)
        num += 1

        with torch.no_grad():
            confidence, locations = net(images)
            regression_loss, classification_loss = criterion(confidence, locations, labels, boxes)
            loss = regression_loss + classification_loss

        running_loss += loss.item()
        running_regression_loss += regression_loss.item()
        running_classification_loss += classification_loss.item()
    return running_loss / num, running_regression_loss / num, running_classification_loss / num


In [None]:
## evaluate the model
## let's just import utils and do all evaluations here?
import time

st = time.time()

for epoch in range(last_epoch + 1, num_epochs):
    scheduler.step()
    train(train_loader, net, criterion, optimizer,
          device=DEVICE, debug_steps=debug_steps, epoch=epoch)

    if epoch % validation_epochs == 0 or epoch == num_epochs - 1:
        net.eval()
        running_loss = 0.0
        running_regression_loss = 0.0
        running_classification_loss = 0.0
        num = 0
        for _, data in enumerate(val_loader):
            images, boxes, labels = data
            images = images.to(DEVICE)
            boxes = boxes.to(DEVICE)
            labels = labels.to(DEVICE)
            num += 1
            with torch.no_grad():
                confidence, locations = net(images)
                regression_loss, classification_loss = criterion(confidence, locations, labels, boxes)
                loss = regression_loss + classification_loss
            running_loss += loss.item()
            running_regression_loss += regression_loss.item()
            running_classification_loss += classification_loss.item()
        val_loss = running_loss / num
        val_regression_loss = running_regression_loss / num
        val_classification_loss = running_regression_loss / num
        print("epoch", epoch)
        print("  Validation Loss: {v:.4f}".format(v=val_loss))
        print("  Validatiion Regression Loss: {v:.4f}".format(v=val_regression_loss))
        print("  Validation Classification Loss: {v:.4f}".format(v=val_classification_loss))
        
        checkpoint_file_name = "vgg16-ssd:epoch" + str(epoch)
        model_path = os.path.join(checkpoint_folder, checkpoint_file_name)
        net.save(model_path)
        
print("Total time to train...", time.time() - st, "seconds")


In [None]:
test_loader = DataLoader(test_dataset, batch_size,
                          num_workers= 4,
                          shuffle=False)


In [None]:
for _, data in enumerate(test_loader):
    images, boxes, labels = data
    images = images.to(DEVICE)
    boxes = boxes.to(DEVICE)
    labels = labels.to(DEVICE)
    with torch.no_grad():
        confidence, locations = net(images)
    break
    
## we now have images, boxes, labels, confidence, locations

In [None]:
import vision.utils as ssd_utils
## need to convert locations to boxes
# I think the center_form_to_corner_form might be wrong??


predicted_locations = locations.cpu()

predicted_boxes = ssd_utils.box_utils.convert_locations_to_boxes(predicted_locations, config.priors, config.center_variance, config.size_variance)
predicted_boxes = ssd_utils.box_utils.center_form_to_corner_form(predicted_boxes)

gt_locations = boxes.cpu() ## they have converted boxes to locations
gt_boxes = ssd_utils.box_utils.convert_locations_to_boxes(gt_locations, config.priors, config.center_variance, config.size_variance)


gt_boxes = ssd_utils.box_utils.center_form_to_corner_form(gt_boxes)

In [None]:
"""
For some reason I can't figure out, rectangles are not being drawn on the images
"""

n_samples = predicted_boxes.size(0)
print(n_samples)
image_size = 300
color = (255, 0,0)

rows = 5
cols = 3
amp = 1

fig, axes = plt.subplots(rows, cols, figsize = (30*amp,30*amp))


for i in range(n_samples):
    img_tmp = np.ndarray(shape = (300,300,3), dtype = np.uint8)
    image = images_cpu[i]
    predicted_boxes_reshaped = predicted_boxes[i][pos_mask[i], :]
    gt_boxes_reshaped = gt_boxes[i][pos_mask[i], :]
    
    
    image = image.permute(1,2,0)
    image_np = image.numpy().astype(np.uint8)
    image_cp = np.copy(image_np)
    
    
    axes[i,0].imshow(image_np)
    n_rects = predicted_boxes_reshaped.size(0)
    pbr = (predicted_boxes_reshaped.numpy() * image_size).astype(np.uint8)
    gbr = (gt_boxes_reshaped.numpy() * image_size).astype(np.uint8)
    for j in range(n_rects):
        #should be left, top, right, bottom
        # let's draw the ground boxes to be sure
        
        cv2.rectangle(image_np, (pbr[j][0], pbr[j][1]), (pbr[j][2], pbr[j][3]), color, 2)
            
    axes[i,1].imshow(image_np)
    
    n_rects = gt_boxes_reshaped.size(0)
    for j in range(n_rects):
        cv2.rectangle(image_cp, (gbr[j][0], gbr[j][1]), (gbr[j][2], gbr[j][3]), color, 2)
        
    
    axes[i,2].imshow(image_cp)
    
    if i == rows - 1:
        break
        

In [None]:
## we want to do things image by image... let's organize the boxes into list
predicted_boxes_list = []
gt_boxes_list = []



pos_mask = labels > 0
predicted_boxes_reshaped = predicted_boxes[pos_mask, :]
print(predicted_boxes_reshaped.size())
gt_boxes_reshaped = gt_boxes[pos_mask, :]
print(gt_boxes_reshaped.size())
## I guess all we need is the statistics so convert them all and then see


In [None]:
### Evaluation

## do code evaluation...

all_ground_boxes = []
all_proposed_boxes = []
all_confidence = []
all_labels = []
all_images = []

for _, data in enumerate(test_loader):
    images, boxes, labels = data
    images = images.to(DEVICE)
    boxes = boxes.to(DEVICE)
    labels = labels.to(DEVICE)
    with torch.no_grad():
        confidence, locations = net(images)
        all_proposed_boxes.append(locations)
        all_confidence.append(confidence)
        all_labels.append(labels)
        all_ground_boxes.append(boxes)
        all_images.append(images)
    


In [None]:
# each element in this array will be the batch result

assert(len(all_proposed_boxes) == len(all_confidence))
assert(len(all_proposed_boxes) == len(all_labels))
assert(len(all_proposed_boxes) == len(all_ground_boxes))
assert(len(all_proposed_boxes) == len(all_images))

def compute_stats(gt_boxes, proposed_boxes, iou=0.5):
    assert(gt_boxes.size() == proposed_boxes.size())
    iou_list = ssd_utils.box_utils.iou_of(gt_boxes, proposed_boxes)
    tmp = iou_list > iou
    
    tp = torch.sum(iou_list > iou)
    
    return tp.item(), tmp.size()

import vision.utils as ssd_utils

In [None]:
tp_all = 0
boxes_all = 0

for i, images in enumerate(all_images):
    predicted_locations = all_proposed_boxes[i].cpu()
    predicted_boxes = ssd_utils.box_utils.convert_locations_to_boxes(predicted_locations, config.priors, config.center_variance, config.size_variance)
    predicted_boxes = ssd_utils.box_utils.center_form_to_corner_form(predicted_boxes)

    gt_locations = all_ground_boxes[i].cpu()
    gt_boxes = ssd_utils.box_utils.convert_locations_to_boxes(gt_locations, config.priors, config.center_variance, config.size_variance)
    gt_boxes = ssd_utils.box_utils.center_form_to_corner_form(gt_boxes)

    labels = all_labels[i]
    pos_mask = labels > 0
    predicted_boxes_reshaped = predicted_boxes[pos_mask, :]
    gt_boxes_reshaped = gt_boxes[pos_mask, :]
    tp, all_size = compute_stats(gt_boxes_reshaped, predicted_boxes_reshaped)
    tp_all += tp
    boxes_all += all_size[0]
    
print(1.0 * tp_all / boxes_all)    

In [None]:
## looking at the images, I don't think this is a good measure 
## because it creates multiple boxes for each object...
## so we need to figure out a way to eliminate recurring boxes
# let's try evaluation method that is already implemented from eval_ssd.py

from vision.ssd.vgg_ssd import create_vgg_ssd, create_vgg_ssd_predictor


predictor = create_vgg_ssd_predictor(net, nms_method="hard", device=DEVICE)
results = []
dataset = test_dataset
for i in range(len(dataset)):
    print("process image", i)
    timer.start("Load Image")
    image = dataset.get_image(i)
    print("Load Image: {:4f} seconds.".format(timer.end("Load Image")))
    timer.start("Predict")
    boxes, labels, probs = predictor.predict(image)
    print("Prediction: {:4f} seconds.".format(timer.end("Predict")))
    indexes = torch.ones(labels.size(0), 1, dtype=torch.float32) * i
    results.append(torch.cat([
        indexes.reshape(-1, 1),
        labels.reshape(-1, 1).float(),
        probs.reshape(-1, 1),
        boxes   # + 1.0 matlab's indexes start from 1
    ], dim=1))
    
results = torch.cat(results)



In [None]:

class_names = ['car', 'bus', 'others', 'van']

for class_index, class_name in enumerate(class_names):
    if class_index == 0: continue  # ignore background
    sub = results[results[:, 1] == class_index, :]
    for i in range(sub.size(0)):
        prob_box = sub[i, 2:].numpy()
        print(sub[i,2:])  
        print(int(sub[i,0]))
        #image_id = dataset.ids[int(sub[i, 0])]
        image_id = "0"
        print(
            image_id + " " + " ".join([str(v) for v in prob_box])
        )
        break
aps = []
    

print("\n\nAverage Precision Per-class:")
for class_index, class_name in enumerate(class_names):
    iou_threshold = 0.5
    use_2007_metric = True
    ap = compute_average_precision_per_class(
        true_case_stat[class_index],
        all_gb_boxes[class_index],
        all_difficult_cases[class_index],
        prediction_path,
        iou_threshold, 
        use_2007_metric
    )
    aps.append(ap)
    
print(f"\nAverage Precision Across All Classes:{sum(aps)/len(aps)}")

In [None]:
# we need to accumulate the results..
# also if there are squares that are being used multiple times, we need to prevent that bc it's not fair to detect easy ones multiple times to get good numbers
# adapt this some how...
## TODO! This function is not finished...

matched = set()
for i, image_batch in enumerate(all_images):
    predicted_locations = all_proposed_boxes[i].cpu()
    predicted_boxes = ssd_utils.box_utils.convert_locations_to_boxes(predicted_locations, config.priors, config.center_variance, config.size_variance)
    predicted_boxes = ssd_utils.box_utils.center_form_to_corner_form(predicted_boxes)

    gt_locations = all_ground_boxes[i].cpu()
    gt_boxes = ssd_utils.box_utils.convert_locations_to_boxes(gt_locations, config.priors, config.center_variance, config.size_variance)
    gt_boxes = ssd_utils.box_utils.center_form_to_corner_form(gt_boxes)
    labels = all_labels[i]
    
    
    for j in range(image_batch.size(0)):
        image = image_batch[j]
        labels_frame = labels[j]
        gt_boxes_frame = gt_boxes[j]
        predicted_boxes_frame = predicted_boxes[j]
        
        pos_mask = labels_frame > 0
        predicted_boxes_reshaped = predicted_boxes[pos_mask, :]
        gt_boxes_reshaped = gt_boxes[pos_mask, :]
        tp, all_size = compute_stats(gt_boxes_reshaped, predicted_boxes_reshaped)
        tp_all += tp
        boxes_all += all_size[0]
        
for i, image_id in enumerate(image_ids):
    box = boxes[i]
    if image_id not in gt_boxes:
        false_positive[i] = 1
        continue

    gt_box = gt_boxes[image_id]
    ious = box_utils.iou_of(box, gt_box)
    max_iou = torch.max(ious).item() 
    # the only reason you can do this is if you assume there is one box per frame
    # however, uadetrac does not have one frame, it has multiple frames. 
    # we need to look into the transformer to see how everything changes
    max_arg = torch.argmax(ious).item()
    if max_iou > iou_threshold:
        if difficult_cases[image_id][max_arg] == 0:
            if (image_id, max_arg) not in matched:
                true_positive[i] = 1
                matched.add((image_id, max_arg))
            else:
                false_positive[i] = 1
    else:
        false_positive[i] = 1

    true_positive = true_positive.cumsum()
    false_positive = false_positive.cumsum()
    precision = true_positive / (true_positive + false_positive)
    recall = true_positive / num_true_cases
    if use_2007_metric:
        return measurements.compute_voc2007_average_precision(precision, recall)
    else:
        return measurements.compute_average_precision(precision, recall)



In [None]:
labels = all_labels[0]
predicted_locations = all_proposed_boxes[0]
gt_locations = all_ground_boxes[0]

import vision.utils as ssd_utils
## need to convert locations to boxes

predicted_locations = all_proposed_boxes[0].cpu()
predicted_boxes = ssd_utils.box_utils.convert_locations_to_boxes(predicted_locations, config.priors, config.center_variance, config.size_variance)
predicted_boxes = ssd_utils.box_utils.center_form_to_corner_form(predicted_boxes)

gt_locations = all_ground_boxes[0].cpu()
gt_boxes = ssd_utils.box_utils.convert_locations_to_boxes(gt_locations, config.priors, config.center_variance, config.size_variance)
gt_boxes = ssd_utils.box_utils.center_form_to_corner_form(gt_boxes)


pos_mask = labels > 0
predicted_boxes_reshaped = predicted_boxes[pos_mask, :]
print(predicted_boxes_reshaped.size())
gt_boxes_reshaped = gt_boxes[pos_mask, :]
print(gt_boxes_reshaped.size())
## I guess all we need is the statistics so convert them all and then see

### okay now we have to start matching??
### -> I don't think we need to manually match
### -> Let's just calculate the final score first to see what's up

def compute_stats(gt_boxes, proposed_boxes, iou=0.5):
    assert(gt_boxes.size() == proposed_boxes.size())
    iou_list = ssd_utils.box_utils.iou_of(gt_boxes, predicted_boxes)
    tmp = iou_list > iou
    print(tmp)
    
    tp = torch.sum(iou_list > iou)
    return tp

print(compute_stats(gt_boxes, predicted_boxes))


In [None]:

### okay now we have to start matching??
### -> I don't think we need to manually match
### -> Let's just calculate the final score first to see what's up

def compute_stats(gt_boxes, proposed_boxes, iou=0.5):
    assert(gt_boxes.size() == proposed_boxes.size())
    iou_list = ssd_utils.box_utils.iou_of(gt_boxes, predicted_boxes)
    tmp = iou_list > iou
    print(tmp)
    
    tp = torch.sum(iou_list > iou)
    return tp



In [None]:
print(smooth_l1_loss)
print(num_pos)
print(predicted_locations.size())
print(gt_locations.size())

## probably make a converting function....
# proposed_boxes = convertssd2ml(all_locations)

### after training on UA-detrac, let's do some evaluation!
## Use the functions available from utils

precision, recall = corloc(test_boxes, proposed_boxes, iou = 0.5)
print(precision)
print(recall)

### after filtering
filtered_test_boxes = filter_ground_truth(test_boxes)

precision, recall = corloc(filtered_test_boxes, proposed_boxes, iou = 0.5)
print(precision)
print(recall)

### visualization

from utils import *

rows = 5
cols = 3
size = 30
n_samples = test_images.shape[0]
fig, axes = plt.subplots(rows, cols, figsize = (size*cols, size*rows), sharex = True, sharey = True)

for i in range(rows):
    random_index = random.randint(0, n_samples)
    axes[i, 0].imshow(test_images[random_index])
    cv_patches = ml2cv_patches(filtered_test_boxes[random_index])
    ground_image = draw_patches(test_images[random_index], cv_patches)
    axes[i, 1].imshow(ground_image)
    cv_patches2 = ml2cv_patches(proposed_boxes[random_index])
    proposed_image = draw_patches(test_images[random_index], cv_patches2)
    axes[i, 2].imshow(proposed_image)
    print("row", i)
    precision, recall = corloc([filtered_test_boxes[random_index]], [proposed_boxes[random_index]], iou = 0.5)
    print("  precision:", precision, " recall:", recall)
    
    