###  Imports

In [1]:
import torch

import fiftyone as fo
import fiftyone.zoo as foz
from fiftyone import ViewField as F

from dataset import FiftyOneTorchDataset
from model import create_model
from utils import add_detections, get_transforms

import config

# seed PyTorch's default random number generator with a fixed value
torch.manual_seed(1)

Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
NumExpr defaulting to 8 threads.


<torch._C.Generator at 0x230bffdc730>

In [2]:
# Delete the previously loaded COCO validation dataset (if any) and start fresh.
# The check targets this specific dataset name: testing only whether *some*
# dataset exists would make fo.load_dataset() fail when other, unrelated
# datasets are present but "coco-2017-validation" is not.
if "coco-2017-validation" in fo.list_datasets():
    dataset = fo.load_dataset("coco-2017-validation")
    dataset.delete()

In [3]:
# Labels to evaluate on. Leave both lists empty to evaluate on all labels.
known_knowns = [
    'clock', 'vase', 'toaster', 'microwave', 'mouse', 'plant', 'sports ball',
    'zebra', 'dog', 'bird', 'bench', 'parking meter', 'airplane', 'bicycle',
]
known_unknowns = ['lizard', 'turtle', 'pen', 'cowboy hat', 'tank']

# combined list: the known-known labels come first, then the known-unknowns
item_list = [*known_knowns, *known_unknowns]

In [4]:
# Pull the COCO-2017 validation split from the FiftyOne dataset zoo
fo_coco_val_dataset = foz.load_zoo_dataset("coco-2017", "validation")

# metadata is needed to calculate image height and width
fo_coco_val_dataset.compute_metadata()

# open an app session on the full dataset
session = fo.launch_app(fo_coco_val_dataset)

if not item_list:
    # no labels of interest were provided: use the whole dataset ...
    item_view = fo_coco_val_dataset

    # ... and build the item list from the dataset's labels, for use later
    item_list = fo_coco_val_dataset.distinct("ground_truth.detections.label")
else:
    # restrict the "ground_truth" labels to the labels of interest
    item_view = fo_coco_val_dataset.filter_labels(
        "ground_truth",
        F("label").is_in(item_list),
    )

print(f'Evaluating on {len(item_view)} samples')

# get the image transformations; the first returned value is discarded
_, test_transforms = get_transforms()

# build the torch dataset from the view and the transformations
evaluation_dataset = FiftyOneTorchDataset(
    item_view,
    test_transforms,
    classes=item_list,
)

Downloading split 'validation' to 'C:\Users\blain\fiftyone\coco-2017\validation' if necessary
Found annotations at 'C:\Users\blain\fiftyone\coco-2017\raw\instances_val2017.json'
Images already downloaded
Existing download of split 'validation' is sufficient
Loading 'coco-2017' split 'validation'
 100% |███████████████| 5000/5000 [15.0s elapsed, 0s remaining, 343.7 samples/s]      
Dataset 'coco-2017-validation' created


Evaluating on 1422 samples


# Check Faster RCNN performance

In [5]:
# NOTE(review): this whole cell is disabled (commented out). As written it
# references `test_view`, which is not defined in the visible part of this
# notebook, and `fo_dataset`, which is only assigned in a later cell; it also
# calls create_model() twice. These need fixing before the cell is re-enabled.
#
# MODEL_TYPE = 'CLIP-Backbone-FRCNN'
# CHECKPOINT_NAME = f'{MODEL_TYPE}_epoch_28.pth'
#
# if item_list[0] != 'background':
#      item_list.insert(0,'background')
#
# frcnn_model = create_model(MODEL_TYPE, classes=item_list)
# checkpoint = torch.load(CHECKPOINT_NAME)
# frcnn_model = create_model(MODEL_TYPE, classes=item_list)
#
# frcnn_model.load_state_dict(checkpoint)
# frcnn_model.eval()
#
# add_detections(frcnn_model, evaluation_dataset, fo_dataset, field_name="frcnn_predictions")
#
# results = fo.evaluate_detections(
#     test_view,
#     "frcnn_predictions",
#     classes=item_list,
#     eval_key="eval",
#     compute_mAP=True
# )
# session.view = item_view
# print(f'mAP: {results.mAP()}')
# results.print_report()

# Check CLIP RPN performance

In [6]:
# test out the trained CLIP-RPN model
MODEL_TYPE = 'CLIP-RPN'
WEIGHTS_NAME='CLIP-RPN_rpn_full_training epoch_1.pth'

# load CLIP (RN50) on the configured device; only the second returned value
# is kept, the first is discarded
import clip
_, preprocess = clip.load("RN50", device=config.DEVICE)

# make sure 'background' is the first class. The guard keeps the cell
# idempotent (re-running it does not insert the class twice) and also copes
# with an empty item_list instead of raising IndexError.
if not item_list or item_list[0] != 'background':
    item_list.insert(0,'background')

# create the model
clip_frcnn_model = create_model(MODEL_TYPE, classes=item_list)

# load the trained weights; map_location remaps tensors that were saved on a
# different device (e.g. a GPU checkpoint opened on a CPU-only machine) onto
# config.DEVICE instead of failing while deserializing
checkpoint = torch.load(WEIGHTS_NAME, map_location=config.DEVICE)
clip_frcnn_model.load_state_dict(checkpoint['model_state_dict'])
epoch = checkpoint['epoch']

print(f'loaded checkpoint at epoch {epoch}')

# set to evaluation mode
clip_frcnn_model.eval()

loaded checkpoint at epoch 1


ZeroShotOD(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=(0.48145466, 0.4578275, 0.40821073), std=(0.26862954, 0.26130258, 0.27577711))
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): FeatureExtractor(
    (model): ModifiedResNet(
      (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (avgpool): AvgPool2d(kernel_size=2, stride=2, padding=0)
      (relu): ReLU(inplace=True)
      (layer1): Sequential(
        (0): Bottleneck(
          (con

In [7]:
# alias for the full COCO validation dataset; passed to add_detections() in later cells
fo_dataset = fo_coco_val_dataset

#### Sensitivity Study

In [8]:
# First, let's check how sensitive the results are to the clustering epsilon (eps)

In [None]:
# find performance with clustering: sweep the clustering epsilon and record
# the mAP obtained for each value

results = []  # one [eps, mAP] pair per epsilon tried

# Start below any score so the first run always becomes the incumbent; this
# guarantees best_eps / best_eval are set after the loop even when every mAP
# is 0 or negative. They are also pre-initialised so they always exist.
best_map = float("-inf")
best_eps = None
best_eval = None

for eps in range(5, 50, 5):
    print(f'epsilon = {eps}')

    # clustering is enabled in this sweep, so the field and eval key are named
    # "clustering" to keep them distinct from the no-clustering run
    pred_field = f"clip_RPN_predictions_clustering_{eps}"
    add_detections(
        clip_frcnn_model,
        evaluation_dataset,
        fo_dataset,
        field_name=pred_field,
        PRED_CLUSTERING=True,
        eps=eps,
    )

    evaluation = fo.evaluate_detections(
        item_view,
        pred_field,
        classes=item_list,
        eval_key=f"clip_eval_clustering_{eps}",
        compute_mAP=True
    )

    # named mean_ap rather than `map` so the builtin map() is not shadowed
    mean_ap = evaluation.mAP()
    print(f'mAP = {mean_ap}')

    results.append([eps, mean_ap])
    if mean_ap > best_map:
        best_eps = eps
        best_map = mean_ap
        best_eval = evaluation


epsilon = 5
Using device cuda
  12% |█|-------------|  169/1422 [20.8s elapsed, 2.5m remaining, 9.6 samples/s]   

In [None]:
# report the best score and epsilon from the sweep, then its evaluation report
print('best_mAP = {}'.format(best_map))
print('best_eps = {}'.format(best_eps))
best_eval.print_report()

In [None]:
# show the [eps, mAP] pairs gathered during the sweep
results

In [None]:
# Baseline run: same model, with prediction clustering switched off

add_detections(
    clip_frcnn_model,
    evaluation_dataset,
    fo_dataset,
    field_name="clip_RPN_predictions_no_clustering",
    PRED_CLUSTERING=False,
)

# evaluate the predictions stored in that field on item_view
evaluation = fo.evaluate_detections(
    item_view,
    "clip_RPN_predictions_no_clustering",
    compute_mAP=True,
    eval_key="clip_eval_no_clustering",
    classes=item_list,
)

print(f'mAP: {evaluation.mAP()}')
evaluation.print_report()

In [None]:
# Test against Faster-RCNN

In [None]:
# create the labelmap for the coco dataset
# the context manager closes the label file as soon as it has been read
# (previously the handle was left open)
with open("dataset_analysis/coco_labels.txt", "r") as coco_labels:
    coco_list = coco_labels.read().splitlines() # read each line in as a value in a list
coco_list.insert(0,'background') # add the background class
coco_id = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
          35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
          64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90, 91] # annoyingly, COCO has 90 class ids but only 80 labels
# ids and labels are paired positionally; zip() stops at the shorter sequence,
# so a length mismatch between the two is silently truncated
coco = dict(zip(coco_id, coco_list)) # convert it to a dict

In [None]:
import torchvision
# load a model pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
# Switch to inference mode before predicting, as is done for the CLIP model
# prior to add_detections(). A freshly constructed torch module starts in
# training mode; calling eval() again later is harmless.
model.eval()

# test out the standard FRCNN; `coco` maps the model's class ids to label names
add_detections(model, evaluation_dataset, fo_dataset, field_name="FRCNN_pretrained", labelmap=coco)

# evaluate the predictions stored in "FRCNN_pretrained" on item_view
evaluation = fo.evaluate_detections(
    item_view,
    "FRCNN_pretrained",
    classes=item_list,
    eval_key="FRCNN",
    compute_mAP=True
)

print(f'mAP: {evaluation.mAP()}')
evaluation.print_report()

In [None]:

# show item_view in the FiftyOne app session opened earlier
session.view = item_view