###  Imports

In [1]:
import torch

import fiftyone as fo
import fiftyone.zoo as foz
from fiftyone import ViewField as F

from dataset import FiftyOneTorchDataset
from model import create_model
from utils import add_detections, get_transforms

import config

# Seed PyTorch's default random number generator so repeated runs draw the same random values.
torch.manual_seed(1)

Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
NumExpr defaulting to 8 threads.


<torch._C.Generator at 0x1b4e479b750>

In [2]:
# Delete the previously created COCO validation dataset, if present, so the
# notebook starts fresh. The check is made by name: other FiftyOne datasets may
# exist without this one, and loading a dataset that does not exist raises.
if "coco-2017-validation" in fo.list_datasets():
    dataset = fo.load_dataset("coco-2017-validation")
    dataset.delete()

In [3]:
# Labels used for evaluation. Leave both lists empty to evaluate on every label.

# Categories treated as "known unknowns".
known_unknowns = [
    'lizard',
    'turtle',
    'pen',
    'cowboy hat',
    'tank',
]

# Categories treated as "known knowns".
# NOTE(review): 'plant' does not appear in the COCO label map printed later in
# this notebook (which lists 'potted plant') — confirm the intended label.
known_knowns = [
    'clock',
    'vase',
    'toaster',
    'microwave',
    'mouse',
    'plant',
    'sports ball',
    'zebra',
    'dog',
    'bird',
    'bench',
    'parking meter',
    'airplane',
    'bicycle',
]

# Known-knowns first, then known-unknowns.
item_list = [*known_knowns, *known_unknowns]

In [4]:
# Load the COCO-2017 validation split from the FiftyOne Dataset Zoo
fo_coco_val_dataset = foz.load_zoo_dataset("coco-2017", "validation")

# needed to calculate image height and width
fo_coco_val_dataset.compute_metadata()

# create the session to view the dataset
session = fo.launch_app(fo_coco_val_dataset)

# One seed for both branches so the train/test split is reproducible
SPLIT_SEED = 51

if len(item_list) > 0:

    # keep only the ground-truth labels of interest
    item_view = fo_coco_val_dataset.filter_labels("ground_truth",
            F("label").is_in(item_list))

    # split the view into train and test sets; the product of a length and a
    # fractional split is a float, so truncate it to a whole sample count
    train_size = int(len(item_view) * config.TRAIN_TEST_SPLIT)
    train_view = item_view.take(train_size, seed=SPLIT_SEED)
    test_view = item_view.exclude([s.id for s in train_view])

else: # if we do not provide labels of interest
    # split the entire dataset into train and test sets
    train_size = int(len(fo_coco_val_dataset) * config.TRAIN_TEST_SPLIT)
    train_view = fo_coco_val_dataset.take(train_size, seed=SPLIT_SEED)
    test_view = fo_coco_val_dataset.exclude([s.id for s in train_view])

    # create an item list for use later
    item_list = fo_coco_val_dataset.distinct("ground_truth.detections.label")


print(f'Traning on {len(train_view)} samples')
print(f'Testing on {len(test_view)} samples')


# get the transformations needed for the images
train_transforms, test_transforms = get_transforms()

# wrap both views in the project's torch dataset with their transformations
train_dataset = FiftyOneTorchDataset(train_view, train_transforms,
        classes=item_list)
evaluation_dataset = FiftyOneTorchDataset(test_view, test_transforms,
        classes=item_list)

Downloading split 'validation' to 'C:\Users\blain\fiftyone\coco-2017\validation' if necessary
Found annotations at 'C:\Users\blain\fiftyone\coco-2017\raw\instances_val2017.json'
Images already downloaded
Existing download of split 'validation' is sufficient
Loading 'coco-2017' split 'validation'
 100% |███████████████| 5000/5000 [15.4s elapsed, 0s remaining, 332.3 samples/s]      
Dataset 'coco-2017-validation' created


Traning on 1137 samples
Testing on 285 samples


# Check Faster RCNN performance

In [5]:
# DISABLED CELL: every line below is commented out, so nothing here executes.
# It sketches an evaluation of a 'CLIP-Backbone-FRCNN' checkpoint.
# NOTE(review): before re-enabling, be aware that it
#   - passes `fo_dataset` to add_detections, but that name is only assigned in a later cell;
#   - calls create_model twice with identical arguments;
#   - inserts 'background' into the shared item_list in place.
#
# MODEL_TYPE = 'CLIP-Backbone-FRCNN'
# CHECKPOINT_NAME = f'{MODEL_TYPE}_epoch_28.pth'
#
# if item_list[0] != 'background':
#      item_list.insert(0,'background')
#
# frcnn_model = create_model(MODEL_TYPE, classes=item_list)
# checkpoint = torch.load(CHECKPOINT_NAME)
# frcnn_model = create_model(MODEL_TYPE, classes=item_list)
#
# frcnn_model.load_state_dict(checkpoint)
# frcnn_model.eval()
#
# add_detections(frcnn_model, evaluation_dataset, fo_dataset, field_name="frcnn_predictions")
#
# results = fo.evaluate_detections(
#     test_view,
#     "frcnn_predictions",
#     classes=item_list,
#     eval_key="eval",
#     compute_mAP=True
# )
# session.view = item_view
# print(f'mAP: {results.mAP()}')
# results.print_report()

# Check CLIP FRCNN performance

In [6]:
# test out the trained CLIP-FRCNN
MODEL_TYPE = 'CLIP-RPN'
WEIGHTS_NAME = 'rpn'

# load CLIP on the configured device (only the preprocess transform is kept)
import clip
_, preprocess = clip.load("RN50", device=config.DEVICE)

# Prepend a blank placeholder label exactly once.
# The placeholder that gets inserted is ' ', so the guard must recognise ' '
# as well as ''; checking only for '' would prepend another blank entry every
# time this cell is re-run. An empty list also gets the placeholder instead of
# raising on item_list[0].
if not item_list or item_list[0] not in ('', ' '):
    item_list.insert(0, ' ')

# tokenize per CLIP paper instructions, on the same device CLIP was loaded on
text_tokens = clip.tokenize(["This is a picture of a " + desc for desc in item_list]).to(config.DEVICE)

# create the model
clip_frcnn_model = create_model(MODEL_TYPE, classes=text_tokens)

# load the pre-trained model; map_location lets a checkpoint saved on another
# device be restored onto the configured one
CHECKPOINT_NAME = f'{MODEL_TYPE}_{WEIGHTS_NAME}.pth'
checkpoint = torch.load(CHECKPOINT_NAME, map_location=config.DEVICE)
clip_frcnn_model.load_state_dict(checkpoint['model_state_dict'])
epoch = checkpoint['epoch']

print(f'loaded checkpoint at epoch {epoch}')

# set to evaluation mode (as the last expression, this also displays the model)
clip_frcnn_model.eval()

loaded checkpoint at epoch 23


ZeroShotOD(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=(0.48145466, 0.4578275, 0.40821073), std=(0.26862954, 0.26130258, 0.27577711))
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): FeatureExtractor(
    (model): ModifiedResNet(
      (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (avgpool): AvgPool2d(kernel_size=2, stride=2, padding=0)
      (relu): ReLU(inplace=True)
      (layer1): Sequential(
        (0): Bottleneck(
          (con

In [7]:
# Alias the full COCO validation dataset under the name the following cells pass to add_detections.
fo_dataset = fo_coco_val_dataset

In [8]:
# CLIP-RPN performance with prediction clustering switched off
no_clustering_field = "clip_RPN_predictions_no_clustering"

# run inference and store the detections under the field above
add_detections(
    clip_frcnn_model,
    evaluation_dataset,
    fo_dataset,
    field_name=no_clustering_field,
    PRED_CLUSTERING=False,
)

# score the stored detections on the held-out view
results = fo.evaluate_detections(
    test_view,
    no_clustering_field,
    classes=item_list,
    eval_key="clip_eval_no_clustering",
    compute_mAP=True,
)

mean_ap = results.mAP()
print(f'mAP: {mean_ap}')
results.print_report()

Using device cuda
 100% |█████████████████| 285/285 [31.2s elapsed, 0s remaining, 9.9 samples/s]       
Evaluating detections...
 100% |█████████████████| 285/285 [10.2s elapsed, 0s remaining, 29.8 samples/s]      
Performing IoU sweep...
 100% |█████████████████| 285/285 [17.2s elapsed, 0s remaining, 17.0 samples/s]      
mAP: 0.008850472137577172
               precision    recall  f1-score   support

                    0.00      0.00      0.00         0
        clock       0.02      0.40      0.04        45
         vase       0.03      0.27      0.06        45
      toaster       0.00      0.00      0.00         0
    microwave       0.00      0.11      0.01         9
        mouse       0.00      0.08      0.00        24
        plant       0.00      0.00      0.00         0
  sports ball       0.02      0.32      0.03        79
        zebra       0.07      0.78      0.12       102
          dog       0.02      0.80      0.04        46
         bird       0.08      0.77      0.1

In [9]:
# CLIP-RPN performance with prediction clustering switched on
with_clustering_field = "clip_RPN_predictions_with_clustering"

# run inference and store the detections under the field above
add_detections(
    clip_frcnn_model,
    evaluation_dataset,
    fo_dataset,
    field_name=with_clustering_field,
    PRED_CLUSTERING=True,
)

# score the stored detections on the held-out view
results = fo.evaluate_detections(
    test_view,
    with_clustering_field,
    classes=item_list,
    eval_key="clip_eval_with_clustering",
    compute_mAP=True,
)

mean_ap = results.mAP()
print(f'mAP: {mean_ap}')
results.print_report()

Using device cuda
 100% |█████████████████| 285/285 [41.9s elapsed, 0s remaining, 6.7 samples/s]      
Evaluating detections...
 100% |█████████████████| 285/285 [2.1s elapsed, 0s remaining, 166.9 samples/s]      
Performing IoU sweep...
 100% |█████████████████| 285/285 [1.8s elapsed, 0s remaining, 169.6 samples/s]         
mAP: 0.0169637347926074
               precision    recall  f1-score   support

                    0.00      0.00      0.00         0
        clock       0.20      0.20      0.20        45
         vase       0.04      0.04      0.04        45
      toaster       0.00      0.00      0.00         0
    microwave       0.00      0.00      0.00         9
        mouse       0.00      0.00      0.00        24
        plant       0.00      0.00      0.00         0
  sports ball       0.03      0.05      0.04        62
        zebra       0.18      0.22      0.20        60
          dog       0.16      0.43      0.24        46
         bird       0.12      0.13      0.1

In [10]:
# Test against Faster-RCNN

In [11]:
# create the labelmap
# Read one label per line; the with-block closes the file once it has been read.
with open("dataset_analysis/coco_labels.txt", "r") as coco_labels:
    coco_list = coco_labels.read().splitlines() # read each line in as a value in a list
coco_list.insert(0,'background') # add the background class
coco_id = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
          35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
          64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90, 91] # annoyingly, COCO has 90 class ids but only 80 labels
# Pair each id with its label. zip stops at the shorter sequence, so any
# surplus ids or labels are silently left out of the mapping.
coco = dict(zip(coco_id, coco_list)) # convert it to a dict

{0: 'background', 1: 'person', 2: 'bicycle', 3: 'car', 4: 'motorcycle', 5: 'airplane', 6: 'bus', 7: 'train', 8: 'truck', 9: 'boat', 10: 'traffic light', 11: 'fire hydrant', 13: 'stop sign', 14: 'parking meter', 15: 'bench', 16: 'bird', 17: 'cat', 18: 'dog', 19: 'horse', 20: 'sheep', 21: 'cow', 22: 'elephant', 23: 'bear', 24: 'zebra', 25: 'giraffe', 27: 'backpack', 28: 'umbrella', 31: 'handbag', 32: 'tie', 33: 'suitcase', 34: 'frisbee', 35: 'skis', 36: 'snowboard', 37: 'sports ball', 38: 'kite', 39: 'baseball bat', 40: 'baseball glove', 41: 'skateboard', 42: 'surfboard', 43: 'tennis racket', 44: 'bottle', 46: 'wine glass', 47: 'cup', 48: 'fork', 49: 'knife', 50: 'spoon', 51: 'bowl', 52: 'banana', 53: 'apple', 54: 'sandwich', 55: 'orange', 56: 'broccoli', 57: 'carrot', 58: 'hot dog', 59: 'pizza', 60: 'donut', 61: 'cake', 62: 'chair', 63: 'couch', 64: 'potted plant', 65: 'bed', 67: 'dining table', 70: 'toilet', 72: 'tv', 73: 'laptop', 74: 'mouse', 75: 'remote', 76: 'keyboard', 77: 'cell p

In [12]:
import torchvision

# reference detector: torchvision Faster R-CNN pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# run the standard FRCNN, translating its class ids through the COCO labelmap
frcnn_field = "FRCNN_pretrained"
add_detections(
    model,
    evaluation_dataset,
    fo_dataset,
    field_name=frcnn_field,
    labelmap=coco,
)

# score the stored detections on the held-out view
results = fo.evaluate_detections(
    test_view,
    frcnn_field,
    classes=item_list,
    eval_key="FRCNN",
    compute_mAP=True,
)

mean_ap = results.mAP()
print(f'mAP: {mean_ap}')
results.print_report()

Using device cuda
 100% |█████████████████| 285/285 [40.6s elapsed, 0s remaining, 7.2 samples/s]      
Evaluating detections...
 100% |█████████████████| 285/285 [5.0s elapsed, 0s remaining, 62.6 samples/s]       
Performing IoU sweep...
 100% |█████████████████| 285/285 [7.4s elapsed, 0s remaining, 41.2 samples/s]       
mAP: 0.4593927641641096
               precision    recall  f1-score   support

                    0.00      0.00      0.00         0
        clock       0.35      0.82      0.49        45
         vase       0.21      0.71      0.33        45
      toaster       0.00      0.00      0.00         0
    microwave       0.24      0.89      0.38         9
        mouse       0.47      0.96      0.63        24
        plant       0.00      0.00      0.00         0
  sports ball       0.38      0.62      0.48        64
        zebra       0.52      0.94      0.67        89
          dog       0.35      0.87      0.50        46
         bird       0.55      0.87      0.68  

In [14]:

# Point the App session created earlier at the held-out test view.
session.view = test_view