###  Imports

In [1]:
import torch

import fiftyone as fo
import fiftyone.zoo as foz
from fiftyone import ViewField as F

from dataset import FiftyOneTorchDataset
from model import create_model
from utils import add_detections, get_transforms

import config
import pickle

torch.manual_seed(1)

Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
NumExpr defaulting to 8 threads.


<torch._C.Generator at 0x25c9c9cf730>

In [2]:
# dataset_name = "coco-2017-validation"
dataset_name = "ImageNet_validation"

# The directory containing the dataset to import
dataset_dir = "C:/Data_drive/Data/Imagenet/ImageNet/imagenet_val_dataset/imagenet_val_dataset"

In [3]:
# Check if the datasets exist on this machine
if len(fo.list_datasets()) > 0:
    if dataset_name in fo.list_datasets():
        fo_dataset = fo.load_dataset(dataset_name) # if the dataset  exists, load it
    else:
        if dataset_name == "coco-2017-validation":
            fo_dataset = foz.load_zoo_dataset("coco-2017", "validation")
        else: # if the dataset isnt coco, we will load it from the machine
            fo_dataset = fo.Dataset.from_dir(
            dataset_dir=dataset_dir,
            dataset_type=fo.types.VOCDetectionDataset,
            name=dataset_name,
            )



if dataset_name == "ImageNet_validation":
    with open('dataset_analysis/imagenet_dict_mapping.pkl', 'rb') as f:
                imagenet_class_mapping = pickle.load(f)
    fo_dataset = fo_dataset.map_labels("ground_truth", imagenet_class_mapping)


#needed to calculate image height and width
fo_dataset.compute_metadata()
#create the session to view the dataset
session = fo.launch_app(fo_dataset)

In [4]:
# create the list of labels needed for evaluation, if evaluating on all labels, leave empty

known_unknowns = ['lizard', 'turtle', 'pen', 'cowboy hat', 'tank']
known_knowns = ['clock', 'vase', 'toaster', 'microwave', 'mouse', 'potted plant', 'sports ball', 'zebra', 'dog', 'bird', 'bench', 'parking meter', 'airplane', 'bicycle']



dataset_class_labels = known_knowns + known_unknowns

model_class_labels = known_knowns + known_unknowns


In [5]:
if len(dataset_class_labels) > 0:

    item_view = fo_dataset.filter_labels("ground_truth",
            F("label").is_in(dataset_class_labels))

    # find the class with the fewest examples
    class_count = item_view.count_values("ground_truth.detections.label")
    smallest_class = min(class_count, key=class_count.get) # find the key of the smallest class

    id = set() # create a set to contain the image ids

    for dataset_class in item_view.distinct("ground_truth.detections.label"): # loop through all of the class labels
        class_view = item_view.filter_labels("ground_truth",
            F("label").is_in(dataset_class)) # create a view from which to sample the class
        sample_ids = class_view.take(class_count[smallest_class], seed = 51) # take the number of classes based on the smallest class

        for sample in sample_ids:
            id.add(sample.id) # add the image ids to the set
    item_view = item_view.select(id) # create a view based on these images

else: # if we do not provide labels of interest
    item_view = fo_dataset

    #create an item list for use later
    dataset_class_labels = fo_dataset.distinct("ground_truth.detections.label")


print(f'Evaluating on {len(item_view)} samples')


#get the transformations needed for the images
_, test_transforms = get_transforms()

# use our dataset and defined transformations
evaluation_dataset = FiftyOneTorchDataset(item_view, test_transforms,
        classes=dataset_class_labels)

session.view = item_view

# add a blank line dropped during classification
if model_class_labels[0] != 'background':
     model_class_labels.insert(0,'background')

Evaluating on 1013 samples


In [6]:
# for some items, CLIP may do better with different textual descriptions

replacements = {
    'mouse': 'computer mouse',
}

for k, v in replacements.items():
    CLIP_list = [v if item == k else item for item in model_class_labels]

print(CLIP_list)

['background', 'clock', 'vase', 'toaster', 'microwave', 'computer mouse', 'potted plant', 'sports ball', 'zebra', 'dog', 'bird', 'bench', 'parking meter', 'airplane', 'bicycle', 'lizard', 'turtle', 'pen', 'cowboy hat', 'tank']


# Check CLIP RPN performance

In [7]:
# test out the trained CLIP-FRCNN
MODEL_TYPE = 'CLIP-RPN'
WEIGHTS_NAME='CLIP-RPN_rpn_full_training epoch_30.pth'

# tokenize item list for CLIP
import clip
_, preprocess = clip.load("RN50", device=config.DEVICE)

# create the model
clip_frcnn_model = create_model(MODEL_TYPE, classes=CLIP_list)

# load the pre-trained model
checkpoint = torch.load(WEIGHTS_NAME)
clip_frcnn_model.load_state_dict(checkpoint['model_state_dict'])
epoch = checkpoint['epoch']

print(f'loaded checkpoint at epoch {epoch}')

# set to evaluation mode
clip_frcnn_model.eval()

loaded checkpoint at epoch 30


ZeroShotOD(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=(0.48145466, 0.4578275, 0.40821073), std=(0.26862954, 0.26130258, 0.27577711))
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): FeatureExtractor(
    (model): ModifiedResNet(
      (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (avgpool): AvgPool2d(kernel_size=2, stride=2, padding=0)
      (relu): ReLU(inplace=True)
      (layer1): Sequential(
        (0): Bottleneck(
          (con

#### Sensitivity Study

In [8]:
# first lest check the epsilon for

In [9]:
# find performance without clustering

add_detections(clip_frcnn_model, evaluation_dataset, fo_dataset, field_name="clip_RPN_predictions_no_clustering", PRED_CLUSTERING=False)

evaluation = fo.evaluate_detections(
    item_view,
    "clip_RPN_predictions_no_clustering",
    classes=dataset_class_labels,
    eval_key="clip_eval_no_clustering",
    compute_mAP=True
)

print(f'mAP: {evaluation.mAP()}')
evaluation.print_report()

Using device cuda
 100% |███████████████| 1013/1013 [1.8m elapsed, 0s remaining, 11.1 samples/s]      
Evaluating detections...
 100% |███████████████| 1013/1013 [30.1s elapsed, 0s remaining, 39.6 samples/s]      
Performing IoU sweep...
 100% |███████████████| 1013/1013 [50.1s elapsed, 0s remaining, 25.5 samples/s]      
mAP: 0.0067011153219242855
               precision    recall  f1-score   support

        clock       0.02      0.43      0.04        63
         vase       0.03      0.37      0.05        70
      toaster       0.01      0.11      0.02        64
    microwave       0.01      0.30      0.02        57
        mouse       0.00      0.25      0.01        65
 potted plant       0.01      0.37      0.03       165
  sports ball       0.01      0.36      0.02        94
        zebra       0.02      0.90      0.05        88
          dog       0.02      0.89      0.03        74
         bird       0.01      0.78      0.03        69
        bench       0.01      0.65      0.0

In [None]:
# find performance with clustering

results = []
best_map = 0

for eps in range(5, 50, 5):
    print(f'epsilon = {eps}')
    add_detections(clip_frcnn_model, evaluation_dataset, fo_dataset, field_name=f"clip_RPN_predictions_clustering_{eps}", PRED_CLUSTERING=True, eps=eps)

    evaluation = fo.evaluate_detections(
        item_view,
        f"clip_RPN_predictions_clustering_{eps}",
        classes=dataset_class_labels,
        eval_key=f"clip_eval_clustering_{eps}",
        compute_mAP=True
    )

    map = evaluation.mAP()
    print(f'mAP = {map}')

    results.append([eps, map])
    if map > best_map:
        best_eps = eps
        best_map = map
        best_eval = evaluation


epsilon = 5
Using device cuda
  71% |██████████-----|  719/1013 [1.6m elapsed, 40.2s remaining, 7.3 samples/s]   

In [None]:
print(f'best_mAP = {best_map}')
print(f'best_eps = {best_eps}')
best_eval.print_report()

In [None]:
# # with clustering
# add_detections(clip_frcnn_model, evaluation_dataset, fo_dataset, field_name=f"clip_RPN_predictions_clustering", PRED_CLUSTERING=True, eps=30)
#
# evaluation = fo.evaluate_detections(
#     item_view,
#     f"clip_RPN_predictions_clustering",
#     classes=dataset_class_labels,
#     eval_key=f"clip_eval_clustering",
#     compute_mAP=True
# )
#
# map = evaluation.mAP()
# print(f'mAP = {map}')
#
# results.append([eps, map])
# if map > best_map:
#     best_eps = eps
#     best_map = map
#     best_eval = evaluation


In [None]:
# Test against Faster-RCNN

In [None]:
# create the labelmap for the coco dataset
coco_labels = open("dataset_analysis/coco_labels.txt", "r")
coco_list = coco_labels.read().splitlines() # read each line in as a value in a list
coco_list.insert(0,'background') # add the background class
coco_id = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
          35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
          64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90, 91] # annoyingly, COCO has 90 class ids but only 80 labels
coco = dict(zip(coco_id, coco_list)) # convert it to a dict

In [None]:
import torchvision
# load a model pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# test out the standard FRCNN
add_detections(model, evaluation_dataset, fo_dataset, field_name="FRCNN_pretrained", labelmap=coco)

evaluation = fo.evaluate_detections(
    item_view,
    "FRCNN_pretrained",
    classes=dataset_class_labels,
    eval_key="FRCNN",
    compute_mAP=True
)

print(f'mAP: {evaluation.mAP()}')
evaluation.print_report()

In [None]:

session.view = item_view