# Code for using FiftyOne to train a Faster RCNN on COCO data

###  Imports

In [1]:
import torch
import fiftyone as fo
import fiftyone.zoo as foz
from fiftyone import ViewField as F

from dataset import FiftyOneTorchDataset
from model import create_model
from utils import add_detections, get_transforms

from engine import train_model
import config

# Seed PyTorch's global random number generator with a fixed value.
torch.manual_seed(1)

Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
NumExpr defaulting to 8 threads.


<torch._C.Generator at 0x1a5e7b8e750>

### Load full dataset from model zoo

In [2]:
# Load the COCO-2017 splits from the FiftyOne dataset zoo. Metadata is computed
# right after each load because it is needed to calculate image height and width.
fo_train_dataset = foz.load_zoo_dataset("coco-2017", "train")
fo_train_dataset.compute_metadata()

fo_validation_dataset = foz.load_zoo_dataset("coco-2017", "validation")
fo_validation_dataset.compute_metadata()

Downloading split 'train' to 'C:\Users\blain\fiftyone\coco-2017\train' if necessary
Found annotations at 'C:\Users\blain\fiftyone\coco-2017\raw\instances_train2017.json'
Images already downloaded
Existing download of split 'train' is sufficient
Loading existing dataset 'coco-2017-train'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use
Downloading split 'validation' to 'C:\Users\blain\fiftyone\coco-2017\validation' if necessary
Found annotations at 'C:\Users\blain\fiftyone\coco-2017\raw\instances_val2017.json'
Images already downloaded
Existing download of split 'validation' is sufficient
Loading existing dataset 'coco-2017-validation'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


For example, cluttered images make it difficult for models to localize objects. We can use FiftyOne to create a view containing only samples with more than, say, 10 objects. You can perform the same operations on views as datasets, so we can create an instance of our PyTorch dataset from this view:

In [3]:
#if we want to see images with more than 10 items, we can
# busy_view = fo_dataset.match(F("ground_truth.detections").length() > 10)
# busy_torch_dataset = FiftyOneTorchDataset(busy_view)
# session.view = busy_view

### Create PyTorch datasets from the full training and validation splits

In [4]:
# Transform pipelines for the training and test datasets.
train_transforms, test_transforms = get_transforms()

print(f'Traning on {len(fo_train_dataset)} samples')
print(f'Testing on {len(fo_validation_dataset)} samples')

# Open the FiftyOne App on the training split.
session = fo.launch_app(fo_train_dataset)

# Every distinct ground-truth detection label in the training split.
item_list = fo_train_dataset.distinct("ground_truth.detections.label")

# Wrap both splits as PyTorch datasets that share the same class list.
train_dataset = FiftyOneTorchDataset(
    fo_train_dataset,
    train_transforms,
    classes=item_list,
)
validation_dataset = FiftyOneTorchDataset(
    fo_validation_dataset,
    test_transforms,
    classes=item_list,
)

# 'background' is prepended in place only after the datasets are built:
# it is needed for later use, but not for creating the datasets.
if not item_list[0] == 'background':
    item_list.insert(0, 'background')

Traning on 118287 samples
Testing on 5000 samples


In [5]:
# # map labels to single vehicle class
# vehicle_list = ['car', 'bus', 'truck']
# vehicles_map = {c: "vehicle" for c in vehicle_list}
#
# train_map_view = train_view.map_labels("ground_truth", vehicles_map)
# test_map_view = test_view.map_labels("ground_truth", vehicles_map)
#
# # use our dataset and defined transformations
# torch_map_dataset = FiftyOneTorchDataset(train_map_view, train_transforms)
# torch_map_dataset_test = FiftyOneTorchDataset(test_map_view, test_transforms)

### Training and Evaluation

In [6]:
# Model variant handed to create_model / train_model. Author's notes on the options:
#   CLIP-RPN             - creates an RPN for training and uses CLIP to classify the regions of interest
#   CLIP-Backbone-FRCNN  - creates a FRCNN using CLIP features as the model backbone
#   CLIP-FRCNN           - creates a FRCNN using CLIP features as the model backbone,
#                          and embeds the rois using CLIP's embedding
#   Fully custom vanilla - uses a pre-trained resnet50 backbone, and generates a new
#                          anchor generator and roi pooling
#   Custom-Vanilla       - uses the pre-trained FRCNN from pytorch and replaces the roi heads only
MODEL_TYPE = 'CLIP-RPN'

model = create_model(MODEL_TYPE, item_list)

train_model(
    model,
    train_dataset,
    validation_dataset,
    num_epochs=config.NUM_EPOCHS,
    MODEL_TYPE=MODEL_TYPE,
    WEIGHTS_NAME='rpn_full_training',
    batch_size=8,
    CONTINUE_TRAINING=False,
)

Using device cuda
creating index...
index created!
Test:  [  0/625]  eta: 0:11:15  model_time: 0.8590 (0.8590)  evaluator_time: 0.0510 (0.0510)  time: 1.0810  data: 0.1620  max mem: 3615


KeyboardInterrupt: 

In [None]:
# Write the current model weights, tagged with the epoch number, to
# '<MODEL_TYPE>_rpn_<epoch>.pth' in the working directory.
epoch = 1
torch.save(
    {'epoch': epoch, 'model_state_dict': model.state_dict()},
    f'{MODEL_TYPE}_rpn_{epoch}.pth',
)

In [None]:
# MODEL_TYPE='CLIP-FRCNN'
# # CLIP-Backbone-FRCNN creates a FRCNN using CLIP features as the model backbone
# # CLIP-FRCNN creates a FRCNN using CLIP features as the model backbone, and embeds the rois using CLIP's embedding
# # Fully custom vanilla uses a pre-trained resnet50 backbone, and generates new anchor generator and roi pooling
# # Custom-Vanilla uses the pre-trained FRCNN from pytorch and replaces the roi heads only
# #
# import clip
# text_tokens = clip.tokenize(["This is " + desc for desc in item_list]).cuda()
#
# model = create_model(MODEL_TYPE, text_tokens)
# test = False
#
# # print(model)
# # print(f'rpn nms thresh: {model.rpn.nms_thresh}')
#
# if test:
#     train_model(model, evaluation_dataset, evaluation_dataset, num_epochs=config.NUM_EPOCHS, MODEL_TYPE=MODEL_TYPE, WEIGHTS_NAME = 'box_regressors', batch_size=2)
# else:
#     train_model(model, train_dataset, evaluation_dataset, num_epochs=config.NUM_EPOCHS, MODEL_TYPE=MODEL_TYPE, WEIGHTS_NAME = 'box_regressors', batch_size=16, CONTINUE_TRAINING=True)
#
# #started at 0941 on 22 March 2022

In [None]:
# MODEL_TYPE='CLIP-Backbone-FRCNN'
#
# model = create_model(MODEL_TYPE, classes=item_list)
# test = True
#
# if test:
#     train_model(model, evaluation_dataset, evaluation_dataset, num_epochs=config.NUM_EPOCHS, MODEL_TYPE=MODEL_TYPE, batch_size=2)
# else:
#     train_model(model, train_dataset, evaluation_dataset, num_epochs=config.NUM_EPOCHS, MODEL_TYPE=MODEL_TYPE, batch_size=16)

In [None]:
# #train a custom vanilla model so that we can compare and make sure the CLIP FRCNN is comparable
# # Fully-Custom-Vanilla is most appropriate as it generates the model in a similar fashion
# MODEL_TYPE = 'Fully-Custom-Vanilla'
#
# vanilla_model = create_model(MODEL_TYPE, classes=item_list)
#
# test = True
#
# if test:
#     train_model(vanilla_model, evaluation_dataset, evaluation_dataset, num_epochs=config.NUM_EPOCHS, MODEL_TYPE=MODEL_TYPE, batch_size=2)
# else:
#     train_model(vanilla_model, train_dataset, evaluation_dataset, num_epochs=10, MODEL_TYPE=MODEL_TYPE)

### Evaluate the model

In [None]:
# Run the trained model over the validation split. The names previously used
# here (evaluation_dataset, fo_dataset, test_view) are not defined by any live
# cell, so the validation objects built earlier in the notebook are used instead.
# NOTE(review): add_detections presumably writes the model output to the
# "predictions" field of the FiftyOne samples - confirm in utils.add_detections.
add_detections(model, validation_dataset, fo_validation_dataset, field_name="predictions")

# Compare the "predictions" field against the ground truth; results are stored
# under the "eval" key and mAP is computed as part of the evaluation.
results = fo.evaluate_detections(
    fo_validation_dataset,
    "predictions",
    classes=item_list,
    eval_key="eval",
    compute_mAP=True,
)

In [None]:
# Point the App at the evaluated validation split (test_view was never defined;
# Dataset.view() gives a view over the whole dataset), then display the mAP.
session.view = fo_validation_dataset.view()
results.mAP()

In [None]:
# Print the evaluation report for the classwise evaluation results.
results.print_report()

By default, objects are only matched with other objects of the same class. In order to get an interesting confusion matrix, we need to match interclass objects by setting `classwise=False`.

In [None]:
# Evaluate again with classwise=False so that predictions may be matched to
# ground-truth objects of a different class. test_view was never defined, so
# the validation split that received the "predictions" field is evaluated.
results_interclass = fo.evaluate_detections(
    fo_validation_dataset,
    "predictions",
    classes=item_list,
    compute_mAP=True,
    classwise=False,
)

In [None]:
# Plot precision-recall curves for every class in item_list and display the plot.
plot = results.plot_pr_curves(classes=item_list)
plot.show()

In [None]:
# Confusion matrix from the interclass evaluation results, with the "other"
# and "missing" entries left out.
results_interclass.plot_confusion_matrix(classes=item_list, include_other=False, include_missing=False)

The [detection evaluation](https://voxel51.com/docs/fiftyone/user_guide/evaluation.html#detections) also added the fields `eval_tp`, `eval_fp`, and `eval_fn` to every sample, counting that sample's true positive, false positive, and false negative detections, and marked each individual detection as a true positive, false positive, or false negative in its `eval` attribute.
Let's create a view to find the worst samples by sorting by `eval_fp` using the [FiftyOne App](https://voxel51.com/docs/fiftyone/user_guide/app.html) to visualize the results. 

In [None]:
# Show the validation samples with the most false positives first
# (test_view was never defined; the evaluated validation split is used).
session.view = fo_validation_dataset.sort_by("eval_fp", reverse=True)

In [None]:
# Order the evaluated validation samples by descending false-positive count
# (test_view was never defined; the evaluated validation split is used).
session.view = fo_validation_dataset.sort_by("eval_fp", reverse=True)

It would be best to get this [data reannotated to fix these mistakes](https://towardsdatascience.com/managing-annotation-mistakes-with-fiftyone-and-labelbox-fc6e87b51102), but in the meantime, we can easily remedy this by simply creating a new view that remaps the labels `car`, `truck`, and `bus` all to `vehicle` and then retraining the model with that. This is only possible because we are backing our data in FiftyOne and loading views into PyTorch as needed. Without FiftyOne, the PyTorch dataset class or the underlying data would need to be changed to remap these classes.

In [None]:
# map labels to single vehicle class
vehicle_list = ['car', 'bus', 'truck']
vehicles_map = {c: "vehicle" for c in vehicle_list}

train_map_view = train_view.map_labels("ground_truth", vehicles_map)
test_map_view = test_view.map_labels("ground_truth", vehicles_map)

# use our dataset and defined transformations
torch_map_dataset = FiftyOneTorchDataset(train_map_view, train_transforms)
torch_map_dataset_test = FiftyOneTorchDataset(test_map_view, test_transforms)

In [None]:
# Only 2 classes (background and vehicle)
MODEL_TYPE = 'Vanilla-FRCNN'
vehicle_model = create_model(MODEL_TYPE, num_classes=(len(vehicles_map)+1))
train_model(vehicle_model, torch_map_dataset, torch_map_dataset_test, num_epochs=2, MODEL_TYPE=MODEL_TYPE)

In [None]:
# Run vehicle_model over the remapped test dataset.
# NOTE(review): presumably the output is stored in the "vehicle_predictions"
# field of test_map_view - confirm in utils.add_detections.
add_detections(vehicle_model, torch_map_dataset_test, test_map_view, field_name="vehicle_predictions")

In [None]:
# Evaluate the "vehicle_predictions" field on the remapped test view for the
# single "vehicle" class; results are stored under the "vehicle_eval" key and
# mAP is computed as part of the evaluation.
vehicle_results = fo.evaluate_detections(
    test_map_view, 
    "vehicle_predictions", 
    classes=["vehicle"], 
    eval_key="vehicle_eval", 
    compute_mAP=True
)

In [None]:
# Display the mAP from the vehicle evaluation.
vehicle_results.mAP()

In [None]:
# Print the evaluation report for the vehicle evaluation.
vehicle_results.print_report()

Due to our ability to easily visualize and manage our dataset with FiftyOne, we were able to spot and take action on a dataset issue that would otherwise have gone unnoticed if we only concerned ourselves with dataset-wide evaluation metrics and fixed dataset representations. Through these efforts, we managed to increase the mAP of the model to 43%.

Even though this example workflow may not work in all situations, this kind of class-merging strategy can be effective in cases where more fine-grained discrimination is not called for.