# Install dependencies

In [None]:
!pip install fiftyone==0.14.2 
!pip install ipywidgets>=7.5
!pip install tensorflow torch torchvision umap-learn
!pip install albumentations==1.1.0
!pip install opencv-python==4.5.5.62 
!pip install opencv-python-headless==4.5.4.60

# Load data

In [None]:
import fiftyone as fo
import fiftyone.zoo as foz
import fiftyone.utils.coco as fouc
import numpy as np

In [None]:
# Open Images class names to download, keep when filtering labels, and train on.
# Their order fixes the class indices used by FiftyOneTorchDataset, which
# reserves index 0 for "background" (so Cat -> 1, Dog -> 2).
classes_list = ["Cat", "Dog"]

In [None]:
# Download (at most) 2000 shuffled Open Images v6 training samples that contain
# the requested classes, keeping only their detection labels.
train_dataset = foz.load_zoo_dataset(
    "open-images-v6",
    split="train",
    dataset_name="cat_dog_train",
    label_types=["detections"],
    classes=classes_list,
    max_samples=2000,
    shuffle=True,
    seed=51,
)

# Marking the dataset persistent is useful in a local runtime session.
# On a Colab hosted runtime the data has to be downloaded again.
train_dataset.persistent = True

In [None]:
# Same recipe as the training split: up to 500 shuffled samples from the
# Open Images v6 "test" split, detection labels only.
test_dataset = foz.load_zoo_dataset(
    "open-images-v6",
    split="test",
    dataset_name="cat_dog_test",
    label_types=["detections"],
    classes=classes_list,
    max_samples=500,
    shuffle=True,
    seed=51,
)

# Keep the dataset registered under its name after this session ends.
test_dataset.persistent = True

In [None]:
# Up to 500 shuffled samples from the Open Images v6 "validation" split,
# detection labels only; used for the final evaluation.
val_dataset = foz.load_zoo_dataset(
    "open-images-v6",
    split="validation",
    dataset_name="cat_dog_val",
    label_types=["detections"],
    classes=classes_list,
    max_samples=500,
    shuffle=True,
    seed=51,
)

# Keep the dataset registered under its name after this session ends.
val_dataset.persistent = True

In [None]:
# Show the names of the datasets FiftyOne has registered; the three
# "cat_dog_*" datasets loaded above should be listed.
print(fo.list_datasets())

In [None]:
from fiftyone import ViewField as F

# Re-open each dataset by name and replace the variable with a view whose
# "detections" field only contains objects labelled with one of classes_list.
train_dataset = fo.load_dataset("cat_dog_train").filter_labels(
    "detections",
    F("label").is_in(classes_list),
)
test_dataset = fo.load_dataset("cat_dog_test").filter_labels(
    "detections",
    F("label").is_in(classes_list),
)
val_dataset = fo.load_dataset("cat_dog_val").filter_labels(
    "detections",
    F("label").is_in(classes_list),
)

In [None]:
# Number of samples in the train / test / validation collections, in that order.
len(train_dataset), len(test_dataset), len(val_dataset)

In [None]:
# Open the FiftyOne App on the training data to inspect images and labels.
session = fo.launch_app(train_dataset)

# Prepare Dataset class

In [None]:
import torch
import torchvision
import cv2

# Seed PyTorch's global random number generator so torch-driven randomness
# (e.g. DataLoader shuffling, weight initialisation) is repeatable across runs.
torch.manual_seed(42)

In [None]:
class FiftyOneTorchDataset(torch.utils.data.Dataset):
    """A PyTorch object-detection dataset backed by a FiftyOne dataset or view.

    Indexing yields an ``(image, target)`` pair. ``target`` is a dict with the
    keys ``boxes`` (``[N, 4]`` float32, absolute ``x0, y0, x1, y1`` pixels),
    ``labels`` (``[N]`` int64 class indices), ``image_id``, ``area`` and
    ``iscrowd``.

    Args:
        fiftyone_dataset: a FiftyOne dataset or view that will be used for
            training or testing
        transforms (None): an optional callable invoked as
            ``transforms(image=img, bboxes=boxes, category_ids=labels)`` that
            returns a mapping with the keys ``"image"``, ``"bboxes"`` and
            ``"category_ids"`` (for example an albumentations ``Compose``
            configured for ``pascal_voc`` boxes). If None, the image is
            returned as the RGB array read by OpenCV
        gt_field ("detections"): the name of the field in fiftyone_dataset
            that contains the desired labels to load
        classes (None): a list of class strings that defines the mapping
            between class names and indices. If None or empty, all classes
            present in the given fiftyone_dataset are used. ``"background"``
            is inserted at index 0 unless it is already the first entry
    """

    def __init__(
        self,
        fiftyone_dataset,
        transforms=None,
        gt_field="detections",
        classes=None,
    ):
        self.samples = fiftyone_dataset
        self.transforms = transforms
        self.gt_field = gt_field

        self.img_paths = self.samples.values("filepath")

        self.classes = classes
        if not self.classes:
            # Get list of distinct labels that exist in the view
            self.classes = self.samples.distinct(
                "%s.detections.label" % gt_field
            )

        # Index 0 is reserved for the background class. The emptiness check
        # keeps a label-free dataset from failing on ``self.classes[0]``.
        if not self.classes or self.classes[0] != "background":
            self.classes = ["background"] + list(self.classes)

        # Class name -> integer index used as the training label
        self.labels_map_rev = {c: i for i, c in enumerate(self.classes)}

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and build its detection target.

        Raises:
            IndexError: if ``idx`` is outside ``self.img_paths``
            OSError: if the image file cannot be read
        """
        # Read the image and convert OpenCV's BGR channel order to RGB
        img_path = self.img_paths[idx]
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising
            raise OSError("Could not read image: %s" % img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Preparing the target
        sample = self.samples[img_path]
        # cv2 images are laid out as height x width x channels
        ht, wt = img.shape[:2]

        boxes = []
        labels = []

        gt = sample[self.gt_field]
        detections = gt.detections if gt is not None else []
        for det in detections:
            # Skip objects whose label is not one of the known classes
            category_id = self.labels_map_rev.get(det.label)
            if category_id is None:
                continue

            # The stored box is used as relative [x, y, width, height];
            # convert it to absolute [x0, y0, x1, y1] pixel corners.
            x, y, w, h = det["bounding_box"]
            boxes.append([x * wt, y * ht, (x + w) * wt, (y + h) * ht])
            labels.append(category_id)

        # Applying augmentations
        if self.transforms is not None:
            transformed = self.transforms(
                image=img, bboxes=boxes, category_ids=labels
            )
            img = transformed["image"]
            boxes = transformed["bboxes"]
            labels = transformed["category_ids"]

        # Convert boxes into a torch.Tensor. The reshape keeps the [N, 4]
        # layout when there are no boxes, so the column indexing below is
        # valid for N == 0 as well.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)

        target = {}
        target["boxes"] = boxes
        target["labels"] = torch.as_tensor(labels, dtype=torch.int64)
        target["image_id"] = torch.as_tensor([idx])

        # Box areas: height * width
        target["area"] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        # Treat every instance as a non-crowd object
        target["iscrowd"] = torch.zeros((boxes.shape[0],), dtype=torch.int64)

        return img, target

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.img_paths)

    def get_classes(self):
        """Return the class list, with ``"background"`` at index 0."""
        return self.classes

# Augmentations

In [None]:
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

In [None]:
# Both pipelines take absolute [x_min, y_min, x_max, y_max] boxes ("pascal_voc")
# and keep the class ids passed as "category_ids" in step with the boxes.

# Training: resize/pad to a 320 x 320 square, take a random box-preserving crop
# resized to 300 x 300, apply random flip / colour jitter, then scale pixel
# values by 1/255 and convert the image to a tensor.
train_transform = A.Compose(
    [
        A.LongestMaxSize(max_size=320),
        A.PadIfNeeded(border_mode=0, min_width=320, min_height=320),
        A.RandomSizedBBoxSafeCrop(height=300, width=300, erosion_rate=0.1),
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.3),
        A.RGBShift(p=0.3, r_shift_limit=15, g_shift_limit=15, b_shift_limit=15),
        A.ToFloat(always_apply=True, p=1, max_value=255),
        ToTensorV2(p=1.0),
    ],
    bbox_params=A.BboxParams(label_fields=["category_ids"], format="pascal_voc"),
)

# Evaluation: deterministic resize + pad to a 300 x 300 square, then the same
# scaling and tensor conversion as in training.
test_transform = A.Compose(
    [
        A.LongestMaxSize(max_size=300),
        A.PadIfNeeded(border_mode=0, min_width=300, min_height=300),
        A.ToFloat(always_apply=True, p=1, max_value=255),
        ToTensorV2(p=1.0),
    ],
    bbox_params=A.BboxParams(label_fields=["category_ids"], format="pascal_voc"),
)

# Training

In [None]:
# Wrap the FiftyOne train/test views as PyTorch datasets, each with its own
# augmentation pipeline and the shared class list.
torch_dataset = FiftyOneTorchDataset(
    train_dataset,
    transforms=train_transform,
    gt_field="detections",
    classes=classes_list,
)
torch_dataset_test = FiftyOneTorchDataset(
    test_dataset,
    transforms=test_transform,
    gt_field="detections",
    classes=classes_list,
)

In [None]:
# Build torchvision's SSD300 detector with a VGG16 backbone;
# pretrained=True requests the library's pretrained weights.
model = torchvision.models.detection.ssd300_vgg16(pretrained=True)

In [None]:
from torchvision.models.detection.ssd import SSDHead
# Replace the pretrained detection head with a freshly initialised one sized
# for our classes (+1 for background).
# Input channels are read from the existing head's per-feature-map conv layers.
in_channels = [x.in_channels for x in model.head.classification_head.module_list]
# Anchors per location come from the model's own anchor generator rather than
# a hard-coded list, so they always match the boxes the model generates
# ([4, 6, 6, 6, 4, 4] for the stock SSD300 configuration).
num_anchors = model.anchor_generator.num_anchors_per_location()
head = SSDHead(
    in_channels=in_channels,
    num_anchors=num_anchors,
    num_classes=len(classes_list) + 1,
)
model.head = head

In [None]:
class Averager:
    """Running arithmetic mean of the values passed to ``send``."""

    def __init__(self):
        # A new instance starts in the same empty state reset() produces.
        self.reset()

    def reset(self):
        """Discard everything received so far."""
        self.current_total = 0.0
        self.iterations = 0.0

    def send(self, value):
        """Add ``value`` to the running total and count one more observation."""
        self.iterations += 1
        self.current_total += value

    @property
    def value(self):
        """Mean of the received values, or 0 if nothing has been received."""
        if not self.iterations:
            return 0
        mean = 1.0 * self.current_total / self.iterations
        return mean

In [None]:
# Training hyper-parameters.
bs = 16  # batch size of the training DataLoader
test_bs = 1  # batch size of the test DataLoader used for per-epoch evaluation
num_epochs = 50  # number of passes over the training set
learning_rate = 0.0005  # SGD learning rate (the "lr5e-4" in the TensorBoard run name)

In [None]:
def collate_fn(batch):
    """Transpose a batch of per-sample tuples into per-field tuples.

    A list of ``(image, target)`` pairs becomes ``(images, targets)``, leaving
    the individual items untouched (no stacking), so images of different
    sizes can share a batch. An empty batch yields an empty tuple.
    """
    columns = zip(*batch)
    return tuple(columns)

In [None]:
# Training batches are reshuffled every epoch; the test loader keeps dataset
# order. Both use collate_fn so each batch is a tuple of images plus a tuple
# of target dicts rather than stacked tensors.
data_loader = torch.utils.data.DataLoader(
    torch_dataset,
    batch_size=bs,
    shuffle=True,
    num_workers=2,
    collate_fn=collate_fn,
)

data_loader_test = torch.utils.data.DataLoader(
    torch_dataset_test,
    batch_size=test_bs,
    shuffle=False,
    num_workers=2,
    collate_fn=collate_fn,
)

# Prefer the GPU and fall back to the CPU when CUDA is unavailable.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device %s" % device)

# Parameters must live on the target device before training starts.
model.to(device)

# SGD with momentum and weight decay over the trainable parameters only.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=learning_rate,
    momentum=0.9,
    weight_decay=0.0005,
)

# No learning-rate schedule by default. To decay the rate, assign a scheduler
# here instead, e.g.
#     torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
lr_scheduler = None

In [None]:
from torch.utils.tensorboard import SummaryWriter
# TensorBoard event writer for this run; logs go to a sub-directory of "runs"
# whose name records the experiment, model and learning rate.
writer = SummaryWriter('runs/cat_dog_experiment_1_ssd_lr5e-4')


In [None]:
# Load the TensorBoard notebook extension and open TensorBoard on the "runs"
# log directory (served on port 6006).
%load_ext tensorboard
%tensorboard --logdir runs --port=6006

In [None]:
# Clone TorchVision repo and copy helper files
!git clone https://github.com/pytorch/vision.git
%cd vision
!git checkout v0.3.0
%cd ..
!cp vision/references/detection/utils.py ./
!cp vision/references/detection/transforms.py ./
!cp vision/references/detection/coco_eval.py ./
!cp vision/references/detection/engine.py ./
!cp vision/references/detection/coco_utils.py ./


In [None]:
from engine import evaluate

In [None]:
# Train for num_epochs epochs, logging to TensorBoard and checkpointing the
# weights of the epoch with the best test-set score.
loss_hist = Averager()
itr = 1  # global step counter for the per-iteration loss curve
cpu_device = torch.device("cpu")

# Start below any attainable score so the first evaluated epoch always writes
# a checkpoint, even if its score is exactly 0 (a strict ">" against an initial
# 0 would never save in that case).
best_metric = float("-inf")

for epoch in range(num_epochs):
    print(f"Epoch {epoch}")
    model.train()
    loss_hist.reset()
    with fo.ProgressBar() as pb:
        for images, targets in pb(data_loader):
            # Move the batch to the training device
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # In training mode the model returns a dict of loss terms;
            # their sum is the quantity that is optimised.
            loss_dict = model(images, targets)
            losses = sum(loss_dict.values())
            loss_value = losses.item()
            loss_hist.send(loss_value)

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()
            writer.add_scalar('Loss_iter/train', loss_value, itr)

            itr += 1

    # update the learning rate (once per epoch)
    if lr_scheduler is not None:
        lr_scheduler.step()

    # Mean training loss over the epoch
    writer.add_scalar('Loss/train', loss_hist.value, epoch)

    # Evaluation on the test loader; stats[0] is the first COCO summary
    # metric (AP averaged over IoU thresholds 0.50:0.95).
    cc_ev = evaluate(model, data_loader_test, device=device)
    current_metric = cc_ev.coco_eval["bbox"].stats[0]
    writer.add_scalar('mAP/test', current_metric, epoch)

    # Saving the best weights
    if current_metric > best_metric:
        best_metric = current_metric
        torch.save(model.state_dict(), "best_weights.pth")
    

# Evaluating

In [None]:
import fiftyone.utils.coco as fouc

def convert_torch_predictions(preds, det_id, s_id, w, h, classes, input_size=300):
    """Convert one image's model output into a FiftyOne ``Detections`` object.

    The predicted boxes are expected in pixel coordinates of the square
    ``input_size`` x ``input_size`` network input, obtained by resizing the
    image's longest side to ``input_size`` and padding the shorter side
    equally on both ends (as ``test_transform`` is set up to do). They are
    mapped back to the original ``w`` x ``h`` image before conversion.

    Args:
        preds: dict with the tensors ``"boxes"`` (``x0, y0, x1, y1`` rows),
            ``"labels"`` and ``"scores"`` for a single image
        det_id: id to assign to the first detection; incremented per detection
        s_id: id of the FiftyOne sample the predictions belong to
        w: width of the original image in pixels
        h: height of the original image in pixels
        classes: class names indexed by predicted label
        input_size (300): side length of the square network input. The
            default matches the 300-pixel ``test_transform``

    Returns:
        a ``(detections, det_id)`` tuple: the ``fo.Detections`` for the image
        and the next unused detection id
    """
    dets = []

    # Undo the resize: the longest image side was mapped to ``input_size``.
    scale = max(w, h)

    # Undo the padding: only the shorter side was padded, by half the side
    # difference at each end. The mask selects the (y0, y1) entries for
    # landscape images and the (x0, x1) entries otherwise.
    shift = abs(w - h) / 2
    if w > h:
        shift = np.tile([0, 1], 2) * shift
    else:
        shift = np.tile([1, 0], 2) * shift

    for bbox, label, score in zip(
        preds["boxes"].cpu().detach().numpy(),
        preds["labels"].cpu().detach().numpy(),
        preds["scores"].cpu().detach().numpy(),
    ):
        # Parse prediction into FiftyOne Detection object; the COCO object
        # takes the box as [x, y, width, height] in pixels.
        x0, y0, x1, y1 = bbox / input_size * scale - shift
        coco_obj = fouc.COCOObject(
            det_id, s_id, int(label), [x0, y0, x1 - x0, y1 - y0]
        )
        det = coco_obj.to_detection((w, h), classes)
        det["confidence"] = float(score)
        dets.append(det)
        det_id += 1

    detections = fo.Detections(detections=dets)

    return detections, det_id

def add_detections(model, torch_dataset, view, field_name="predictions"):
    """Run inference over a dataset and store the predictions in FiftyOne.

    Args:
        model: detection model that, in eval mode, returns one dict with
            ``"boxes"``, ``"labels"`` and ``"scores"`` per input image
        torch_dataset: dataset yielding ``(image_tensor, target)`` pairs and
            exposing ``img_paths`` and ``classes`` (e.g. a
            ``FiftyOneTorchDataset`` with a tensor-producing transform)
        view: FiftyOne dataset or view, indexable by image filepath, whose
            samples receive the predictions
        field_name ("predictions"): sample field to write the detections to

    Side effects: sets the torch CPU thread count to 1, switches ``model`` to
    eval mode and moves it to the selected device, and saves every sample.
    """
    # NOTE(review): this limits torch to a single CPU thread process-wide;
    # the reason is not evident from this file - confirm it is still wanted.
    torch.set_num_threads(1)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    print("Using device %s" % device)

    model.eval()
    model.to(device)
    image_paths = torch_dataset.img_paths
    classes = torch_dataset.classes
    det_id = 0

    # Inference only: disabling autograd avoids building a graph for every
    # forward pass, which saves memory and time.
    with torch.no_grad():
        with fo.ProgressBar() as pb:
            for img, targets in pb(torch_dataset):
                # Get FiftyOne sample indexed by unique image filepath
                img_id = int(targets["image_id"][0])
                img_path = image_paths[img_id]
                sample = view[img_path]
                s_id = sample.id

                # Original image size: use stored metadata when present,
                # otherwise read the image to find out.
                if sample.metadata is None:
                    img_raw = cv2.imread(img_path)
                    h, w, _ = img_raw.shape
                else:
                    w = sample.metadata["width"]
                    h = sample.metadata["height"]

                # Inference on a batch of one image
                preds = model(img.unsqueeze(0).to(device))[0]

                detections, det_id = convert_torch_predictions(
                    preds,
                    det_id,
                    s_id,
                    w,
                    h,
                    classes,
                )

                sample[field_name] = detections
                sample.save()

In [None]:
# Wrap the validation view with the deterministic evaluation transform.
torch_dataset_val = FiftyOneTorchDataset(
    val_dataset,
    transforms=test_transform,
    gt_field="detections",
    classes=classes_list,
)

In [None]:
# Run the model over the validation data and store its detections on every
# sample under the "predictions" field.
# NOTE(review): this uses the model as it is in memory (state after the last
# epoch); "best_weights.pth" is not loaded anywhere in this notebook.
add_detections(model, torch_dataset_val, val_dataset, field_name="predictions")

In [None]:
# Keep only predictions with confidence above 0.5.
# only_matches=False keeps samples that end up with no remaining predictions;
# with the default (True) those samples would be dropped from the view and
# their ground-truth objects would never be counted as misses, inflating the
# evaluation results.
metric_view = val_dataset.filter_labels(
    "predictions",
    F("confidence") > 0.5,
    only_matches=False,
)

In [None]:
# Compare the "predictions" field against the ground truth in "detections"
# for the listed classes. compute_mAP=True is requested so that results.mAP()
# can be queried afterwards; eval_key names this evaluation run.
results = fo.evaluate_detections(
    metric_view,
    "predictions",
    gt_field="detections", 
    classes=classes_list, 
    eval_key="eval", 
    compute_mAP=True
)

In [None]:
# Display the mean average precision together with the aggregate metrics.
results.mAP(), results.metrics() 