[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/johnpolsh/inf721-tpfinal/blob/main/colab/Object_detection_model.ipynb)
# Setup
### Download dependencies

In [None]:
!pip install wget

### Default imports

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import torch
import wget

### Select back-end device

In [None]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Tensors and modules created from here on are allocated on `device` by default.
torch.set_default_device(device)

print(f"Using {device} as default device")

### Downloading datasets

In [None]:
# Download links for the EgoObjects V1 release files.
# NOTE(review): these look like signed CDN URLs (see the `oh`/`oe` query
# parameters) and may stop working over time -- confirm and refresh if needed.
# NOTE(review): `eval` shadows the Python builtin for the rest of the notebook;
# the name is kept so that any cell relying on it keeps working.
metadata = "https://scontent.fplu8-1.fna.fbcdn.net/m1/v/t6/An8K4G08lXqX2Om6ZxT8yc0w9oEoqNjimpfZSGFLENsvJ3xB4nuKak0A762P82rRnwptKSXdgwHQm1cdHgKqRu2tTsutxrPfiz_kApnl3AmOSQNiU2njLSlnjxlI.json?ccb=10-5&oh=00_AfD3JBYfL1cuglzrrKOiqEeDBIXH7dMtDk_Ha8HzuulqZg&oe=658FF6FB&_nc_sid=a7aa5b"
train = "https://scontent.fplu8-1.fna.fbcdn.net/m1/v/t6/An-WS2mQvnrkM05xVRmd4NwzvUG42KxJV294Caeos-c0h8-XkxRyU9m4AdDvW5x9Sgxi4xHcXHkVkk0JyKtRZCmwCyw04Z-0ulrwQNAayOqnMvDkJvhL3nKJgtcUrA.json?ccb=10-5&oh=00_AfBbM0WoaaXAVNwoiJ1ahvpcNMs5tw-mpiTUAKXAmCfTqQ&oe=658DB23C&_nc_sid=a7aa5b"
eval = "https://scontent.fplu8-1.fna.fbcdn.net/m1/v/t6/An8ggk-BJQsp9pd3ra7o4f-xVlvsiNOzF7zrMHk124kuRtX_q5k3bMeO5t0LnG3LEEJuHLKZhKOYjQj7WB4dVnOtkTBG5cV4_9E4vv1KznH6Mt9SXAaTjbzJKrs.json?ccb=10-5&oh=00_AfDqP_wnEdETQz9n69Z9-dqV1xUzWqTpU_wxehG7JvKtMg&oe=658DDFC5&_nc_sid=a7aa5b"
images = "https://scontent.fplu8-1.fna.fbcdn.net/m1/v/t6/An8hVtaVFSLA4yMZFPktRgsXzMN0lbpzHWAXmD3nHmtOt0pV9u5aUW2XbTTDB2w4MgEFSWAjPz34t0chIVdMaGXDIBZ2xPGqicVHKcd1wMqEy76lMac.zip?ccb=10-5&oh=00_AfBKkJzUTrbsjTddDHf4UzK2zrqtUukVNunWJdeEoNsGCw&oe=658DBFBE&_nc_sid=a7aa5b"

ego_objects_root = "EgoObjects"


def _download_if_missing(url, destination):
    """Download ``url`` to ``destination`` unless that file already exists."""
    if not os.path.isfile(destination):
        wget.download(url, destination)


# Python f-strings interpolate with `{name}`; the previous `${name}` form put a
# literal "$" in front of the folder name, so the files were looked up (and
# saved) under "$EgoObjects/data/" instead of "EgoObjects/data/".
# NOTE(review): this cell does not create "EgoObjects/data" itself -- confirm
# the folder exists before running it.
_download_if_missing(metadata, f"{ego_objects_root}/data/EgoObjectsV1_unified_metadata.json")
_download_if_missing(train, f"{ego_objects_root}/data/EgoObjectsV1_unified_train.json")
_download_if_missing(eval, f"{ego_objects_root}/data/EgoObjectsV1_unified_eval.json")
_download_if_missing(images, f"{ego_objects_root}/data/EgoObjectsV1_images.zip")

In [None]:
!pip install 'git+https://github.com/facebookresearch/detectron2.git'
!git clone https://github.com/facebookresearch/EgoObjects.git

In [None]:
import sys
# Put the `egoobjects_api` folder of the cloned EgoObjects repository on the
# module search path so that the `egoobjects` package can be imported.
# NOTE(review): the entry is inserted at index 2 rather than appended; the
# reason for that exact position is not visible here.
sys.path.insert(2, './EgoObjects/egoobjects_api')

In [None]:
import json
import logging
import unittest
import numpy as np
from copy import deepcopy
from egoobjects import EgoObjects, FILTER_OPTS
from detectron2.data import MetadataCatalog

# Locations of the training annotation file and the unified metadata file
# inside the local EgoObjects/data folder.
gt_json_file = "./EgoObjects/data/EgoObjectsV1_unified_train.json"
metadata_json_file = "./EgoObjects/data/EgoObjectsV1_unified_metadata.json"

def get_egoobjects_meta(metadata_path: str):
    """
    Load the EgoObjects metadata JSON and add two derived category lookups.

    The file must contain a "cat_det_cats" list whose entries each have an
    "id" and a "name". Everything found in the file is returned unchanged,
    plus two extra keys:
        cat_det_cat_id_2_cont_id: category id -> position of that id in the
            ascending list of category ids (0-based, contiguous)
        cat_det_cat_names: category names in that same ascending-id order

    :param metadata_path: path of the metadata JSON file
    :return: the loaded dictionary, extended with the two keys above
    """
    with open(metadata_path, "r") as handle:
        meta = json.load(handle)

    categories = meta["cat_det_cats"]
    name_by_id = {entry["id"]: entry["name"] for entry in categories}
    ordered_ids = sorted(entry["id"] for entry in categories)

    # Build both lookups in a single pass over the ids in ascending order.
    contiguous_index = {}
    ordered_names = []
    for position, category_id in enumerate(ordered_ids):
        contiguous_index[category_id] = position
        ordered_names.append(name_by_id[category_id])

    meta["cat_det_cat_id_2_cont_id"] = contiguous_index
    meta["cat_det_cat_names"] = ordered_names
    return meta

dataset_name = "EgoObjects"

# Load the metadata dictionary, store its entries on the detectron2 catalog
# entry for this dataset, then keep the catalog entry itself as `metadata`.
metadata = get_egoobjects_meta(metadata_json_file)
MetadataCatalog.get(dataset_name).set(**metadata)
metadata = MetadataCatalog.get(dataset_name)

# Open the training annotations using the filter options registered for the
# unified detection training split.
split = "egoobjects_unified_det_train"
gt = EgoObjects(gt_json_file, metadata, filter_opts=FILTER_OPTS[split])

# f-strings interpolate with `{...}`; the previous `${...}` form printed a
# stray "$" in front of every list.
print(f"img ids: {gt.get_img_ids()}")
print(f"class ids: {gt.get_class_ids()}")
print(f"annot ids: {gt.get_ann_ids()}")

# Model
### Our architecture definition

In [None]:
from torch import nn

def _make_divisible(v, divisor, min_value=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    :param v:
    :param divisor:
    :param min_value:
    :return:
    """
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


#dw
class DepthWiseConvolution(nn.Sequential):
    """3x3 depthwise convolution followed by a normalization layer and ReLU6.

    The convolution uses ``groups=in_fts``, so each input channel is filtered
    on its own and the channel count is unchanged.

    :param in_fts: number of input (and output) channels
    :param stride: stride of the convolution
    :param norm_layer: callable building the normalization layer from a
        channel count; ``None`` selects ``nn.BatchNorm2d``
    """

    def __init__(self, in_fts, stride=1, norm_layer=None):
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        super().__init__(
            nn.Conv2d(in_fts, in_fts, kernel_size=(3, 3), stride=stride, padding=(1, 1), groups=in_fts, bias=False),
            norm_layer(in_fts),
            nn.ReLU6(inplace=True))


#pw
class PointWiseConvolution(nn.Sequential):
    """1x1 convolution followed by a normalization layer and ReLU6.

    :param in_fts: number of input channels
    :param out_fts: number of output channels
    :param norm_layer: callable building the normalization layer from a
        channel count; ``None`` selects ``nn.BatchNorm2d``
    """

    def __init__(self, in_fts, out_fts, norm_layer=None):
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        super().__init__(
            nn.Conv2d(in_fts, out_fts, kernel_size=(1, 1), bias=False),
            norm_layer(out_fts),
            nn.ReLU6(inplace=True))


class ConvBNReLU(nn.Sequential):
    """Convolution followed by a normalization layer and ReLU6.

    The padding is ``(kernel_size - 1) // 2``.

    :param in_planes: number of input channels
    :param out_planes: number of output channels
    :param kernel_size: size of the square kernel
    :param stride: stride of the convolution
    :param groups: number of convolution groups
    :param norm_layer: callable building the normalization layer from a
        channel count; ``None`` selects ``nn.BatchNorm2d``
    """

    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1, norm_layer=None):
        padding = (kernel_size - 1) // 2
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        super().__init__(
            nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
            norm_layer(out_planes),
            nn.ReLU6(inplace=True)
        )


class Bottleneck(nn.Module):
    """Expand (1x1) -> depthwise (3x3) -> linear projection (1x1) block.

    The input is added back to the output when ``stride == 1`` and
    ``inp == oup``. The 1x1 expansion is skipped when ``expand_ratio == 1``.

    :param inp: number of input channels
    :param oup: number of output channels
    :param stride: stride of the depthwise convolution
    :param expand_ratio: the hidden width is ``round(inp * expand_ratio)``
    :param norm_layer: callable building every normalization layer of the
        block from a channel count; ``None`` selects ``nn.BatchNorm2d``
    """

    def __init__(self, inp, oup, stride, expand_ratio, norm_layer=nn.BatchNorm2d):
        super().__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self.stride = stride

        hidden_dim = int(round(inp * expand_ratio))
        self.use_res_connect = self.stride == 1 and inp == oup

        layers = []

        #pw
        if expand_ratio != 1:
            layers.append(PointWiseConvolution(inp, hidden_dim, norm_layer=norm_layer))

        # `norm_layer` is forwarded to every stage; it used to be accepted
        # but ignored, so a custom normalization never took effect.
        layers.extend([
            #dw
            DepthWiseConvolution(hidden_dim, stride, norm_layer=norm_layer),
            #pw-linear (no activation after the projection)
            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
            norm_layer(oup)])

        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the block, adding the input back when the residual is enabled."""
        if self.use_res_connect:
            return x + self.conv(x)
        return self.conv(x)


class OurObjectDetectionNet(nn.Module):
    """Stack of Bottleneck blocks followed by a pooled linear classifier.

    The feature extractor is a stride-2 ConvBNReLU stem, the Bottleneck stages
    described by ``bottleneckLayerDetail``, and a final 1x1 ConvBNReLU. The
    resulting feature map is average-pooled to 1x1, flattened per sample and
    passed through dropout and a linear layer.

    :param bottleneckLayerDetail: iterable of ``(t, c, n, s)`` entries, where
        ``t`` is the expand ratio, ``c`` the channel count before scaling,
        ``n`` the number of blocks in the stage and ``s`` the stride of the
        first block of the stage (the remaining blocks use stride 1)
    :param inp: number of channels of the input
    :param num_classes: number of outputs of the final linear layer
    :param width_mult: multiplier applied to every channel count
    :param round_nearest: scaled channel counts are rounded to a multiple of
        this value
    """

    def __init__(self,bottleneckLayerDetail,inp = 3,num_classes = 50,width_mult = 1.0,round_nearest=8):
        super().__init__()

        self.out = None

        # Base widths of the stem (32) and of the final 1x1 convolution (1280),
        # scaled by width_mult.
        stem_channels = _make_divisible(32 * width_mult, round_nearest)
        self.last_channel = _make_divisible(1280 * width_mult, round_nearest)

        # Stem
        stages = [ConvBNReLU(inp, stem_channels, stride=2)]

        # Bottleneck stages
        current_channels = stem_channels
        for expansion, channels, repeats, first_stride in bottleneckLayerDetail:
            stage_channels = _make_divisible(channels * width_mult, round_nearest)
            for block_index in range(repeats):
                # Only the first block of a stage applies the stage stride.
                block_stride = first_stride if block_index == 0 else 1
                stages.append(
                    Bottleneck(current_channels, stage_channels, stride=block_stride, expand_ratio=expansion)
                )
                current_channels = stage_channels

        # Final 1x1 convolution
        stages.append(ConvBNReLU(current_channels, self.last_channel, kernel_size=1))

        self.features = nn.Sequential(*stages)

        # Classifier
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(self.last_channel, num_classes))

    def __forward_impl(self, x):
        feature_map = self.features(x)
        # Average-pool to 1x1, then flatten to one row per sample.
        pooled = nn.functional.adaptive_avg_pool2d(feature_map, 1)
        flattened = pooled.reshape(feature_map.shape[0], -1)
        return self.classifier(flattened)

    def forward(self, x):
        """Run the feature extractor and classifier on ``x``."""
        return self.__forward_impl(x)


### Model declaration

In [None]:
# One row per stage of the network, as consumed by OurObjectDetectionNet:
#   t = expand ratio of the blocks in the stage
#   c = channel count of the stage (before the width multiplier)
#   n = number of blocks in the stage
#   s = stride of the first block (the remaining blocks use stride 1)
bottleneckLayerDetail = [
    # t, c, n, s
    [1, 16, 1, 1],
    [6, 24, 2, 2],
    [6, 32, 3, 2],
    [6, 64, 4, 2],
    [6, 96, 3, 1],
    [6, 160, 3, 2],
    [6, 320, 1, 1],
]

our_model = OurObjectDetectionNet(bottleneckLayerDetail)
# NOTE(review): `summary` is not imported anywhere in the visible notebook, so
# this call raises NameError unless another cell defines it. The
# (batch, channels, height, width) argument suggests torchinfo.summary --
# confirm and add the matching install/import.
summary(our_model, (1,3,224,224))

### MobileNet V2

In [None]:
from torchvision.models import mobilenet_v2, MobileNet_V2_Weights

# Reference model: torchvision's MobileNetV2 built with the IMAGENET1K_V2
# pretrained weights.
mobilenet_model = mobilenet_v2(weights=MobileNet_V2_Weights.IMAGENET1K_V2)

### Convert model for mobile

In [None]:
!pip install wget

In [None]:
import wget
import os.path

# Fetch the convert.py helper script from the project repository unless a
# file with that name is already present in the working directory.
if not os.path.isfile("convert.py"):
    wget.download(
        "https://raw.githubusercontent.com/johnpolsh/inf721-tpfinal/main/colab/convert.py",
        "convert.py")

In [None]:
# `convert_for_mobile` comes from the local convert.py script; its
# implementation is not visible in this notebook.
from convert import convert_for_mobile

# NOTE(review): the model handed to the converter is the pretrained
# MobileNetV2, not `our_model` -- confirm this is intended.
convert_for_mobile(mobilenet_model, "object_detection")