# Creating the Object Detection Model
I'm going to do some exploratory analysis here before I flesh out the notebook. It's easier to do this in a notebook than in a script because I can test the functionality interactively, cell by cell.


## Import Necessary Libraries and Set Torch Home

In [1]:
import torch
import timm
import os

In [2]:
# Directory used as TORCH_HOME for this notebook
cache_dir = './data/models/'
os.environ.update(TORCH_HOME=cache_dir)

In [3]:
# List the timm model names matching 'inception*' that have pretrained weights
timm.list_models('inception*', pretrained=True)

['inception_resnet_v2', 'inception_v3', 'inception_v4']

In [4]:
# Same query for the 'resnet*' family, to see which other backbones are available
timm.list_models('resnet*', pretrained=True)

['resnet10t',
 'resnet14t',
 'resnet18',
 'resnet18d',
 'resnet26',
 'resnet26d',
 'resnet26t',
 'resnet32ts',
 'resnet33ts',
 'resnet34',
 'resnet34d',
 'resnet50',
 'resnet50_gn',
 'resnet50d',
 'resnet51q',
 'resnet61q',
 'resnet101',
 'resnet101d',
 'resnet152',
 'resnet152d',
 'resnet200d',
 'resnetaa50',
 'resnetblur50',
 'resnetrs50',
 'resnetrs101',
 'resnetrs152',
 'resnetrs200',
 'resnetrs270',
 'resnetrs350',
 'resnetrs420',
 'resnetv2_50',
 'resnetv2_50d_evos',
 'resnetv2_50d_gn',
 'resnetv2_50x1_bit_distilled',
 'resnetv2_50x1_bitm',
 'resnetv2_50x1_bitm_in21k',
 'resnetv2_50x3_bitm',
 'resnetv2_50x3_bitm_in21k',
 'resnetv2_101',
 'resnetv2_101x1_bitm',
 'resnetv2_101x1_bitm_in21k',
 'resnetv2_101x3_bitm',
 'resnetv2_101x3_bitm_in21k',
 'resnetv2_152x2_bit_teacher',
 'resnetv2_152x2_bit_teacher_384',
 'resnetv2_152x2_bitm',
 'resnetv2_152x2_bitm_in21k',
 'resnetv2_152x4_bitm',
 'resnetv2_152x4_bitm_in21k']

In [29]:
# Scratch check: an empty list from list_models is falsy, so the print only
# fires when no pretrained model matches the given name
if not timm.list_models('inception_resnet_v2', pretrained=True):
    print('Blah')

In [48]:
# Load two independent pretrained copies so each head-removal approach can be
# tried separately: base_model has its classifier replaced in place, while
# seq_model is rebuilt from its child layers
base_model = timm.create_model('inception_resnet_v2', pretrained=True)
seq_model = timm.create_model('inception_resnet_v2', pretrained=True)

In [49]:
# Inspect the model's default configuration; it carries useful metadata.
# The output shows 1000 output classes, a (3, 299, 299) input size, and a
# classifier head attribute named "classif".
base_model.default_cfg

{'url': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/inception_resnet_v2-940b1cd6.pth',
 'num_classes': 1000,
 'input_size': (3, 299, 299),
 'pool_size': (8, 8),
 'crop_pct': 0.8975,
 'interpolation': 'bicubic',
 'mean': (0.5, 0.5, 0.5),
 'std': (0.5, 0.5, 0.5),
 'first_conv': 'conv2d_1a.conv',
 'classifier': 'classif',
 'label_offset': 1,
 'architecture': 'inception_resnet_v2'}

In [63]:
# Pull just the input size entry out of the configuration
base_model.default_cfg['input_size']

(3, 299, 299)

In [50]:
# Get the model layers: children() yields only the direct child modules
# NOTE: printing the full list dumps every layer's repr, so the output is very long
layers = list(base_model.children())
print('Model layers: ', layers)
print('Num layers: ', len(layers))
# Show the module currently acting as the classifier head
print(base_model.get_classifier())


Model layers:  [BasicConv2d(
  (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
  (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
), BasicConv2d(
  (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
  (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
), BasicConv2d(
  (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
), MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False), BasicConv2d(
  (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
), BasicConv2d(
  (conv): Conv2d(80, 192, kernel_size=(3, 3), stride=(1, 1), bias=False)
  (bn): BatchNorm2d(192, eps=0.001,

In [51]:
# Number of input features the classifier head takes
base_model.get_classifier().in_features

1536

### Model Information
In the above cell, we see there are 17 layers for this model, with the last 2 being the adaptive pooling layer and the classifier head.
What we want to do is remove the classification head and use the base model as a feature extractor.
Replacing the classifier with a `torch.nn.Identity` layer (a module that returns its input unchanged) might do this for us, or we can just take all but the last 2 layers.
Let's examine both approaches!

In [41]:
# Replace the classification head with an nn.Identity module (a pass-through
# layer, not a matrix) under the head's attribute name, "classif"
base_model.classif = torch.nn.Identity()

In [42]:
# Confirm which module the model now reports as its classifier head
base_model.get_classifier()

Identity()

In [44]:
from torch import nn

In [43]:
# Collect the direct child modules of the untouched second copy (seq_model)
# NOTE: this rebinds `layers`, which previously held base_model's children
layers = list(seq_model.children())
num_layers = len(layers)
# NOTE: printing the full list dumps every layer's repr, so the output is very long
print('Model layers: ', layers)
print('Num layers: ', num_layers)


Model layers:  [BasicConv2d(
  (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
  (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
), BasicConv2d(
  (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
  (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
), BasicConv2d(
  (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
), MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False), BasicConv2d(
  (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
), BasicConv2d(
  (conv): Conv2d(80, 192, kernel_size=(3, 3), stride=(1, 1), bias=False)
  (bn): BatchNorm2d(192, eps=0.001,

15

In [46]:
# Keep every child except the final two and chain the remainder into a
# single sequential module
seq_layers = nn.Sequential(*layers[:num_layers - 2])
print('Sequential Layers ', seq_layers)
print('Num layers ', len(seq_layers))

Sequential Layers  Sequential(
  (0): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (1): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (2): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (4): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (5): BasicConv2d(
    (conv): Conv2d(80, 1

### Fruit Detection Model
Creating a model here that we can use to test our inference capabilities

In [52]:
# Import required libraries
import timm
import torch
from torch import nn

In [57]:
class FruitDetector(nn.Module):
    """Backbone wrapper for a fruit detection model (work in progress).

    Builds a pretrained TIMM model and keeps all of its direct child modules
    except the last two, so that ``forward`` returns the backbone's features.
    For 'inception_resnet_v2' the last two children are the pooling layer and
    the classifier head; other architectures may be laid out differently.
    The classification and box regression heads are not implemented yet.

    Args:
        base_model_type: Name of a TIMM model that has pretrained weights,
            e.g. ``'inception_resnet_v2'``.
        num_classes: Number of classes the detector should predict.

    Attributes:
        base_model: ``nn.Sequential`` of the retained backbone layers.
        num_classes: The value passed in as ``num_classes``.
        num_features: ``in_features`` of the original TIMM classifier head,
            kept for sizing the heads that will be added later.

    Raises:
        AssertionError: If no pretrained TIMM model matches
            ``base_model_type`` or if ``num_classes`` is ``None``.
    """

    def __init__(self, base_model_type, num_classes):
        super().__init__()

        # Check to see if the passed in base model type is in the list of TIMM models
        # If it isn't, raise an exception (list_models returns an empty, falsy list)
        assert timm.list_models(base_model_type, pretrained=True), "Must supply a valid pretrained model string"
        assert num_classes is not None, 'Must supply num_classes'

        # Create the base model using the TIMM library
        backbone = timm.create_model(base_model_type, pretrained=True)
        # Assign our number of classes for the model
        self.num_classes = num_classes

        # Look up the name of the classifier head and remember how many
        # features feed into it before the head is removed
        classifier_name = backbone.default_cfg['classifier']
        self.num_features = backbone.get_classifier().in_features

        # Replace the classifier head with nn.Identity under its *actual* attribute
        # name. A plain `backbone.base_classifier_name = ...` assignment would register
        # a brand-new child module instead, which shifts the slice below by one layer.
        # Assumes the configured name is a plain attribute such as 'classif';
        # a dotted name would need different handling — TODO confirm for other models.
        setattr(backbone, classifier_name, nn.Identity())

        # Keep all children except the last two and chain them into one module.
        # NOTE: this relies on children() order matching the model's forward order.
        backbone_layers = list(backbone.children())
        self.base_model = nn.Sequential(*backbone_layers[:-2])

        # Create the updated box regression and classification heads
        # Need to pass in the base model input features to each head and then downsample in layers

    def forward(self, x):
        """Run ``x`` through the backbone and return its output unchanged.

        Just testing for now: no detection heads are applied yet.
        """
        features = self.base_model(x)
        return features

In [60]:
# NOTE(review): 'inception_resnet_v200' is not one of the names returned by
# timm.list_models('inception*', pretrained=True) earlier in this notebook, so
# this construction fails. Presumably a deliberate negative test — TODO confirm;
# pass 'inception_resnet_v2' to build a working detector.
base_model_name = 'inception_resnet_v200'
num_classes = 3
model = FruitDetector(base_model_name, num_classes)

RuntimeError: Unknown model (inception_resnet_v200)