In [1]:
# import warnings
# warnings.filterwarnings('ignore')

# # %%script python -- arg1 arg2 arg3

# import ivy
# ivy.set_backend("torch")
# from googlenet import inceptionNet_v1
# from ivy_models_tests import helpers
# import torch

# def test():
#     model = inceptionNet_v1(pretrained=False)
#     output = model(torch.randn(1, 3, 224, 224))
#     print(output.size())

# test()

# Start

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# global
import builtins
import ivy
import ivy_models
import numpy as np

In [4]:

# Building the initial Convolutional Block
class ConvBlock(ivy.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        super(ConvBlock, self).__init__()

    def _build(self, *args, **kwargs):
        self.conv = ivy.Conv2D(self.in_channels, self.out_channels, self.kernel_size, self.stride, self.padding, with_bias=False)
        self.bn = ivy.BatchNorm2D(self.out_channels)
        self.activation = ivy.ReLU()

    def _forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.activation(x)
        return x

In [5]:

class Inception(ivy.Module):
    def __init__(
        self,
        in_channels,
        num1x1,
        num3x3_reduce,
        num3x3,
        num5x5_reduce,
        num5x5,
        pool_proj,
    ):
        self.in_channels = in_channels
        self.num1x1 = num1x1
        self.num3x3_reduce = num3x3_reduce
        self.num3x3 = num3x3
        self.num5x5_reduce = num5x5_reduce
        self.num5x5 = num5x5
        self.pool_proj = pool_proj
        super(Inception, self).__init__()
    
    def _build(self, *args, **kwargs):
        self.block1 = ivy.Sequential(ConvBlock(self.in_channels, self.num1x1, kernel_size=[1, 1])
                                     )
        self.block2 = ivy.Sequential(ConvBlock(self.in_channels, self.num3x3_reduce, kernel_size=[1, 1]),
                                    ConvBlock(self.num3x3_reduce, self.num3x3, kernel_size=[3, 3], padding=1)
                                    )
        self.block3 = ivy.Sequential(ConvBlock(self.in_channels, self.num5x5_reduce, kernel_size=[1, 1]),
                                    ConvBlock(self.num5x5_reduce, self.num5x5, kernel_size=[3, 3], padding=1)
                                    )
        self.block4 = ivy.Sequential(ivy.MaxPool2D(3, 1, 1),
                                    ConvBlock(self.in_channels, self.pool_proj, kernel_size=[1, 1])
                                    )
        
    def _forward(self, x):
        block1 = self.block1(x)
        block2 = self.block2(x)
        block3 = self.block3(x)
        block4 = self.block4(x)

        return ivy.concat([block1, block2, block3, block4], axis=3)

In [6]:

class Auxiliary(ivy.Module):
    def __init__(self, in_channels, num_classes):
        self.in_channels = in_channels
        self.num_classes = num_classes
        super(Auxiliary, self).__init__()

    def _build(self, *args, **kwargs):
        self.pool = ivy.AvgPool2D((5, 5), 3, 0)  # ivy.Shape(1, 4, 4, 512)
        self.bn = ivy.BatchNorm2D(128)
        self.conv = ivy.Conv2D(self.in_channels, 128, [1,1], 1, 0, with_bias=False)
        self.activation = ivy.ReLU()
        self.fc1 = ivy.Linear(2048, 1024)
        self.dropout = ivy.Dropout(0.7)
        self.fc2 = ivy.Linear(1024, self.num_classes)

    def _forward(self, x):
        out = self.pool(x)
        out = self.conv(out) # contains weights
        out = self.activation(out)
        out = ivy.flatten(out, start_dim=1)
        out = self.fc1(out) # contains weights
        out = self.activation(out)
        out = self.dropout(out)
        out = self.fc2(out) # contains weights
        return out

In [7]:

class GoogLeNet(ivy.Module):
    def __init__(self, num_classes=1000, v: ivy.Container = None,):
        if v is not None:
            self.v = v
        self.num_classes = num_classes
        super(GoogLeNet, self).__init__(v=v)

    def _build(self, *args, **kwargs):
        self.conv1 = ConvBlock(3, 64, [7,7], 2, 3)
        self.pool1 = ivy.MaxPool2D([3,3], 2, 1)
        self.conv2 = ConvBlock(64, 64, [1,1], 1, 0,)
        self.conv3 = ConvBlock(64, 192, [3,3], 1, 1)
        self.pool3 = ivy.MaxPool2D(3, 2, 1)
        self.inception3A = Inception(192, 64, 96, 128, 16, 32, 32)
        self.inception3B = Inception(256, 128, 128, 192, 32, 96, 64)
        self.pool4 = ivy.MaxPool2D(3, 2, 1)

        self.inception4A = Inception(480, 192, 96, 208, 16, 48, 64)

        # ivy.flatten()
        self.aux4A = Auxiliary(512, self.num_classes)

        self.inception4B = Inception(512, 160, 112, 224, 24, 64, 64)
        self.inception4C = Inception(512, 128, 128, 256, 24, 64, 64)
        self.inception4D = Inception(512, 112, 144, 288, 32, 64, 64)

        self.aux4D = Auxiliary(528, self.num_classes)

        self.inception4E = Inception(528, 256, 160, 320, 32, 128, 128)
        self.pool5 = ivy.MaxPool2D(3, 2, 1)

        self.inception5A = Inception(832, 256, 160, 320, 32, 128, 128)
        self.inception5B = Inception(832, 384, 192, 384, 48, 128,128)
        self.pool6 = ivy.AvgPool2D((7,7), 1, 0) # ((1, 1))

        # ivy.flatten()
        self.dropout = ivy.Dropout(0.4)
        self.fc = ivy.Linear(1024, self.num_classes)


    def _forward(self, x):
        out = self.conv1(x)
        out = self.pool1(out)
        out = self.conv2(out)
        out = self.conv3(out)
        out = self.pool3(out)
        out = self.inception3A(out)
        out = self.inception3B(out)
        out = self.pool4(out)
        out = self.inception4A(out)

        aux1 = self.aux4A(out)

        out = self.inception4B(out)
        out = self.inception4C(out)
        out = self.inception4D(out)

        aux2 = self.aux4D(out)

        out = self.inception4E(out)
        out = self.pool5(out)
        out = self.inception5A(out)
        out = self.inception5B(out)
        out = self.pool6(out)
        out = ivy.flatten(out, start_dim=1)
        out = self.dropout(out)
        out = self.fc(out)

        return out, aux1, aux2


In [9]:
# model = GoogLeNet()
# model.v

In [11]:

def _inceptionNet_torch_weights_mapping(old_key, new_key):
    W_KEY = ["conv/weight"]
    new_mapping = new_key
    if any([kc in old_key for kc in W_KEY]):
        new_mapping = {"key_chain": new_key, "pattern": "b c h w -> h w c b"}
    return new_mapping


In [12]:
import torch

def inceptionNet_v1(pretrained=True):
    """InceptionNet-V1 model"""
    if not pretrained:
        return GoogLeNet()

    reference_model = GoogLeNet()
    url = "https://download.pytorch.org/models/googlenet-1378be20.pth"
    w_clean = ivy_models.helpers.load_torch_weights(
        url,
        reference_model,
        raw_keys_to_prune=["num_batches_tracked"],
        custom_mapping=_inceptionNet_torch_weights_mapping,
        )
    display("calling model with weights!")
    return GoogLeNet(v=w_clean)



In [13]:
# reference_model = GoogLeNet()
# url = "https://download.pytorch.org/models/googlenet-1378be20.pth"
# w_clean = ivy_models.helpers.load_torch_weights(
#     url,
#     reference_model,
#     raw_keys_to_prune=["num_batches_tracked"],
#     custom_mapping=_inceptionNet_torch_weights_mapping,
#     )
# display(w_clean)

In [14]:
from torchvision import transforms
from PIL import Image 

def load_and_preprocess_img(
    path, new_size, crop, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
):
    img = Image.open(path)
    compose = transforms.Compose(
        [
            transforms.Resize(new_size),
            transforms.CenterCrop(crop),
            transforms.ToTensor(),
            transforms.Normalize(mean=mean, std=std),
        ]
    )
    img = compose(img)
    img = img.unsqueeze(0).permute((0, 2, 3, 1))
    return img.numpy()

In [16]:
import ivy
ivy.set_backend("torch")

# Load image
this_dir = "/models/images/cat.jpg"
img = ivy.asarray(load_and_preprocess_img(this_dir, 256, 224))
display(img.shape)

ivy.Shape(1, 224, 224, 3)

In [17]:
import torch
from torchvision.models import googlenet

input_tensor = ivy.reshape(img, (1, 3, 224, 224))
gt_model = googlenet()
gt_model.eval()
with torch.no_grad():
    output_gt = gt_model(input_tensor)

output_gt.shape

torch.Size([1, 1000])

In [18]:
model = inceptionNet_v1(pretrained=True)

TypeError: ConvBlock.__init__() missing 2 required positional arguments: 'stride' and 'padding'

In [None]:
# input_tensor = ivy.reshape(input_tensor, (1, 224, 224, 3))
output, _, _ = model(img)

display(output_gt.shape)
display(output.shape)
# display(model.v)

IvyBackendException: torch: concat:   File "/ivy/ivy/ivy/utils/exceptions.py", line 210, in _handle_exceptions
    return fn(*args, **kwargs)
  File "/ivy/ivy/ivy/func_wrapper.py", line 972, in _handle_nestable
    return fn(*args, **kwargs)
  File "/ivy/ivy/ivy/func_wrapper.py", line 864, in _handle_out_argument
    return fn(*args, out=out, **kwargs)
  File "/ivy/ivy/ivy/func_wrapper.py", line 440, in _inputs_to_native_arrays
    return fn(*new_args, **new_kwargs)
  File "/ivy/ivy/ivy/func_wrapper.py", line 546, in _outputs_to_ivy_arrays
    ret = fn(*args, **kwargs)
  File "/ivy/ivy/ivy/func_wrapper.py", line 357, in _handle_array_function
    return fn(*args, **kwargs)
  File "/ivy/ivy/ivy/functional/backends/torch/manipulation.py", line 43, in concat
    return torch.cat(xs, dim=axis, out=out)
During the handling of the above exception, another exception occurred:
 Sizes of tensors must match except in dimension 3. Expected size 28 but got size 30 for tensor number 2 in the list.

Shape test

In [None]:
assert output.shape == tuple([1, 1000])

NameError: name 'output' is not defined

Value test

In [None]:
# indices of logits in decending order from both outputs
calc_indices = ivy.argsort(output[0], descending=True)[:3]
true_indices = ivy.argsort(output_gt[0], descending=True)[:3]
display(calc_indices)
display(true_indices)

ivy.array([ 69, 452, 575])

ivy.array([761, 803, 601])

In [None]:
assert np.array_equal(true_indices, calc_indices)

NameError: name 'np' is not defined

In [None]:
calc_logits = ivy.take_along_axis(output[0], calc_indices, 0)
true_logits = output_gt[0, true_indices[0]].tolist()
display(calc_logits)
display(true_logits)

ivy.array([569.6154785, 747.3538818, 659.2441406])

[8.073171615600586, 8.19470500946045, 6.8008599281311035]

In [None]:
assert np.allclose(true_logits, calc_logits, rtol=0.5)