In [1]:
import os
from pathlib import Path
from dataclasses import dataclass
from enum import Enum
from tqdm import tqdm

import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.optim as optim
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor, FasterRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone

from albumentations import *
from albumentations.pytorch import ToTensorV2
import cv2

In [2]:
device = torch.device('cuda')

In [3]:
import warnings
from collections import namedtuple
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.jit.annotations import Optional, Tuple
from torch import Tensor

__all__ = ['GoogLeNet', 'googlenet', "GoogLeNetOutputs", "_GoogLeNetOutputs"]

model_urls = {
    # GoogLeNet ported from TensorFlow
    'googlenet': '/kaggle/input/fourthpth/googlenet-1378be20.pth',
}

GoogLeNetOutputs = namedtuple('GoogLeNetOutputs', ['logits', 'aux_logits2', 'aux_logits1'])
GoogLeNetOutputs.__annotations__ = {'logits': Tensor, 'aux_logits2': Optional[Tensor],
                                    'aux_logits1': Optional[Tensor]}

# Script annotations failed with _GoogleNetOutputs = namedtuple ...
# _GoogLeNetOutputs set here for backwards compat
_GoogLeNetOutputs = GoogLeNetOutputs


def googlenet(pretrained=False, **kwargs):
    r"""GoogLeNet (Inception v1) model architecture from
    `"Going Deeper with Convolutions" <http://arxiv.org/abs/1409.4842>`_.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
        aux_logits (bool): If True, adds two auxiliary branches that can improve training.
            Default: *False* when pretrained is True otherwise *True*
        transform_input (bool): If True, preprocesses the input according to the method with which it
            was trained on ImageNet. Default: *False*
    """
    if pretrained:
        if 'transform_input' not in kwargs:
            kwargs['transform_input'] = True
        if 'aux_logits' not in kwargs:
            kwargs['aux_logits'] = False
        if kwargs['aux_logits']:
            warnings.warn('auxiliary heads in the pretrained googlenet model are NOT pretrained, '
                          'so make sure to train them')
        original_aux_logits = kwargs['aux_logits']
        kwargs['aux_logits'] = True
        kwargs['init_weights'] = False
        model = GoogLeNet(**kwargs)
        state_dict = model_urls
        model.load_state_dict(state_dict)
        if not original_aux_logits:
            model.aux_logits = False
            model.aux1 = None
            model.aux2 = None
        return model

    return GoogLeNet(**kwargs)


class GoogLeNet(nn.Module):
    __constants__ = ['aux_logits', 'transform_input']

    def __init__(self, num_classes=1000, aux_logits=True, transform_input=False, init_weights=None,
                 blocks=None):
        super(GoogLeNet, self).__init__()
        if blocks is None:
            blocks = [BasicConv2d, Inception, InceptionAux]
        if init_weights is None:
            warnings.warn('The default weight initialization of GoogleNet will be changed in future releases of '
                          'torchvision. If you wish to keep the old behavior (which leads to long initialization times'
                          ' due to scipy/scipy#11299), please set init_weights=True.', FutureWarning)
            init_weights = True
        assert len(blocks) == 3
        conv_block = blocks[0]
        inception_block = blocks[1]
        inception_aux_block = blocks[2]

        self.aux_logits = aux_logits
        self.transform_input = transform_input

        self.conv1 = conv_block(3, 64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(3, stride=2, ceil_mode=True)
        self.conv2 = conv_block(64, 64, kernel_size=1)
        self.conv3 = conv_block(64, 192, kernel_size=3, padding=1)
        self.maxpool2 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

        self.inception3a = inception_block(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = inception_block(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

        self.inception4a = inception_block(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = inception_block(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = inception_block(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = inception_block(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = inception_block(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.inception5a = inception_block(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = inception_block(832, 384, 192, 384, 48, 128, 128)

        if aux_logits:
            self.aux1 = inception_aux_block(512, num_classes)
            self.aux2 = inception_aux_block(528, num_classes)
        else:
            self.aux1 = None
            self.aux2 = None

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(0.2)
        self.fc = nn.Linear(1024, num_classes)

        if init_weights:
            self._initialize_weights()

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                import scipy.stats as stats
                X = stats.truncnorm(-2, 2, scale=0.01)
                values = torch.as_tensor(X.rvs(m.weight.numel()), dtype=m.weight.dtype)
                values = values.view(m.weight.size())
                with torch.no_grad():
                    m.weight.copy_(values)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _transform_input(self, x):
        # type: (Tensor) -> Tensor
        if self.transform_input:
            x_ch0 = torch.unsqueeze(x[:, 0], 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
            x_ch1 = torch.unsqueeze(x[:, 1], 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
            x_ch2 = torch.unsqueeze(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
            x = torch.cat((x_ch0, x_ch1, x_ch2), 1)
        return x

    def _forward(self, x):
        # type: (Tensor) -> Tuple[Tensor, Optional[Tensor], Optional[Tensor]]
        # N x 3 x 224 x 224
        x = self.conv1(x)
        # N x 64 x 112 x 112
        x = self.maxpool1(x)
        # N x 64 x 56 x 56
        x = self.conv2(x)
        # N x 64 x 56 x 56
        x = self.conv3(x)
        # N x 192 x 56 x 56
        x = self.maxpool2(x)

        # N x 192 x 28 x 28
        x = self.inception3a(x)
        # N x 256 x 28 x 28
        x = self.inception3b(x)
        # N x 480 x 28 x 28
        x = self.maxpool3(x)
        # N x 480 x 14 x 14
        x = self.inception4a(x)
        # N x 512 x 14 x 14
        aux1 = torch.jit.annotate(Optional[Tensor], None)
        if self.aux1 is not None:
            if self.training:
                aux1 = self.aux1(x)

        x = self.inception4b(x)
        # N x 512 x 14 x 14
        x = self.inception4c(x)
        # N x 512 x 14 x 14
        x = self.inception4d(x)
        # N x 528 x 14 x 14
        aux2 = torch.jit.annotate(Optional[Tensor], None)
        if self.aux2 is not None:
            if self.training:
                aux2 = self.aux2(x)

        x = self.inception4e(x)
        # N x 832 x 14 x 14
        x = self.maxpool4(x)
        # N x 832 x 7 x 7
        x = self.inception5a(x)
        # N x 832 x 7 x 7
        x = self.inception5b(x)
        # N x 1024 x 7 x 7

        x = self.avgpool(x)
        # N x 1024 x 1 x 1
        x = torch.flatten(x, 1)
        # N x 1024
        x = self.dropout(x)
        x = self.fc(x)
        # N x 1000 (num_classes)
        return x, aux2, aux1

    @torch.jit.unused
    def eager_outputs(self, x, aux2, aux1):
        # type: (Tensor, Optional[Tensor], Optional[Tensor]) -> GoogLeNetOutputs
        if self.training and self.aux_logits:
            return _GoogLeNetOutputs(x, aux2, aux1)
        else:
            return x

    def forward(self, x):
        # type: (Tensor) -> GoogLeNetOutputs
        x = self._transform_input(x)
        x, aux1, aux2 = self._forward(x)
        aux_defined = self.training and self.aux_logits
        if torch.jit.is_scripting():
            if not aux_defined:
                warnings.warn("Scripted GoogleNet always returns GoogleNetOutputs Tuple")
            return GoogLeNetOutputs(x, aux2, aux1)
        else:
            return self.eager_outputs(x, aux2, aux1)


class Inception(nn.Module):

    def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj,
                 conv_block=None):
        super(Inception, self).__init__()
        if conv_block is None:
            conv_block = BasicConv2d
        self.branch1 = conv_block(in_channels, ch1x1, kernel_size=1)

        self.branch2 = nn.Sequential(
            conv_block(in_channels, ch3x3red, kernel_size=1),
            conv_block(ch3x3red, ch3x3, kernel_size=3, padding=1)
        )

        self.branch3 = nn.Sequential(
            conv_block(in_channels, ch5x5red, kernel_size=1),
            # Here, kernel_size=3 instead of kernel_size=5 is a known bug.
            # Please see https://github.com/pytorch/vision/issues/906 for details.
            conv_block(ch5x5red, ch5x5, kernel_size=3, padding=1)
        )

        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1, ceil_mode=True),
            conv_block(in_channels, pool_proj, kernel_size=1)
        )

    def _forward(self, x):
        branch1 = self.branch1(x)
        branch2 = self.branch2(x)
        branch3 = self.branch3(x)
        branch4 = self.branch4(x)

        outputs = [branch1, branch2, branch3, branch4]
        return outputs

    def forward(self, x):
        outputs = self._forward(x)
        return torch.cat(outputs, 1)


class InceptionAux(nn.Module):

    def __init__(self, in_channels, num_classes, conv_block=None):
        super(InceptionAux, self).__init__()
        if conv_block is None:
            conv_block = BasicConv2d
        self.conv = conv_block(in_channels, 128, kernel_size=1)

        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        # aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14
        x = F.adaptive_avg_pool2d(x, (4, 4))
        # aux1: N x 512 x 4 x 4, aux2: N x 528 x 4 x 4
        x = self.conv(x)
        # N x 128 x 4 x 4
        x = torch.flatten(x, 1)
        # N x 2048
        x = F.relu(self.fc1(x), inplace=True)
        # N x 1024
        x = F.dropout(x, 0.7, training=self.training)
        # N x 1024
        x = self.fc2(x)
        # N x 1000 (num_classes)

        return x


class BasicConv2d(nn.Module):

    def __init__(self, in_channels, out_channels, **kwargs):
        super(BasicConv2d, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return F.relu(x, inplace=True)

In [4]:
class GoogleFeatures(nn.Module):
    def __init__(self):
        super(GoogleFeatures, self).__init__()
        base_model =  googlenet(pretrained=True)

        self.seq1 = nn.Sequential(base_model.conv1,
                                  base_model.maxpool1,
                                  base_model.conv2,
                                  base_model.conv3,
                                  base_model.maxpool2
                                  )
        self.seq2 = nn.Sequential(base_model.inception3a,
                                  base_model.inception3b,
                                  base_model.maxpool3,
                                  base_model.inception4a,
                                  base_model.aux1
                                  )
        self.seq3 = nn.Sequential(base_model.inception4b,
                                  base_model.inception4c,
                                  base_model.inception4d,
                                  base_model.aux2
                                  )
        self.seq4 = nn.Sequential(base_model.inception4e,
                                  base_model.maxpool4,
                                  base_model.inception5a,
                                  base_model.inception5b
                                  )
        self.out_channels = 192

    def forward(self, x):
        x = self.seq1(x)

        return x
backbone = GoogleFeatures()
backbone.out_channels = 192

RuntimeError: Error(s) in loading state_dict for GoogLeNet:
	Missing key(s) in state_dict: "conv1.conv.weight", "conv1.bn.weight", "conv1.bn.bias", "conv1.bn.running_mean", "conv1.bn.running_var", "conv2.conv.weight", "conv2.bn.weight", "conv2.bn.bias", "conv2.bn.running_mean", "conv2.bn.running_var", "conv3.conv.weight", "conv3.bn.weight", "conv3.bn.bias", "conv3.bn.running_mean", "conv3.bn.running_var", "inception3a.branch1.conv.weight", "inception3a.branch1.bn.weight", "inception3a.branch1.bn.bias", "inception3a.branch1.bn.running_mean", "inception3a.branch1.bn.running_var", "inception3a.branch2.0.conv.weight", "inception3a.branch2.0.bn.weight", "inception3a.branch2.0.bn.bias", "inception3a.branch2.0.bn.running_mean", "inception3a.branch2.0.bn.running_var", "inception3a.branch2.1.conv.weight", "inception3a.branch2.1.bn.weight", "inception3a.branch2.1.bn.bias", "inception3a.branch2.1.bn.running_mean", "inception3a.branch2.1.bn.running_var", "inception3a.branch3.0.conv.weight", "inception3a.branch3.0.bn.weight", "inception3a.branch3.0.bn.bias", "inception3a.branch3.0.bn.running_mean", "inception3a.branch3.0.bn.running_var", "inception3a.branch3.1.conv.weight", "inception3a.branch3.1.bn.weight", "inception3a.branch3.1.bn.bias", "inception3a.branch3.1.bn.running_mean", "inception3a.branch3.1.bn.running_var", "inception3a.branch4.1.conv.weight", "inception3a.branch4.1.bn.weight", "inception3a.branch4.1.bn.bias", "inception3a.branch4.1.bn.running_mean", "inception3a.branch4.1.bn.running_var", "inception3b.branch1.conv.weight", "inception3b.branch1.bn.weight", "inception3b.branch1.bn.bias", "inception3b.branch1.bn.running_mean", "inception3b.branch1.bn.running_var", "inception3b.branch2.0.conv.weight", "inception3b.branch2.0.bn.weight", "inception3b.branch2.0.bn.bias", "inception3b.branch2.0.bn.running_mean", "inception3b.branch2.0.bn.running_var", "inception3b.branch2.1.conv.weight", "inception3b.branch2.1.bn.weight", "inception3b.branch2.1.bn.bias", "inception3b.branch2.1.bn.running_mean", "inception3b.branch2.1.bn.running_var", "inception3b.branch3.0.conv.weight", "inception3b.branch3.0.bn.weight", "inception3b.branch3.0.bn.bias", "inception3b.branch3.0.bn.running_mean", "inception3b.branch3.0.bn.running_var", "inception3b.branch3.1.conv.weight", "inception3b.branch3.1.bn.weight", "inception3b.branch3.1.bn.bias", "inception3b.branch3.1.bn.running_mean", "inception3b.branch3.1.bn.running_var", "inception3b.branch4.1.conv.weight", "inception3b.branch4.1.bn.weight", "inception3b.branch4.1.bn.bias", "inception3b.branch4.1.bn.running_mean", "inception3b.branch4.1.bn.running_var", "inception4a.branch1.conv.weight", "inception4a.branch1.bn.weight", "inception4a.branch1.bn.bias", "inception4a.branch1.bn.running_mean", "inception4a.branch1.bn.running_var", "inception4a.branch2.0.conv.weight", "inception4a.branch2.0.bn.weight", "inception4a.branch2.0.bn.bias", "inception4a.branch2.0.bn.running_mean", "inception4a.branch2.0.bn.running_var", "inception4a.branch2.1.conv.weight", "inception4a.branch2.1.bn.weight", "inception4a.branch2.1.bn.bias", "inception4a.branch2.1.bn.running_mean", "inception4a.branch2.1.bn.running_var", "inception4a.branch3.0.conv.weight", "inception4a.branch3.0.bn.weight", "inception4a.branch3.0.bn.bias", "inception4a.branch3.0.bn.running_mean", "inception4a.branch3.0.bn.running_var", "inception4a.branch3.1.conv.weight", "inception4a.branch3.1.bn.weight", "inception4a.branch3.1.bn.bias", "inception4a.branch3.1.bn.running_mean", "inception4a.branch3.1.bn.running_var", "inception4a.branch4.1.conv.weight", "inception4a.branch4.1.bn.weight", "inception4a.branch4.1.bn.bias", "inception4a.branch4.1.bn.running_mean", "inception4a.branch4.1.bn.running_var", "inception4b.branch1.conv.weight", "inception4b.branch1.bn.weight", "inception4b.branch1.bn.bias", "inception4b.branch1.bn.running_mean", "inception4b.branch1.bn.running_var", "inception4b.branch2.0.conv.weight", "inception4b.branch2.0.bn.weight", "inception4b.branch2.0.bn.bias", "inception4b.branch2.0.bn.running_mean", "inception4b.branch2.0.bn.running_var", "inception4b.branch2.1.conv.weight", "inception4b.branch2.1.bn.weight", "inception4b.branch2.1.bn.bias", "inception4b.branch2.1.bn.running_mean", "inception4b.branch2.1.bn.running_var", "inception4b.branch3.0.conv.weight", "inception4b.branch3.0.bn.weight", "inception4b.branch3.0.bn.bias", "inception4b.branch3.0.bn.running_mean", "inception4b.branch3.0.bn.running_var", "inception4b.branch3.1.conv.weight", "inception4b.branch3.1.bn.weight", "inception4b.branch3.1.bn.bias", "inception4b.branch3.1.bn.running_mean", "inception4b.branch3.1.bn.running_var", "inception4b.branch4.1.conv.weight", "inception4b.branch4.1.bn.weight", "inception4b.branch4.1.bn.bias", "inception4b.branch4.1.bn.running_mean", "inception4b.branch4.1.bn.running_var", "inception4c.branch1.conv.weight", "inception4c.branch1.bn.weight", "inception4c.branch1.bn.bias", "inception4c.branch1.bn.running_mean", "inception4c.branch1.bn.running_var", "inception4c.branch2.0.conv.weight", "inception4c.branch2.0.bn.weight", "inception4c.branch2.0.bn.bias", "inception4c.branch2.0.bn.running_mean", "inception4c.branch2.0.bn.running_var", "inception4c.branch2.1.conv.weight", "inception4c.branch2.1.bn.weight", "inception4c.branch2.1.bn.bias", "inception4c.branch2.1.bn.running_mean", "inception4c.branch2.1.bn.running_var", "inception4c.branch3.0.conv.weight", "inception4c.branch3.0.bn.weight", "inception4c.branch3.0.bn.bias", "inception4c.branch3.0.bn.running_mean", "inception4c.branch3.0.bn.running_var", "inception4c.branch3.1.conv.weight", "inception4c.branch3.1.bn.weight", "inception4c.branch3.1.bn.bias", "inception4c.branch3.1.bn.running_mean", "inception4c.branch3.1.bn.running_var", "inception4c.branch4.1.conv.weight", "inception4c.branch4.1.bn.weight", "inception4c.branch4.1.bn.bias", "inception4c.branch4.1.bn.running_mean", "inception4c.branch4.1.bn.running_var", "inception4d.branch1.conv.weight", "inception4d.branch1.bn.weight", "inception4d.branch1.bn.bias", "inception4d.branch1.bn.running_mean", "inception4d.branch1.bn.running_var", "inception4d.branch2.0.conv.weight", "inception4d.branch2.0.bn.weight", "inception4d.branch2.0.bn.bias", "inception4d.branch2.0.bn.running_mean", "inception4d.branch2.0.bn.running_var", "inception4d.branch2.1.conv.weight", "inception4d.branch2.1.bn.weight", "inception4d.branch2.1.bn.bias", "inception4d.branch2.1.bn.running_mean", "inception4d.branch2.1.bn.running_var", "inception4d.branch3.0.conv.weight", "inception4d.branch3.0.bn.weight", "inception4d.branch3.0.bn.bias", "inception4d.branch3.0.bn.running_mean", "inception4d.branch3.0.bn.running_var", "inception4d.branch3.1.conv.weight", "inception4d.branch3.1.bn.weight", "inception4d.branch3.1.bn.bias", "inception4d.branch3.1.bn.running_mean", "inception4d.branch3.1.bn.running_var", "inception4d.branch4.1.conv.weight", "inception4d.branch4.1.bn.weight", "inception4d.branch4.1.bn.bias", "inception4d.branch4.1.bn.running_mean", "inception4d.branch4.1.bn.running_var", "inception4e.branch1.conv.weight", "inception4e.branch1.bn.weight", "inception4e.branch1.bn.bias", "inception4e.branch1.bn.running_mean", "inception4e.branch1.bn.running_var", "inception4e.branch2.0.conv.weight", "inception4e.branch2.0.bn.weight", "inception4e.branch2.0.bn.bias", "inception4e.branch2.0.bn.running_mean", "inception4e.branch2.0.bn.running_var", "inception4e.branch2.1.conv.weight", "inception4e.branch2.1.bn.weight", "inception4e.branch2.1.bn.bias", "inception4e.branch2.1.bn.running_mean", "inception4e.branch2.1.bn.running_var", "inception4e.branch3.0.conv.weight", "inception4e.branch3.0.bn.weight", "inception4e.branch3.0.bn.bias", "inception4e.branch3.0.bn.running_mean", "inception4e.branch3.0.bn.running_var", "inception4e.branch3.1.conv.weight", "inception4e.branch3.1.bn.weight", "inception4e.branch3.1.bn.bias", "inception4e.branch3.1.bn.running_mean", "inception4e.branch3.1.bn.running_var", "inception4e.branch4.1.conv.weight", "inception4e.branch4.1.bn.weight", "inception4e.branch4.1.bn.bias", "inception4e.branch4.1.bn.running_mean", "inception4e.branch4.1.bn.running_var", "inception5a.branch1.conv.weight", "inception5a.branch1.bn.weight", "inception5a.branch1.bn.bias", "inception5a.branch1.bn.running_mean", "inception5a.branch1.bn.running_var", "inception5a.branch2.0.conv.weight", "inception5a.branch2.0.bn.weight", "inception5a.branch2.0.bn.bias", "inception5a.branch2.0.bn.running_mean", "inception5a.branch2.0.bn.running_var", "inception5a.branch2.1.conv.weight", "inception5a.branch2.1.bn.weight", "inception5a.branch2.1.bn.bias", "inception5a.branch2.1.bn.running_mean", "inception5a.branch2.1.bn.running_var", "inception5a.branch3.0.conv.weight", "inception5a.branch3.0.bn.weight", "inception5a.branch3.0.bn.bias", "inception5a.branch3.0.bn.running_mean", "inception5a.branch3.0.bn.running_var", "inception5a.branch3.1.conv.weight", "inception5a.branch3.1.bn.weight", "inception5a.branch3.1.bn.bias", "inception5a.branch3.1.bn.running_mean", "inception5a.branch3.1.bn.running_var", "inception5a.branch4.1.conv.weight", "inception5a.branch4.1.bn.weight", "inception5a.branch4.1.bn.bias", "inception5a.branch4.1.bn.running_mean", "inception5a.branch4.1.bn.running_var", "inception5b.branch1.conv.weight", "inception5b.branch1.bn.weight", "inception5b.branch1.bn.bias", "inception5b.branch1.bn.running_mean", "inception5b.branch1.bn.running_var", "inception5b.branch2.0.conv.weight", "inception5b.branch2.0.bn.weight", "inception5b.branch2.0.bn.bias", "inception5b.branch2.0.bn.running_mean", "inception5b.branch2.0.bn.running_var", "inception5b.branch2.1.conv.weight", "inception5b.branch2.1.bn.weight", "inception5b.branch2.1.bn.bias", "inception5b.branch2.1.bn.running_mean", "inception5b.branch2.1.bn.running_var", "inception5b.branch3.0.conv.weight", "inception5b.branch3.0.bn.weight", "inception5b.branch3.0.bn.bias", "inception5b.branch3.0.bn.running_mean", "inception5b.branch3.0.bn.running_var", "inception5b.branch3.1.conv.weight", "inception5b.branch3.1.bn.weight", "inception5b.branch3.1.bn.bias", "inception5b.branch3.1.bn.running_mean", "inception5b.branch3.1.bn.running_var", "inception5b.branch4.1.conv.weight", "inception5b.branch4.1.bn.weight", "inception5b.branch4.1.bn.bias", "inception5b.branch4.1.bn.running_mean", "inception5b.branch4.1.bn.running_var", "aux1.conv.conv.weight", "aux1.conv.bn.weight", "aux1.conv.bn.bias", "aux1.conv.bn.running_mean", "aux1.conv.bn.running_var", "aux1.fc1.weight", "aux1.fc1.bias", "aux1.fc2.weight", "aux1.fc2.bias", "aux2.conv.conv.weight", "aux2.conv.bn.weight", "aux2.conv.bn.bias", "aux2.conv.bn.running_mean", "aux2.conv.bn.running_var", "aux2.fc1.weight", "aux2.fc1.bias", "aux2.fc2.weight", "aux2.fc2.bias", "fc.weight", "fc.bias". 
	Unexpected key(s) in state_dict: "googlenet". 

In [5]:
def fasterrcnn_google(pretrained=False, progress=True,
                            num_classes=2, pretrained_backbone=True,
                             trainable_backbone_layers=3, **kwargs):
    assert trainable_backbone_layers <= 5 and trainable_backbone_layers >= 0
    # dont freeze any layers if pretrained model or backbone is not used
    if not (pretrained or pretrained_backbone):
        trainable_backbone_layers = 5
    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    model = FasterRCNN(backbone, num_classes, **kwargs)
    return model

In [6]:
def initialize_model():
    model = fasterrcnn_google(pretrained=False)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 2)
    return model

In [7]:
model = initialize_model()

NameError: name 'backbone' is not defined

In [8]:
save_model_path = "../input/fourthpth"

In [9]:
model.load_state_dict(torch.load(os.path.join(save_model_path, f"best_model_epoch4.pth")))
model.to(device)
model.eval()

NameError: name 'model' is not defined

In [10]:
@dataclass
class DatasetArguments:
    data_dir: Path
    images_lists_dict: dict
    labels_csv_file_name: str

@dataclass
class DataLoaderArguments:
    batch_size: int
    num_workers: int
    dataset_arguments: DatasetArguments

In [11]:
phase="test"

In [12]:
root_data_dir = Path("/kaggle/input/global-wheat-detection/")
unlabeled_generated_images_path = Path(f"/kaggle/input/global-wheat-detection/{phase}/")

In [13]:
def get_images_file_names(directory):
    _, _, files = next(os.walk(directory))
    return files
test_file_names = get_images_file_names(unlabeled_generated_images_path)
test_file_names = [x.split(".")[0] for x in test_file_names]

In [14]:
images_lists_dict = {
    "test": test_file_names
}

In [15]:
prediction_dataset_arguments = DatasetArguments(
    data_dir=root_data_dir,
    images_lists_dict=images_lists_dict,
    labels_csv_file_name="sample_submission.csv",
)
predict_dataloaders_arguments = DataLoaderArguments(
    batch_size=1,
    num_workers=0,
    dataset_arguments=prediction_dataset_arguments
)

In [16]:
def transform_set():
    transforms_dict = {
        'test': get_test_transforms()
    }
    return transforms_dict


def get_test_transforms():
    return Compose(
        [
            ToTensorV2(p=1.0),
        ]
    )


In [17]:
class ObjectDetectionDataset(Dataset):
    def __init__(self, images_root_directory, images_list, labels_csv_file_name, phase, transforms):
        super(ObjectDetectionDataset).__init__()
        self.images_root_directory = images_root_directory
        self.phase = phase
        self.transforms = transforms
        self.images_list = images_list
        if self.phase in ["train", "val"]:
            self.labels_dataframe = pd.read_csv(os.path.join(images_root_directory, labels_csv_file_name))

    def __getitem__(self, item):
        sample = {
            "local_image_id": None,
            "image_id": None,
            "labels": None,
            "boxes": None,
            "area": None,
            "iscrowd": None
        }

        image_id = self.images_list[item]
        image_path = os.path.join(self.images_root_directory,
                                  "train" if self.phase in ["train", "val"] else "test",
                                  image_id + ".jpg")
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        sample["local_image_id"] = image_id
        sample["image_id"] = torch.tensor([item])
        if self.phase in ["train", "val"]:
            boxes = self.labels_dataframe[self.labels_dataframe.image_id == image_id].bbox.values.tolist()
            boxes = [eval(box_i) for box_i in boxes]
            areas = _areas(boxes)
            boxes = _adjust_boxes_format(boxes)

            sample["labels"] = torch.ones((len(boxes),), dtype=torch.int64)
            sample["boxes"] = torch.as_tensor(boxes, dtype=torch.float32)
            sample["area"] = torch.as_tensor(areas, dtype=torch.float32)
            sample["iscrowd"] = torch.zeros((len(boxes),), dtype=torch.int64)
        if self.transforms is not None:
            sample["image"] = image
            if self.phase in ["train", "val"]:
                transformed_sample = self.transforms(image=sample["image"],
                                                     bboxes=sample["boxes"],
                                                     labels=sample["labels"])
                sample["boxes"] = torch.as_tensor(transformed_sample["bboxes"], dtype=torch.float32)
            else:
                transformed_sample = self.transforms(image=sample["image"])
            image = transformed_sample["image"]
            del sample["image"]
        return image, sample
    def __len__(self):
        return len(self.images_list)

In [18]:
def create_dataset(arguments):
    dataset = ObjectDetectionDataset(arguments.data_dir,
                                     arguments.images_lists_dict[arguments.phase],
                                     arguments.labels_csv_file_name,
                                     arguments.phase,
                                     arguments.transforms)
    return dataset

In [19]:
def create_prediction_dataloader(arguments, input_size):
    data_transforms = transform_set()
    batch_size = arguments.batch_size
    num_workers = arguments.num_workers
    arguments.dataset_arguments.phase = phase
    arguments.dataset_arguments.transforms = data_transforms["test"]
    image_datasets = create_dataset(arguments.dataset_arguments)
    dataloader = DataLoader(image_datasets, batch_size=batch_size,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=num_workers,
                            collate_fn=collate_fn)
    return dataloader


def collate_fn(batch):
    return tuple(zip(*batch))

In [20]:
dataloader = create_prediction_dataloader(predict_dataloaders_arguments, None)

In [21]:
detection_threshold=0.45
def format_prediction_string(boxes, scores):
    pred_strings = []
    for j in zip(scores, boxes):
        pred_strings.append("{0:.4f} {1} {2} {3} {4}".format(j[0], j[1][0], j[1][1], j[1][2], j[1][3]))

    return " ".join(pred_strings)

In [22]:
results = []

In [23]:
for images, sample in dataloader:
    image_ids = [x["local_image_id"] for x in sample]
    images = list(image.to(device) for image in images)
    outputs = model(images)

    for i, image in enumerate(images):

        boxes = outputs[i]['boxes'].data.cpu().numpy()
        scores = outputs[i]['scores'].data.cpu().numpy()
        
        boxes = boxes[scores >= detection_threshold].astype(np.int32)
        scores = scores[scores >= detection_threshold]
        image_id = image_ids[i]
        
        boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
        boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
        
        result = {
            'image_id': image_id,
            'PredictionString': format_prediction_string(boxes, scores) if boxes.shape[0] > 0 else ""
        }

        
        results.append(result)

AssertionError: Torch not compiled with CUDA enabled

In [24]:
test_df = pd.DataFrame(results, columns=['image_id', 'PredictionString'])

In [25]:
test_df.to_csv('submission.csv', index=False)

In [26]:
test_df.head()

Unnamed: 0,image_id,PredictionString
