In [1]:
import os
from pathlib import Path
from dataclasses import dataclass
from enum import Enum
from tqdm import tqdm

import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.optim as optim
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor, FasterRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone

from albumentations import *
from albumentations.pytorch import ToTensorV2
import cv2

In [2]:
device = torch.device('cuda')

In [3]:
import torch
import torch.nn as nn
import torch.nn.init as init

__all__ = ['SqueezeNet', 'squeezenet1_0', 'squeezenet1_1']

model_urls = {
    'squeezenet1_0': '../input/fourthpth/visionmaster/visionmaster/torchvision/models/squeezenet1_0-a815701f.pth',
    'squeezenet1_1': 'https://download.pytorch.org/models/squeezenet1_1-f364aa15.pth',
}


class Fire(nn.Module):

    def __init__(self, inplanes, squeeze_planes,
                 expand1x1_planes, expand3x3_planes):
        super(Fire, self).__init__()
        self.inplanes = inplanes
        self.squeeze = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1)
        self.squeeze_activation = nn.ReLU(inplace=True)
        self.expand1x1 = nn.Conv2d(squeeze_planes, expand1x1_planes,
                                   kernel_size=1)
        self.expand1x1_activation = nn.ReLU(inplace=True)
        self.expand3x3 = nn.Conv2d(squeeze_planes, expand3x3_planes,
                                   kernel_size=3, padding=1)
        self.expand3x3_activation = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.squeeze_activation(self.squeeze(x))
        return torch.cat([
            self.expand1x1_activation(self.expand1x1(x)),
            self.expand3x3_activation(self.expand3x3(x))
        ], 1)


class SqueezeNet(nn.Module):

    def __init__(self, version='1_0', num_classes=1000):
        super(SqueezeNet, self).__init__()
        self.num_classes = num_classes
        if version == '1_0':
            self.features = nn.Sequential(
                nn.Conv2d(3, 96, kernel_size=7, stride=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(96, 16, 64, 64),
                Fire(128, 16, 64, 64),
                Fire(128, 32, 128, 128),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(256, 32, 128, 128),
                Fire(256, 48, 192, 192),
                Fire(384, 48, 192, 192),
                Fire(384, 64, 256, 256),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(512, 64, 256, 256),
            )
        elif version == '1_1':
            self.features = nn.Sequential(
                nn.Conv2d(3, 64, kernel_size=3, stride=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(64, 16, 64, 64),
                Fire(128, 16, 64, 64),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(128, 32, 128, 128),
                Fire(256, 32, 128, 128),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(256, 48, 192, 192),
                Fire(384, 48, 192, 192),
                Fire(384, 64, 256, 256),
                Fire(512, 64, 256, 256),
            )
        else:
            # FIXME: Is this needed? SqueezeNet should only be called from the
            # FIXME: squeezenet1_x() functions
            # FIXME: This checking is not done for the other models
            raise ValueError("Unsupported SqueezeNet version {version}:"
                             "1_0 or 1_1 expected".format(version=version))

        # Final convolution is initialized differently from the rest
        final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            final_conv,
            nn.ReLU(inplace=True),
            nn.AdaptiveAvgPool2d((1, 1))
        )

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                if m is final_conv:
                    init.normal_(m.weight, mean=0.0, std=0.01)
                else:
                    init.kaiming_uniform_(m.weight)
                if m.bias is not None:
                    init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return torch.flatten(x, 1)


def _squeezenet(version, pretrained, **kwargs):
    model = SqueezeNet(version, **kwargs)
    if pretrained:
        arch = 'squeezenet' + version
        state_dict = torch.load(model_urls[arch])
        model.load_state_dict(state_dict)
    return model


def squeezenet1_0(pretrained=False,**kwargs):
    r"""SqueezeNet model architecture from the `"SqueezeNet: AlexNet-level
    accuracy with 50x fewer parameters and <0.5MB model size"
    <https://arxiv.org/abs/1602.07360>`_ paper.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _squeezenet('1_0', pretrained, **kwargs)


def squeezenet1_1(pretrained=False, progress=True, **kwargs):
    r"""SqueezeNet 1.1 model from the `official SqueezeNet repo
    <https://github.com/DeepScale/SqueezeNet/tree/master/SqueezeNet_v1.1>`_.
    SqueezeNet 1.1 has 2.4x less computation and slightly fewer parameters
    than SqueezeNet 1.0, without sacrificing accuracy.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _squeezenet('1_1', pretrained, progress, **kwargs)


In [4]:
class SqueezeFeatures(nn.Module):
    def __init__(self):
        super(SqueezeFeatures, self).__init__()
        base_model =  squeezenet1_0(pretrained=True)

        self.seq1 = nn.Sequential(base_model.features 
                                  )
        self.out_channels = 512

    def forward(self, x):
        x = self.seq1(x)

        return x
backbone = SqueezeFeatures()
backbone.out_channels = 512

In [5]:
def fasterrcnn_squeeze(pretrained=False, progress=True,
                            num_classes=2, pretrained_backbone=True,
                             trainable_backbone_layers=3, **kwargs):
    assert trainable_backbone_layers <= 5 and trainable_backbone_layers >= 0
    # dont freeze any layers if pretrained model or backbone is not used
    if not (pretrained or pretrained_backbone):
        trainable_backbone_layers = 5
    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    model = FasterRCNN(backbone, num_classes, **kwargs)
    return model

In [6]:
def initialize_model():
    model = fasterrcnn_squeeze(pretrained=False)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 2)
    return model

In [7]:
model = initialize_model()

In [8]:
save_model_path = "../input/fourthpth/results/results"

In [9]:
model.load_state_dict(torch.load(os.path.join(save_model_path, f"best_model_epoch.pth")))
model.to(device)
model.eval()

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): SqueezeFeatures(
    (seq1): Sequential(
      (0): Sequential(
        (0): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2))
        (1): ReLU(inplace=True)
        (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
        (3): Fire(
          (squeeze): Conv2d(96, 16, kernel_size=(1, 1), stride=(1, 1))
          (squeeze_activation): ReLU(inplace=True)
          (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
          (expand1x1_activation): ReLU(inplace=True)
          (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (expand3x3_activation): ReLU(inplace=True)
        )
        (4): Fire(
          (squeeze): Conv2d(128, 16, kernel_size=(1, 1), stride=(1, 1))
          (squeeze_activation): ReLU(inpl

In [10]:
@dataclass
class DatasetArguments:
    data_dir: Path
    images_lists_dict: dict
    labels_csv_file_name: str

@dataclass
class DataLoaderArguments:
    batch_size: int
    num_workers: int
    dataset_arguments: DatasetArguments

In [11]:
phase="test"

In [12]:
root_data_dir = Path("/kaggle/input/global-wheat-detection/")
unlabeled_generated_images_path = Path(f"/kaggle/input/global-wheat-detection/{phase}/")

In [13]:
def get_images_file_names(directory):
    _, _, files = next(os.walk(directory))
    return files
test_file_names = get_images_file_names(unlabeled_generated_images_path)
test_file_names = [x.split(".")[0] for x in test_file_names]

In [14]:
images_lists_dict = {
    "test": test_file_names
}

In [15]:
prediction_dataset_arguments = DatasetArguments(
    data_dir=root_data_dir,
    images_lists_dict=images_lists_dict,
    labels_csv_file_name="sample_submission.csv",
)
predict_dataloaders_arguments = DataLoaderArguments(
    batch_size=8,
    num_workers=0,
    dataset_arguments=prediction_dataset_arguments
)

In [16]:
def transform_set():
    transforms_dict = {
        'test': get_test_transforms()
    }
    return transforms_dict


def get_test_transforms():
    return Compose(
        [
            ToTensorV2(p=1.0),
        ]
    )


In [17]:
class ObjectDetectionDataset(Dataset):
    def __init__(self, images_root_directory, images_list, labels_csv_file_name, phase, transforms):
        super(ObjectDetectionDataset).__init__()
        self.images_root_directory = images_root_directory
        self.phase = phase
        self.transforms = transforms
        self.images_list = images_list
        if self.phase in ["train", "val"]:
            self.labels_dataframe = pd.read_csv(os.path.join(images_root_directory, labels_csv_file_name))

    def __getitem__(self, item):
        sample = {
            "local_image_id": None,
            "image_id": None,
            "labels": None,
            "boxes": None,
            "area": None,
            "iscrowd": None
        }

        image_id = self.images_list[item]
        image_path = os.path.join(self.images_root_directory,
                                  "train" if self.phase in ["train", "val"] else "test",
                                  image_id + ".jpg")
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        sample["local_image_id"] = image_id
        sample["image_id"] = torch.tensor([item])
        if self.phase in ["train", "val"]:
            boxes = self.labels_dataframe[self.labels_dataframe.image_id == image_id].bbox.values.tolist()
            boxes = [eval(box_i) for box_i in boxes]
            areas = _areas(boxes)
            boxes = _adjust_boxes_format(boxes)

            sample["labels"] = torch.ones((len(boxes),), dtype=torch.int64)
            sample["boxes"] = torch.as_tensor(boxes, dtype=torch.float32)
            sample["area"] = torch.as_tensor(areas, dtype=torch.float32)
            sample["iscrowd"] = torch.zeros((len(boxes),), dtype=torch.int64)
        if self.transforms is not None:
            sample["image"] = image
            if self.phase in ["train", "val"]:
                transformed_sample = self.transforms(image=sample["image"],
                                                     bboxes=sample["boxes"],
                                                     labels=sample["labels"])
                sample["boxes"] = torch.as_tensor(transformed_sample["bboxes"], dtype=torch.float32)
            else:
                transformed_sample = self.transforms(image=sample["image"])
            image = transformed_sample["image"]
            del sample["image"]
        return image, sample
    def __len__(self):
        return len(self.images_list)

In [18]:
def create_dataset(arguments):
    dataset = ObjectDetectionDataset(arguments.data_dir,
                                     arguments.images_lists_dict[arguments.phase],
                                     arguments.labels_csv_file_name,
                                     arguments.phase,
                                     arguments.transforms)
    return dataset

In [19]:
def create_prediction_dataloader(arguments, input_size):
    data_transforms = transform_set()
    batch_size = arguments.batch_size
    num_workers = arguments.num_workers
    arguments.dataset_arguments.phase = phase
    arguments.dataset_arguments.transforms = data_transforms["test"]
    image_datasets = create_dataset(arguments.dataset_arguments)
    dataloader = DataLoader(image_datasets, batch_size=batch_size,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=num_workers,
                            collate_fn=collate_fn)
    return dataloader


def collate_fn(batch):
    return tuple(zip(*batch))

In [20]:
dataloader = create_prediction_dataloader(predict_dataloaders_arguments, None)

In [21]:
detection_threshold=0.45
def format_prediction_string(boxes, scores):
    pred_strings = []
    for j in zip(scores, boxes):
        pred_strings.append("{0:.4f} {1} {2} {3} {4}".format(j[0], j[1][0], j[1][1], j[1][2], j[1][3]))

    return " ".join(pred_strings)

In [22]:
results = []

In [23]:
for images, sample in dataloader:
    image_ids = [x["local_image_id"] for x in sample]
    images = list(image.to(device) for image in images)
    outputs = model(images)

    for i, image in enumerate(images):

        boxes = outputs[i]['boxes'].data.cpu().numpy()
        scores = outputs[i]['scores'].data.cpu().numpy()
        
        boxes = boxes[scores >= detection_threshold].astype(np.int32)
        scores = scores[scores >= detection_threshold]
        image_id = image_ids[i]
        
        boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
        boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
        
        result = {
            'image_id': image_id,
            'PredictionString': format_prediction_string(boxes, scores) if boxes.shape[0] > 0 else ""
        }

        
        results.append(result)

	nonzero(Tensor input, *, Tensor out)
Consider using one of the following signatures instead:
	nonzero(Tensor input, *, bool as_tuple)


In [24]:
test_df = pd.DataFrame(results, columns=['image_id', 'PredictionString'])

In [25]:
test_df.to_csv('submission.csv', index=False)

In [26]:
test_df.head()

Unnamed: 0,image_id,PredictionString
0,2fd875eaa,0.9862 106 584 142 81 0.9774 461 355 127 98 0....
1,cc3532ff6,0.9713 486 578 96 133 0.9525 83 598 103 165 0....
2,51b3e36ab,0.9791 109 843 157 97 0.9738 234 638 91 165 0....
3,53f253011,0.9797 226 835 119 101 0.9433 15 31 141 109 0....
4,cb8d261a3,0.8716 258 773 128 72 0.8562 759 715 79 81 0.8...
