In [1]:
import os
import cv2
import json
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import timm
import math
import torch.nn.functional as F

from itertools import count
from tqdm import tqdm
import albumentations as aug
import albumentations.pytorch as APT
from torch.utils.data import Dataset, DataLoader
from PIL import Image, ImageOps

In [2]:
# Pick the compute device once: the GPU when torch can see one, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [3]:
class ArcMarginProduct(nn.Module):
    r"""Additive angular margin head: replaces the target logit by cos(theta + m).

    Args:
        in_features: size of each input embedding
        out_features: number of classes (rows of the weight matrix)
        s: scale multiplied into every output logit
        m: angular margin, in radians, added to the target-class angle
        easy_margin: if True, apply the margin only where cosine > 0;
            otherwise apply it where cosine > cos(pi - m) and fall back to
            ``cosine - sin(pi - m) * m`` elsewhere
        device: kept for backward compatibility and stored on ``self.device``;
            ``forward`` no longer uses it and instead follows the device of
            the tensors it is given
    """

    def __init__(
        self, in_features, out_features, s=30.0, m=0.50, easy_margin=False, device="cuda"
    ):
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        # One weight row per class; rows are L2-normalised in forward().
        self.weight = nn.Parameter(torch.empty(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        # Precomputed constants for cos(theta + m) = cos(theta)cos(m) - sin(theta)sin(m).
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        # Threshold and penalty used by the non-easy-margin fallback branch.
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m
        self.device = device

    def forward(self, input, label):
        """Return scaled logits of shape ``cosine.size()`` with the margin on the label column.

        Args:
            input: embeddings; normalised and multiplied with the normalised weight
            label: class indices, reshaped to ``(-1, 1)`` and cast to long
        """
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        # Clamp guards against tiny negative values from rounding before the sqrt.
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # Build the mask on the same device as the logits: using the constructor's
        # `device` string (default "cuda") broke CPU runs and modules moved with .to().
        one_hot = torch.zeros(cosine.size(), device=cosine.device)
        one_hot.scatter_(1, label.view(-1, 1).long().to(cosine.device), 1)
        # Margin-adjusted value for the labelled class, plain cosine everywhere else.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s

        return output

class HotelIdBackbone(nn.Module):
    """timm backbone with its classifier removed, followed by a projection to ``embed_size``.

    Adapted from michaln's work on Hotel-ID 2021.
    """

    # Name of the backbone's last registered sub-module -> dotted path (relative
    # to the backbone) of the layer that gets replaced by nn.Identity.
    _CLASSIFIER_PATHS = {
        "classifier": "classifier",
        "head.fc": "head.fc",
        "fc": "fc",
        "head.flatten": "head.fc",
        "head": "head",
    }

    def __init__(self, embed_size=256, backbone_name="efficientnet_b3"):
        super().__init__()

        self.embed_size = embed_size
        self.backbone = timm.create_model(backbone_name, pretrained=False)
        # Read the classifier width before the classifier is swapped out below.
        in_features = self.backbone.get_classifier().in_features

        last_module_name = list(self.backbone.named_modules())[-1][0]
        if last_module_name not in self._CLASSIFIER_PATHS:
            raise Exception("unknown classifier layer: " + last_module_name)

        # Walk down to the module owning the classifier and replace it in place.
        *owner_path, attr_name = self._CLASSIFIER_PATHS[last_module_name].split(".")
        owner = self.backbone
        for part in owner_path:
            owner = getattr(owner, part)
        setattr(owner, attr_name, nn.Identity())

        # NOTE(review): layer order and the weight_norm wrappers determine the
        # state-dict keys; presumably saved checkpoints depend on them — confirm
        # before restructuring.
        hidden_size = self.embed_size * 2
        self.post = nn.Sequential(
            nn.utils.weight_norm(nn.Linear(in_features, hidden_size), dim=None),
            nn.BatchNorm1d(hidden_size),
            nn.Dropout(0.2),
            nn.utils.weight_norm(nn.Linear(hidden_size, self.embed_size)),
            nn.BatchNorm1d(self.embed_size),
        )

    def forward(self, input):
        """Run the backbone, flatten everything after the batch axis, and project."""
        features = self.backbone(input)
        flat = features.view(features.size(0), -1)
        return self.post(flat)

class HotelID(nn.Module):
    """Embedding network plus an ArcMargin prediction head.

    ``forward`` returns the embeddings alone when no labels are given, and an
    ``(embeddings, preds)`` pair when labels are supplied.
    """

    def __init__(
        self,
        num_embedding: int,
        num_hotels: int,
        backbone: str,
        **kwargs
    ):
        super().__init__(**kwargs)

        self.num_embedding = num_embedding
        self.num_hotels = num_hotels

        # Maps an image batch to vectors of size `num_embedding`.
        self.embedding_layer = HotelIdBackbone(
            embed_size=self.num_embedding, backbone_name=backbone
        )

        # ArcMargin logits, meant to be fed to a cross-entropy loss.
        self.prediction_layer = ArcMarginProduct(
            in_features=self.num_embedding,
            out_features=self.num_hotels,
            s=30.0,
            m=0.20,
            easy_margin=False,
        )

    def embed(self, inputs):
        """Return the embedding layer's output for ``inputs``."""
        return self.embedding_layer(inputs)

    def forward(self, inputs, labels=None):
        """Embed ``inputs``; additionally compute margin logits when ``labels`` is given."""
        embeddings = self.embed(inputs)
        if labels is None:
            return embeddings
        return embeddings, self.prediction_layer(embeddings, labels)

In [4]:
# Deterministic evaluation-time preprocessing: resize to 512x512, normalise per
# channel, then convert to a tensor. No random augmentation is involved.
# NOTE(review): ToFloat directly after Normalize looks like a no-op on float
# input — confirm against the albumentations version in use before removing.
_test_steps = [
    aug.Resize(height=512, width=512),
    aug.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
        max_pixel_value=255.0,
    ),
    aug.ToFloat(),
    APT.transforms.ToTensorV2(),
]
TEST_TRANSFORM = aug.Compose(_test_steps)

class ImageDataset(Dataset):
    """Image-file dataset that optionally derives labels from each file's parent folder.

    Args:
        img_paths: iterable of image paths; stored as sorted absolute paths.
        ids: hotel ids (as they appear in folder names); required when ``labels`` is True.
        labels: when True, items carry ``"label"`` and ``"id"`` next to ``"img"``.
        transform: callable invoked as ``transform(image=img)["image"]``; when
            None the RGB array is returned untransformed.
    """

    def __init__(self, img_paths, ids=None, labels=False, transform=None, **kwargs) -> None:
        super(ImageDataset, self).__init__(**kwargs)
        self.images = sorted(os.path.abspath(elem) for elem in img_paths)
        self.transform = transform

        if labels:
            assert ids is not None, "Hotel ids should be provided at training"
            self.hotel_ids = list(sorted(ids))
            # Position in the sorted id list is the class index.
            self.id2label = {hotel_id: index for index, hotel_id in enumerate(self.hotel_ids)}

        self.get_func = self.train_get if labels else self.test_get

    def __len__(self):
        return len(self.images)

    def _load_image(self, img_path):
        """Read ``img_path`` as RGB and apply the transform, if one was given.

        Raises:
            FileNotFoundError: when OpenCV cannot read the file. ``cv2.imread``
                signals this by returning None rather than raising, which
                otherwise surfaces as an opaque error inside ``cvtColor``.
        """
        bgr = cv2.imread(img_path)
        if bgr is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        img = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        if self.transform is None:
            return img
        return self.transform(image=img)["image"]

    def train_get(self, idx):
        """Return ``{"img", "label", "id"}``; the hotel id is the parent folder name."""
        img_path = self.images[idx]
        hotel_id = img_path.split(os.sep)[-2]
        # Look the label up before touching the disk so unknown folders fail fast.
        label = self.id2label[hotel_id]

        return {
            "img": self._load_image(img_path),
            "label": label,
            "id": int(hotel_id)
        }

    def test_get(self, idx):
        """Return ``{"img"}`` only; no label information is derived."""
        return {
            "img": self._load_image(self.images[idx])
        }

    def __getitem__(self, idx):
        return self.get_func(idx)

In [5]:
def get_model(num_embedding, num_classes, backbone_name, checkpoint_path, device):
    """Build a ``HotelID`` model, load checkpoint weights, and move it to ``device``.

    Args:
        num_embedding: embedding size passed to ``HotelID``.
        num_classes: number of hotels passed to ``HotelID``.
        backbone_name: backbone identifier passed to ``HotelID``.
        checkpoint_path: file whose ``"state_dict"`` entry holds the weights.
        device: target for both ``map_location`` and the final ``.to``.

    Returns:
        The loaded model on ``device``. Its train/eval mode is left untouched.
    """
    net = HotelID(num_embedding, num_classes, backbone_name)

    # NOTE(review): torch.load unpickles the file; only use checkpoints from a
    # trusted source.
    weights = torch.load(checkpoint_path, map_location=device)["state_dict"]
    net.load_state_dict(weights)

    return net.to(device)

In [6]:
# Load the saved hotel-id -> label mapping and derive its inverse and the class count.
with open("/kaggle/input/id2label/id2label.json", "r") as fin:
    id2label = json.loads(fin.read())

# Inverse lookup: each value of id2label mapped back to its key.
label2id = dict(zip(id2label.values(), id2label.keys()))
num_classes = len(id2label)

In [7]:
# Instantiate the network from the saved checkpoint; one argument per line so
# the configuration is easy to scan and diff.
model = get_model(
    num_embedding=4096,
    num_classes=num_classes,
    backbone_name="eca_nfnet_l2",
    checkpoint_path="/kaggle/input/hotelid/pytorch/m2_v3_eca/1/ckpt_19_eca_nfnet_l2.pth",
    device=device,
)



In [8]:
# Competition data root and the two image sub-folders beneath it.
PROJECT_FOLDER = "/kaggle/input/hotel-id-to-combat-human-trafficking-2022-fgvc9/"
TEST_FOLDER, TRAIN_FOLDER = (
    os.path.join(PROJECT_FOLDER, sub_folder)
    for sub_folder in ("test_images/", "train_images/")
)

In [9]:
from glob import glob

In [10]:
# Gallery set: every .jpg below TRAIN_FOLDER, preprocessed with the evaluation
# transform and labelled by parent folder.
train_image_paths = sorted(glob(os.path.join(TRAIN_FOLDER, "**", "*.jpg"), recursive=True))
dset = ImageDataset(
    train_image_paths,
    ids=list(id2label),
    labels=True,
    transform=TEST_TRANSFORM,
)
# Fixed order (no shuffling) so embeddings and hotel ids stay aligned.
base_loader = DataLoader(dset, batch_size=16, shuffle=False)

In [11]:
# Empty accumulators that the embedding loop concatenates onto.
base_embeddings = torch.tensor([], device=device)
# Hotel ids are integers: start from an integer tensor so torch.cat does not
# promote them to float32, which cannot represent every integer above 2**24.
base_hotel_ids = torch.tensor([], dtype=torch.long, device=device)

In [12]:
# Embed every gallery image once. Per-batch results are collected in lists and
# concatenated a single time: repeatedly torch.cat-ing onto a growing tensor
# re-copies everything gathered so far on each step. Both result names are
# rebound at the end, so re-running this cell cannot duplicate the gallery.
embedding_chunks = []
hotel_id_chunks = []

model.eval()
with torch.no_grad():
    for batch in tqdm(base_loader):
        inputs = batch["img"].to(device)
        ids = batch["id"].to(device)
        embedding_chunks.append(model(inputs))
        hotel_id_chunks.append(ids)

if embedding_chunks:
    base_embeddings = torch.cat(embedding_chunks)
    # Ids keep their integer dtype instead of being promoted to float.
    base_hotel_ids = torch.cat(hotel_id_chunks)
else:
    # Empty loader: leave well-defined empty tensors behind.
    base_embeddings = torch.tensor([], device=device)
    base_hotel_ids = torch.tensor([], dtype=torch.long, device=device)

100%|██████████| 2794/2794 [54:40<00:00,  1.17s/it]


In [13]:
# Full paths of everything in the test folder, in sorted order, plus the list
# that will receive one prediction string per test file.
test_files = [os.path.join(TEST_FOLDER, file_name) for file_name in os.listdir(TEST_FOLDER)]
test_files.sort()
predictions = list()

In [14]:
# For each test image: embed it, rank all gallery embeddings by cosine
# similarity, and keep the first five distinct hotel ids in ranked order.

# Start from an empty list so re-running this cell cannot append a second set
# of rows and desynchronise `predictions` from `test_files`.
predictions = []

# Do not rely on an earlier cell having switched the model to inference mode.
model.eval()
with torch.no_grad():
    for image_file in tqdm(test_files):
        bgr = cv2.imread(image_file)
        if bgr is None:
            # cv2.imread returns None instead of raising on unreadable files.
            raise FileNotFoundError(f"Could not read test image: {image_file}")
        image = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        image = TEST_TRANSFORM(image=image)["image"]
        image = image.unsqueeze(0).to(device)

        embedding = model(image)
        similarities = torch.cosine_similarity(embedding, base_embeddings)
        _, indices = similarities.sort(descending=True)

        # One transfer to Python numbers, then an order-preserving dedup with a
        # set, instead of comparing 0-d device tensors one at a time.
        prediction = []
        seen = set()
        for hid in base_hotel_ids[indices].tolist():
            hid = int(hid)
            if hid in seen:
                continue
            seen.add(hid)
            prediction.append(hid)
            if len(prediction) == 5:
                break
        predictions.append(" ".join(str(hid) for hid in prediction))

100%|██████████| 1/1 [00:00<00:00,  2.72it/s]


In [15]:
# Submission table: one row per test file (bare file name) with its
# space-separated hotel-id predictions, ordered by image_id.
df = pd.DataFrame(
    {
        "image_id": [os.path.basename(path) for path in test_files],
        "hotel_id": predictions,
    }
).sort_values(by="image_id")

In [16]:
# Write the submission file without the DataFrame index column.
df.to_csv(path_or_buf="submission.csv", index=False)