# Intro
Inference notebook for [Hotel-ID starter - classification - training](https://www.kaggle.com/code/michaln/hotel-id-starter-classification-traning)



# Setup

In [25]:
import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
import timm

# Imports

In [26]:
import numpy as np
import pandas as pd
import random
import os
import math

In [27]:
from PIL import Image as pil_image
from tqdm import tqdm

In [28]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

# Global

In [29]:
# Seed passed to seed_everything, and the side length (in pixels) of the square
# that every image is resized to.
SEED = 42
IMG_SIZE = 384

# Root of the competition input data and the folder holding the test images.
PROJECT_FOLDER = "../input/hotel-id-to-combat-human-trafficking-2022-fgvc9/"
TEST_DATA_FOLDER = f"{PROJECT_FOLDER}test_images/"

In [30]:
# Sanity check: show the entries found directly inside the project folder.
print(os.listdir(PROJECT_FOLDER))

['sample_submission.csv', 'train_images', 'train_masks', 'test_images']


In [31]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Sets the ``PYTHONHASHSEED`` environment variable, seeds Python's
    ``random`` module, NumPy and PyTorch (including ``torch.cuda``), and
    switches cuDNN to deterministic mode.

    Parameters:
        - seed: Integer seed applied to all generators
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed)
    for apply_seed in seeders:
        apply_seed(seed)

    torch.backends.cudnn.deterministic = True

# Dataset and transformations

In [32]:
import albumentations as A
import albumentations.pytorch as APT
import cv2 

IMG_SIZE = 384

# Settings for the single large red-filled hole that simulates the occlusions
# found in the test data. Each pipeline builds its own CoarseDropout from them.
occlusion_kwargs = dict(
    p=0.75,
    max_holes=1,
    min_height=IMG_SIZE // 4,
    max_height=IMG_SIZE // 2,
    min_width=IMG_SIZE // 4,
    max_width=IMG_SIZE // 2,
    fill_value=(255, 0, 0),
)

# Training pipeline: geometric and photometric augmentations plus occlusions.
train_transform = A.Compose([
    # geometric augmentations
    A.HorizontalFlip(p=0.75),
    A.VerticalFlip(p=0.25),
    A.ShiftScaleRotate(p=0.5, border_mode=cv2.BORDER_CONSTANT),
    A.OpticalDistortion(p=0.25),
    A.Perspective(p=0.25),

    # Gaussian blur
    A.GaussianBlur(p=0.5, sigma_limit=(0.1, 2.0)),

    # CLAHE to improve contrast
    A.CLAHE(p=0.5),

    # second perspective transform to improve generalization across viewing angles
    A.Perspective(p=0.5, scale=(0.05, 0.15)),

    # ordinary coarse dropout: one to six small holes
    A.CoarseDropout(
        p=0.5,
        min_holes=1,
        max_holes=6,
        min_height=IMG_SIZE // 16,
        max_height=IMG_SIZE // 4,
        min_width=IMG_SIZE // 16,
        max_width=IMG_SIZE // 4,
    ),

    # one large red hole simulating occlusions in the test data
    A.CoarseDropout(**occlusion_kwargs),

    A.RandomBrightnessContrast(p=0.75),
    A.ToFloat(),
    APT.transforms.ToTensorV2(),
])

# Validation pipeline: only the simulated occlusion, no other augmentation.
val_transform = A.Compose([
    A.CoarseDropout(**occlusion_kwargs),
    A.ToFloat(),
    APT.transforms.ToTensorV2(),
])

  data = fetch_version_info()


In [33]:
def pad_image(img):
    """
    Zero-pad an image along its shorter side so that it becomes square.

    The padding is split evenly between both sides of the shorter axis. When
    the side difference is odd, the result is one pixel short of square.

    Parameters:
        - img: Image array laid out as (height, width) or
          (height, width, channels)

    Returns:
        - A new array with zeros added left/right (tall images) or
          top/bottom (wide images); dtype and channel count are unchanged
    """
    # NumPy reports image shape as (rows, columns) = (height, width).
    height, width = np.shape(img)[:2]
    pad = abs(height - width) // 2

    if height > width:
        # Taller than wide: widen by padding the column axis.
        pad_width = [(0, 0), (pad, pad)]
    else:
        # Wider than tall (or already square, pad == 0): pad the row axis.
        pad_width = [(pad, pad), (0, 0)]

    # Leave any trailing axes (e.g. colour channels) untouched.
    pad_width += [(0, 0)] * (np.ndim(img) - 2)

    return np.pad(img, pad_width, mode="constant", constant_values=0)


def open_and_preprocess_image(image_path):
    """
    Load an image from disk and bring it to the model's input geometry.

    The image is read with OpenCV, converted from BGR to RGB, padded with
    `pad_image` and resized to IMG_SIZE x IMG_SIZE.

    Parameters:
        - image_path: Path of the image file to load

    Returns:
        - The resized RGB image as returned by cv2.resize

    Raises:
        - FileNotFoundError: if OpenCV cannot read the file
    """
    img = cv2.imread(image_path)
    # cv2.imread signals a missing or unreadable file by returning None rather
    # than raising, so fail here with the offending path.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = pad_image(img)
    return cv2.resize(img, (IMG_SIZE, IMG_SIZE))

In [34]:
class HotelImageDataset:
    """
    Map-style dataset that yields preprocessed images listed in a DataFrame.

    Parameters:
        - data: DataFrame with an "image_id" column holding file names
        - transform: Optional callable invoked as transform(image=...) whose
          result is a mapping with an "image" entry
        - data_folder: Folder that contains the image files
    """

    def __init__(self, data, transform=None, data_folder="train_images/"):
        self.data = data
        self.data_folder = data_folder
        self.transform = transform

    def __len__(self):
        """Return the number of rows (images) in the underlying DataFrame."""
        return len(self.data)

    def _image_path(self, idx):
        """Return the file path of the image stored at positional row `idx`."""
        record = self.data.iloc[idx]
        # os.path.join works whether or not data_folder ends with a separator.
        return os.path.join(self.data_folder, record["image_id"])

    def __getitem__(self, idx):
        """Load, preprocess and optionally transform the image at row `idx`.

        Returns:
            - dict with a single "image" entry
        """
        image = np.asarray(open_and_preprocess_image(self._image_path(idx)), dtype=np.uint8)

        if self.transform is not None:
            image = self.transform(image=image)["image"]

        return {
            "image": image,
        }

# Model

In [35]:
class HotelIdModel(nn.Module):
    """
    Hotel classifier that delegates entirely to a timm backbone.

    Parameters:
        - n_classes: Size of the backbone's classification output
        - backbone_name: Name of the timm architecture to instantiate
    """

    def __init__(self, n_classes=100, backbone_name="resnet34"):
        super().__init__()

        # pretrained=False: timm is not asked for pretrained weights here.
        self.backbone = timm.create_model(
            backbone_name,
            pretrained=False,
            num_classes=n_classes,
        )

    def forward(self, x):
        """Run the backbone on `x` and return its output unchanged."""
        backbone_output = self.backbone(x)
        return backbone_output

# Model helper functions

In [36]:
import torch
import numpy as np
import torchvision.transforms as T

def predict_tta(loader, model, n_matches=5, tta_transforms=3):
    """
    Perform Test-Time Augmentation (TTA) and return the top-ranked classes.

    Every batch is scored once unmodified (sigmoid scores multiplied by 1.5)
    and `tta_transforms` more times, each after one randomly chosen
    augmentation applied to the whole batch tensor. The scores of all passes
    are averaged and the classes are ranked by that average.

    Parameters:
        - loader: Iterable of batches, each a dict with an 'image' tensor
        - model: Trained PyTorch model
        - n_matches: Number of top predictions to return per image
        - tta_transforms: Number of augmented passes per batch

    Returns:
        - preds: Integer array with one row per image holding up to
          `n_matches` class indices, best first; shape (0, n_matches) when
          the loader yields no batches
    """
    # Automatically detect device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model.to(device)
    model.eval()

    # Candidate augmentations. Built once because the list does not depend on
    # the batch; each transform object is still created when its lambda runs.
    transform_list = [
        lambda x: torch.flip(x, dims=[-1]),  # Horizontal Flip
        lambda x: T.ColorJitter(brightness=0.1, contrast=0.1)(x),  # Mild brightness/contrast
        lambda x: T.RandomAffine(degrees=5, translate=(0.05, 0.05))(x),  # Small rotations/translations
        lambda x: T.GaussianBlur(kernel_size=3, sigma=(0.1, 1.0))(x),  # Mild blur
    ]

    batch_scores = []

    with torch.no_grad():
        for sample in loader:
            input_images = sample['image'].to(device)

            # Original image inference (weighted more)
            original_output = model(input_images)
            tta_outputs = [1.5 * torch.sigmoid(original_output).cpu().numpy()]

            for _ in range(tta_transforms):
                aug_fn = np.random.choice(transform_list)  # Randomly choose one augmentation
                outputs = model(aug_fn(input_images))
                tta_outputs.append(torch.sigmoid(outputs).cpu().numpy())

            # The mean divides by the number of passes rather than the total
            # weight; only the ranking is returned, so that constant factor
            # has no effect on the result.
            batch_scores.append(np.mean(tta_outputs, axis=0))

    # Without any batch there is no 2-D score matrix to sort.
    if not batch_scores:
        return np.empty((0, n_matches), dtype=np.int64)

    # Get top predictions: sort descending by score, keep the first n_matches
    scores = np.concatenate(batch_scores, axis=0)
    preds = np.argsort(-scores, axis=1)[:, :n_matches]
    return preds



In [37]:
import albumentations as A
import albumentations.pytorch as APT
import cv2 

def _simulated_occlusion():
    """Build the single large red-filled dropout that simulates occlusions."""
    quarter, half = IMG_SIZE // 4, IMG_SIZE // 2
    return A.CoarseDropout(
        p=0.75,
        max_holes=1,
        min_height=quarter,
        max_height=half,
        min_width=quarter,
        max_width=half,
        fill_value=(255, 0, 0),
    )


def _to_float_tensor():
    """Build the closing steps of every pipeline: ToFloat, then ToTensorV2."""
    return [A.ToFloat(), APT.transforms.ToTensorV2()]


# Training pipeline: random crop, augmentations and occlusions.
train_transform = A.Compose([
    A.RandomCrop(width=64, height=64),
    A.HorizontalFlip(p=0.75),
    A.ShiftScaleRotate(
        p=0.5,
        shift_limit=0.0625,
        scale_limit=0.1,
        rotate_limit=10,
        interpolation=cv2.INTER_NEAREST,
        border_mode=cv2.BORDER_CONSTANT,
    ),
    A.OpticalDistortion(p=0.25, distort_limit=0.05, shift_limit=0.01),
    A.Perspective(p=0.25, scale=(0.05, 0.1)),
    A.ColorJitter(p=0.75, brightness=0.2, contrast=0.2, saturation=0.1, hue=0.05),
    # ordinary coarse dropout: one to five small holes
    A.CoarseDropout(
        p=0.5,
        min_holes=1,
        max_holes=5,
        min_height=IMG_SIZE // 16,
        max_height=IMG_SIZE // 8,
        min_width=IMG_SIZE // 16,
        max_width=IMG_SIZE // 8,
    ),
    # one large red hole simulating occlusions in the test data
    _simulated_occlusion(),
] + _to_float_tensor())

# Validation pipeline: only the simulated occlusion.
val_transform = A.Compose([_simulated_occlusion()] + _to_float_tensor())

# Inference pipeline: no augmentations, just conversion to a float tensor.
base_transform = A.Compose(_to_float_tensor())

# Prepare data

In [38]:
# One row per file in the test folder, ordered by file name; hotel_id starts
# empty and is filled in by the submission cell.
test_image_ids = os.listdir(TEST_DATA_FOLDER)
test_df = pd.DataFrame({"image_id": test_image_ids, "hotel_id": ""})
test_df = test_df.sort_values(by="image_id")

In [39]:
# Mapping between encoded class codes and hotel_ids, created in the training
# notebook when the hotel_ids were encoded.
hotel_id_code_df = pd.read_csv('../input/resnet-training/hotel_id_code_mapping.csv')
# Dictionary lookup: hotel_id_code -> hotel_id
hotel_id_by_code = hotel_id_code_df.set_index('hotel_id_code')["hotel_id"]
hotel_id_code_map = hotel_id_by_code.to_dict()

# Prepare model

In [40]:
def get_model(model_type, backbone_name, checkpoint_path, args):
    """
    Build a HotelIdModel and restore its weights from a checkpoint file.

    Parameters:
        - model_type: Accepted but not used by this function
        - backbone_name: Backbone name forwarded to HotelIdModel
        - checkpoint_path: File whose "model" entry holds the state dict
        - args: Object providing `n_classes` and `device`

    Returns:
        - The model with loaded weights, moved to `args.device`
    """
    model = HotelIdModel(args.n_classes, backbone_name)

    # Deserialize onto the CPU first so that a checkpoint saved from a GPU run
    # still loads on a CPU-only machine; the model is moved to the target
    # device afterwards.
    checkpoint = torch.load(checkpoint_path, map_location=torch.device('cpu'))
    model.load_state_dict(checkpoint["model"])
    model = model.to(args.device)

    return model

In [41]:
class args:
    # Inference settings, read elsewhere as plain class attributes.
    n_classes = hotel_id_code_df["hotel_id"].nunique()  # one class per distinct hotel_id
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    batch_size = 64
    num_workers = 2


seed_everything(seed=SEED)

# Test images go through the augmentation-free pipeline, in DataFrame order.
test_dataset = HotelImageDataset(
    data=test_df,
    transform=base_transform,
    data_folder=TEST_DATA_FOLDER,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=args.batch_size,
    num_workers=args.num_workers,
    shuffle=False,
)

In [42]:
import torch

def get_model(model_type, backbone_name, checkpoint_path, args):
    """
    Build a HotelIdModel and restore its weights from a checkpoint file.

    Parameters:
        - model_type: Accepted but not used by this function
        - backbone_name: Backbone name forwarded to HotelIdModel
        - checkpoint_path: File whose "model" entry holds the state dict
        - args: Object providing `n_classes` and `device`

    Returns:
        - The model with loaded weights, moved to `args.device`
    """
    net = HotelIdModel(args.n_classes, backbone_name)

    # Deserialize onto the CPU; the move to the target device happens below.
    cpu = torch.device('cpu')
    state_dict = torch.load(checkpoint_path, map_location=cpu)["model"]
    net.load_state_dict(state_dict)

    return net.to(args.device)

In [43]:
# Restore the ResNet-34 classifier from the checkpoint in the resnet-training input.
CHECKPOINT_PATH = "../input/resnet-training/checkpoint-classification-model-resnet34-256x256.pt"
model = get_model("classification", "resnet34", CHECKPOINT_PATH, args)

# Print the architecture to confirm the checkpoint loaded into the expected layers.
print(model)


  checkpoint = torch.load(checkpoint_path, map_location=torch.device('cpu'))


HotelIdModel(
  (backbone): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act1): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (drop_block): Identity()
        (act1): ReLU(inplace=True)
        (aa): Identity()
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act2): ReLU(inplace=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding

# Submission

In [44]:
%%time

# Top-5 class codes per test image, best match first.
pred_codes = predict_tta(test_loader, model, n_matches=5, tta_transforms=3)
# replace class codes with hotel_ids using the mapping created in the training notebook
preds = [[hotel_id_code_map[code] for code in row] for row in pred_codes]
# Join each row into one space-separated string. An explicit join does not
# depend on how Python prints a list (quotes, NumPy scalar wrappers).
test_df["hotel_id"] = [" ".join(str(hotel_id) for hotel_id in row) for row in preds]

test_df.to_csv("submission.csv", index=False)
test_df.head()

CPU times: user 876 ms, sys: 134 ms, total: 1.01 s
Wall time: 760 ms


Unnamed: 0,image_id,hotel_id
0,abc.jpg,24700 18800 308350 108817 40941
