# Inference

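This notebook generates the Kaggle submission file. It runs a two-stage pipeline: a segmentation network first localizes the coins in each image, and a cascade of classification networks (currency, EUR denomination, CHF heads/tails, CHF tails denomination, CHF heads picture) then labels each detected coin.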

## Setup

In [1]:
# Enable IPython's autoreload extension in mode 2: all imported modules are reloaded
# before each cell runs, so edits to the local `scripts` package take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [4]:
import torch
import pandas as pd
import albumentations as A
import numpy as np

from tqdm import tqdm
from scripts.models import CoinLocalizer, CoinClassifier
from scripts.training import get_best_available_device, load_params
from scripts.utils import SegmentationDataset, split_data, ClassificationDataset, \
    generate_hough, get_cropped_image, get_segmentation, get_class, get_bb_coordinates
from scripts.config import example_row, row_template, ID_TO_LABEL, ID_TO_CCY, ID_TO_EUR, ID_TO_SIDE, ID_TO_CHF_IMG

from matplotlib import pyplot as plt
from torchvision.utils import draw_bounding_boxes
from PIL import Image
from collections import Counter
from torch.utils.data import DataLoader

## Dataset

Specify the test directory, define the transforms (the classification transform uses ImageNet-specific normalization), initialize the dataset and wrap it in a `DataLoader`.

In [5]:
# Directory that holds the test images to run inference on.
test_directory = "../data/test"
# Only the first of the four values returned by split_data is kept: the test image paths.
# NOTE(review): 0.0 and 'inference' presumably mean "no held-out split" and
# "unlabelled mode" -- confirm against scripts.utils.split_data.
test_image_paths, _, _, _ = split_data(test_directory, 0.0, 'inference')

In [6]:
# Segmentation input: shrink every image to 600x400, then pad it up to 608x416.
# NOTE(review): 608 and 416 are multiples of 32, presumably required by the
# segmentation network's downsampling -- confirm against CoinLocalizer.
# `p=1.0` replaces the deprecated `always_apply=True`; both mean "apply on every call".
seg_tf = A.Compose([
    A.Resize(width=600, height=400, p=1.0),
    A.PadIfNeeded(min_height=416, min_width=608, p=1.0),
])

# Classification input: each cropped coin is resized to 224x224 and normalized
# with the ImageNet channel statistics.
cls_tf = A.Compose([
    A.Resize(width=224, height=224, p=1.0),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), p=1.0)  # imagenet specific
])

In [7]:
# Dataset over the test images; every image goes through the segmentation transform.
test_ds = SegmentationDataset(
    image_paths=test_image_paths,
    transform=seg_tf,
)

# One image per batch: the inference loop reads the single file name of each batch
# through filename[0].
test_loader = DataLoader(test_ds, batch_size=1)

## Models

Initialize segmentation and classification models and load pre-trained weights.

In [8]:
# Build every network and immediately load its pre-trained weights.

# Segmentation network used to localize the coins.
seg_model = load_params(CoinLocalizer(), 'segmentation_.pt')

# Currency classifier (2 classes).
ccy_model = load_params(
    CoinClassifier(num_classes=2, coin_type="ccy"),
    'classification_ccy.pt',
)

# EUR denomination classifier (8 classes).
eur_model = load_params(
    CoinClassifier(num_classes=8, coin_type="eur", freeze=False),
    'classification_eur.pt',
)

# Heads/tails classifier (2 classes).
side_model = load_params(
    CoinClassifier(num_classes=2, coin_type="heads-tails"),
    'classification_heads-tails.pt',
)

# CHF tails classifier (7 classes).
chf_tail_model = load_params(
    CoinClassifier(num_classes=7, coin_type="chf-tails", freeze=False),
    'classification_chf-tails.pt',
)

# CHF heads classifier (3 classes).
chf_head_model = load_params(
    CoinClassifier(num_classes=3, coin_type="chf-heads", freeze=False),
    'classification_chf-heads.pt',
)

In [9]:
# Select the best available device (mps or gpu if possible) and move every network onto it.
device = get_best_available_device()

# The segmentation network is bound to `model`, which is the name the inference loop uses.
model = seg_model.to(device)

# Classifiers of the cascade.
ccy_model = ccy_model.to(device)
eur_model = eur_model.to(device)
side_model = side_model.to(device)
chf_tail_model = chf_tail_model.to(device)
chf_head_model = chf_head_model.to(device)

## Inference

Iterate over images, extract coins and predict their class.

In [10]:
# Empty predictions frame whose columns are taken from `example_row`
# (imported from scripts.config); it holds the per-image results for the submission.
df = pd.DataFrame(columns=example_row)

In [11]:
def _make_label_mapper(id_to_name):
    """Return a vectorized lookup over `id_to_name` that yields "unknown" for missing ids."""
    return np.vectorize(lambda key: id_to_name.get(key, "unknown"))


# One mapper per id -> name table from scripts.config.
id_to_label = _make_label_mapper(ID_TO_LABEL)
id_to_ccy = _make_label_mapper(ID_TO_CCY)
id_to_eur = _make_label_mapper(ID_TO_EUR)
id_to_side = _make_label_mapper(ID_TO_SIDE)
id_to_chf_img = _make_label_mapper(ID_TO_CHF_IMG)

In [12]:
# Full inference pass. For every test image:
#   1. segment it and detect coin circles on the downscaled input,
#   2. crop each coin from the full-resolution original and classify it through the cascade,
#   3. save a copy of the original annotated with boxes and labels,
#   4. record how many coins of each label were found, for the Kaggle submission.

# Minimum confidence below which a coin is reported as out-of-distribution ("OOD").
CCY_MIN_PROB = 0.8
EUR_MIN_PROB = 0.5

# image id -> per-column coin counts, in the column order of `df`
submission_rows = {}

for image, _, filename in tqdm(test_loader, desc='Analyzing images'):

    image = image.to(device)
    image_name = filename[0]  # batch_size=1 -> exactly one file name per batch

    predicted = get_segmentation(model, image)
    circles, _ = generate_hough(predicted, image)

    # Load the full-resolution original; the context manager releases the file handle
    # and the RGB conversion guarantees the 3-channel layout expected further down.
    with Image.open(f"{test_directory}/{image_name}") as pil_img:
        original_img = np.array(pil_img.convert("RGB"))

    # Segmentation ran on a smaller image -> scale circle coordinates back to the
    # original resolution (read from the loaded image instead of assuming 6000x4000).
    x_ratio = original_img.shape[1] / image.shape[3]
    y_ratio = original_img.shape[0] / image.shape[2]

    text_labels = []
    boxes = []

    for x, y, r in circles:

        cropped_image = get_cropped_image(original_img, x, y, r, x_ratio, y_ratio)
        boxes.append(list(get_bb_coordinates(x, y, r, x_ratio, y_ratio)))

        # Run the crop through the classification dataset/transform and take the single item.
        coin_loader = DataLoader(ClassificationDataset(cropped_image, transform=cls_tf))
        coin, _, _ = next(iter(coin_loader))
        coin = coin.to(device)

        # predict currency
        ccy_id, ccy_label, ccy_prob = get_class(ccy_model, coin, id_to_ccy)

        if ccy_prob < CCY_MIN_PROB:
            # not sure about the currency -> OOD
            coin_label = "OOD"

        elif ccy_id == 1:
            # EUR -> predict EUR coin type
            eur_id, eur_label, eur_prob = get_class(eur_model, coin, id_to_eur)
            coin_label = "OOD" if eur_prob < EUR_MIN_PROB else f"{eur_label}"

        else:
            # CHF -> predict heads or tails first
            side_id, side_label, side_prob = get_class(side_model, coin, id_to_side)

            if side_id == 0:
                # tails -> predict CHF coin type
                chf_tail_id, chf_tail_label, chf_tail_prob = get_class(
                    chf_tail_model, coin, id_to_label
                )
                coin_label = f"{chf_tail_label}"
            else:
                # heads -> predict picture type (3 options)
                chf_head_id, chf_head_label, chf_head_prob = get_class(
                    chf_head_model, coin, id_to_chf_img
                )
                # 5CHF has a distinctive picture, so id 0 is decisive.
                # TODO: need to have dynamic radius dependent decision-making
                coin_label = "5CHF" if chf_head_id == 0 else f"{chf_head_label}"

        text_labels.append(coin_label)

    prob_img = draw_bounding_boxes(
        image=torch.tensor(original_img, dtype=torch.uint8).permute(2, 0, 1),
        # reshape keeps the (N, 4) layout even when no coin was detected
        boxes=torch.tensor(boxes, dtype=torch.int16).reshape(-1, 4),
        labels=text_labels,
        colors="#FF0000",
        font="Arial",
        width=16,
        font_size=110
    )

    im = Image.fromarray(prob_img.cpu().numpy().transpose(1, 2, 0))
    im.save(f"../inference/{image_name}")

    # Count the predicted labels of this image, one value per submission column.
    # NOTE(review): labels that are not submission columns (e.g. "unknown", or a CHF
    # heads picture label that is not a denomination) are not counted -- confirm.
    label_counts = Counter(text_labels)
    # NOTE(review): assumes the Kaggle id is the file name without its extension -- confirm.
    image_id = image_name.rsplit(".", 1)[0]
    submission_rows[image_id] = [label_counts.get(column, 0) for column in df.columns]

# Build the predictions frame in one go (same columns as the empty template `df`).
df = pd.DataFrame.from_dict(submission_rows, orient="index", columns=list(df.columns))

Analyzing images: 100%|██████████| 162/162 [06:25<00:00,  2.38s/it]


In [51]:
# Name the index 'id', as required by the Kaggle submission format; the name becomes
# the header of the index column when the frame is written to CSV.
df.index.name='id'
# Preview the first rows of the predictions.
df.head()

Unnamed: 0_level_0,5CHF,2CHF,1CHF,0.5CHF,0.2CHF,0.1CHF,0.05CHF,2EUR,1EUR,0.5EUR,0.2EUR,0.1EUR,0.05EUR,0.02EUR,0.01EUR,OOD
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
L0000000,0,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0
L0000001,0,4,0,0,2,0,0,1,0,1,2,0,0,0,0,0
L0000002,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
L0000003,0,0,1,2,0,0,1,0,0,0,0,1,1,1,0,1
L0000004,0,1,0,1,0,2,0,2,0,0,0,0,0,0,0,0


In [52]:
# Write the submission file into the current working directory; the index is written
# as the first column.
df.to_csv('test_submission.csv')