# Summary

Select models and do inference.

## Import libs

In [None]:
import os
import math
import random

# For data manipulation
import pandas as pd

from matplotlib import pyplot as plt

# Pytorch Imports
import torch
import torch.optim as optim
from torch.utils.data import DataLoader

# Utils
from tqdm import tqdm

from sklearn.model_selection import GroupKFold

In [None]:
from hcd_3_common import (
    generate_submit_csv,
    load_weights,
    prepare_loaders,
    set_seed,
    HCDDataset,
    data_transforms,
    HCDModel_Res50,
    CONFIG,
    IS_INTERACTIVE,
)

## Configurations

In [None]:
# Folder with the competition files (sample_submission.csv and the test/ images).
ROOT_FOLDER = "/kaggle/input/histopathologic-cancer-detection"

# Checkpoints that make up the inference ensemble.
# Pick the best-scoring weight file(s) produced by the training folds.
BEST_WEIGHTS = ["/kaggle/input/hcd-3-improve-model-all-folds/AUROC1.00_Loss0.0911_fold0_epoch15.bin"]

In [None]:
# Notebook-level overrides of the shared CONFIG imported from hcd_3_common.
# These are the entries most often tuned per run:
#   valid_batch_size - batch size of the test DataLoader
#   tta_size         - number of transformed copies requested per test image
CONFIG.update(
    {
        "valid_batch_size": 256,
        "img_size": 96,
        "tta_size": 16,
    }
)
print("CONFIG:", CONFIG)

In [None]:
# Apply the configured seed through the shared helper from hcd_3_common
# (presumably seeds the Python/NumPy/PyTorch RNGs - confirm in set_seed).
set_seed(CONFIG['seed'])

## Load models

In [None]:
# Build one model per selected checkpoint. Only models whose weights were
# loaded successfully are moved to the target device and join the ensemble.
models = []

for weight_file_path in BEST_WEIGHTS:
    model = HCDModel_Res50(num_classes=CONFIG["num_classes"], weights=CONFIG["weights_name"])
    weights_loaded = load_weights(model, weight_file_path)
    if weights_loaded:
        model.to(CONFIG['device'])
        models.append(model)
    else:
        # A dropped ensemble member changes the predictions, so make it
        # visible instead of skipping the checkpoint silently.
        print("WARNING: could not load weights, skipping:", weight_file_path)

print("models loaded:", len(models), "/", len(BEST_WEIGHTS))
    

## Inference

In [None]:
def generate_submit_csv(models, root_folder, transforms, output_path="submission.csv"):
    """Predict the test set with an ensemble + TTA and write the submission CSV.

    NOTE: this notebook-local definition shadows the ``generate_submit_csv``
    imported from ``hcd_3_common`` earlier in the file.

    Every model is applied to every test-time-augmented copy of each image;
    the per-image prediction is the mean over all (model, copy) pairs.

    Args:
        models: Non-empty sequence of models, already on ``CONFIG["device"]``.
            Each model is switched to evaluation mode.
        root_folder: Folder containing ``sample_submission.csv`` and a
            ``test`` sub-folder with one ``<id>.tif`` file per row.
        transforms: Mapping whose ``"valid"`` entry is handed to ``HCDDataset``.
        output_path: Where the ``id,label`` CSV is written.

    Raises:
        ValueError: If ``models`` is empty.
    """
    if not models:
        # Fail early with a clear message instead of an obscure torch.stack
        # error on the first batch.
        raise ValueError("generate_submit_csv needs at least one loaded model")

    test_df = pd.read_csv(os.path.join(root_folder, "sample_submission.csv"))
    test_df["file_path"] = test_df["id"].apply(
        lambda image_id: os.path.join(root_folder, "test", f"{image_id}.tif")
    )
    print("test_df shape:", test_df.shape)

    test_dataset = HCDDataset(
        test_df,
        transforms=transforms["valid"],
        transformed_images_per_item=CONFIG["tta_size"],
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=CONFIG['valid_batch_size'],
        num_workers=2,
        shuffle=False,  # keep predictions aligned with the rows of test_df
        pin_memory=True,
    )

    # torch.no_grad() only disables autograd; Dropout/BatchNorm still need
    # eval mode to behave deterministically at inference time.
    for model in models:
        model.eval()

    preds = []

    total_step = len(test_loader)
    log_step = math.ceil(total_step / 10)  # about 10 progress lines per pass

    with torch.no_grad():
        bar = tqdm(enumerate(test_loader), total=total_step, disable=(not IS_INTERACTIVE))
        for step, data in bar:
            images = data['image'].to(CONFIG["device"], dtype=torch.float)
            batch_size = images.size(0)

            # dim=1 holds the TTA copies of each image (as the dataset is
            # configured above); unbind removes exactly that dimension, so a
            # final batch with a single image keeps its batch dimension.
            tta_views = images.unbind(dim=1)

            outputs = []
            for model in models:
                for view in tta_views:
                    # Flatten to (batch, -1) and drop only a size-1 class
                    # dimension; a dimensionless squeeze would also collapse
                    # the batch dimension when batch_size == 1.
                    scores = model(view).reshape(batch_size, -1)
                    outputs.append(scores.squeeze(1))

            # Mean over all (model, TTA copy) predictions.
            predicted = torch.stack(outputs).mean(dim=0)
            preds.extend(predicted.tolist())

            if not IS_INTERACTIVE and (step % log_step == 0):
                print(step, "/", total_step)

    print(len(preds))

    test_df["label"] = preds
    test_df[["id", "label"]].to_csv(output_path, index=False)

In [None]:

# Run inference with the loaded ensemble on the test images under ROOT_FOLDER
# and write the resulting submission file.
generate_submit_csv(models, ROOT_FOLDER, data_transforms)
