In [10]:
import sys
sys.path.insert(0, "../")

import os, glob
import json

import pandas as pd
import torch
import numpy as np
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix

import timm

In [11]:
# Class names; a predicted class index i is translated to label_names[i] when
# the submission is built (presumably the order used in training - TODO confirm).
# Defined in this configuration cell because NUM_CLASSES needs it on a fresh
# kernel; the inference section further down assigns the same list again.
label_names = ['indeterminate', 'negative', 'atypical', 'typical']
NUM_CLASSES = len(label_names)

# NOTE: despite the name this is a glob pattern matching the test JPEGs,
# not a directory path.
TEST_IMG_DIR = "../../../Dataset/Covid19/test_full_size_jpg/test_data_jpg/*.jpg"
# Checkpoint restored before running inference.
WEIGHTS_PATH = os.path.join("../weights", "best.pt")
# JSON file mapping "<image id>_image" keys to study ids.
TEST_IMG2STUDY_PATH = "../../../Dataset/Covid19/test_image2study.json"
# Example submission used as the template that gets filled in.
EXAMPLE_SUBMISSION_PATH = "../../../Dataset/Covid19/submission.csv"

# Inference dataset and data preprocessing

In [6]:
from torch.utils.data import Dataset
from torchvision.transforms import Compose, Resize, Normalize, ToTensor

def get_test_grayscale_transforms(img_size: int) -> Compose:
    """Build the deterministic preprocessing pipeline used at inference time.

    The pipeline resizes the image to a square, converts it to a tensor and
    normalizes it with a single mean/std pair. No random augmentation is
    applied.

    Args:
        img_size: Side length of the square output (img_size x img_size).

    Returns:
        A ``Compose`` chaining Resize -> ToTensor -> Normalize.
    """
    # Imported here so the function does not depend on another cell's imports.
    from torchvision.transforms import InterpolationMode

    return Compose([
        # BICUBIC is the mode the former integer code ``3`` stood for.
        Resize([img_size, img_size], interpolation=InterpolationMode.BICUBIC),
        ToTensor(),
        # Single-channel statistics; presumably computed on the training
        # images - TODO confirm.
        Normalize(
            mean=[0.5203580774185134],
            std=[0.24102417452995067]),
    ])

class InferenceImageDataset(Dataset):
    """Dataset that loads images from a list of file paths for inference.

    Every item is an ``(image, path)`` pair, so predictions can be matched
    back to the file they were computed from.
    """

    def __init__(self, img_paths, transform=None, label_names=None):
        """Store the paths and the optional preprocessing.

        Args:
            img_paths: Indexable collection of image file paths.
            transform: Optional callable applied to each opened image.
            label_names: Optional sequence of class names; only used to build
                the ``class_to_idx`` lookup. ``None`` yields an empty lookup.
        """
        # The documented default must not crash inside enumerate().
        if label_names is None:
            label_names = ()

        self.img_paths = img_paths
        self.class_to_idx = {label: idx for idx, label in enumerate(label_names)}
        self.transform = transform

    def __getitem__(self, index):
        """Return the (optionally transformed) image at ``index`` and its path."""
        # Pillow is what torchvision's PIL-based transforms operate on; the
        # import lives here so the class works without relying on other cells.
        from PIL import Image

        img_path = self.img_paths[index]
        img = Image.open(img_path)

        if self.transform is not None:
            img = self.transform(img)

        return img, img_path

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.img_paths)

# Models

In [16]:
import timm

# Names of the timm architectures that have pretrained weights available;
# handy when choosing a `model_name` for get_model. Not referenced by the
# other visible cells.
model_names = timm.list_models(pretrained=True)

def get_model(model_name, num_classes, input_channels, pretrained_path=None):
    """Create a timm model, optionally restoring weights from a local checkpoint.

    Args:
        model_name: Architecture name understood by ``timm.create_model``.
        num_classes: Number of outputs of the classification head.
        input_channels: Number of input image channels (timm's ``in_chans``).
        pretrained_path: Optional path to a checkpoint written with
            ``torch.save``. It may be a plain state dict or a dict that holds
            the state dict under the ``"model_state_dict"`` key. When omitted,
            timm's own pretrained weights are used.

    Returns:
        The constructed model.
    """
    # When a checkpoint is given its weights replace everything that timm
    # would download, so the download is skipped in that case.
    model = timm.create_model(
        model_name,
        pretrained=pretrained_path is None,
        num_classes=num_classes,
        in_chans=input_channels,
    )

    if pretrained_path is not None:
        # map_location lets a checkpoint saved on a GPU load on CPU-only hosts.
        # NOTE: torch.load unpickles the file - only load trusted checkpoints.
        checkpoint = torch.load(pretrained_path, map_location="cpu")
        # Accept training checkpoints that wrap the weights in a dict.
        if isinstance(checkpoint, dict) and "model_state_dict" in checkpoint:
            checkpoint = checkpoint["model_state_dict"]
        model.load_state_dict(checkpoint)

    return model

# Inference

In [9]:
# Class names; a predicted class index i is translated to label_names[i] when
# the submission is built. Presumably this order matches the one used during
# training - TODO confirm.
label_names = ['indeterminate', 'negative', 'atypical', 'typical']
# Side length handed to get_test_grayscale_transforms inside inference().
IMG_SIZE = 640

def inference(model, img_paths, device=None, batch_size=1):
    """Run the model over the given images and collect the argmax class of each.

    Args:
        model: Classification model. It is moved to ``device`` for the run
            and back to the CPU afterwards.
        img_paths: Sequence of image file paths to predict on.
        device: Device to run on. Defaults to ``"cuda"`` when available,
            otherwise ``"cpu"``.
        batch_size: Number of images per forward pass.

    Returns:
        A ``(predictions, image_paths)`` tuple of equally long lists: the
        predicted class index per image and the path it belongs to, in the
        order the loader produced them.
    """
    # Imported here so the function does not depend on another cell's imports.
    from torch.utils.data import DataLoader

    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    dataset = InferenceImageDataset(
        img_paths, get_test_grayscale_transforms(IMG_SIZE), label_names)
    dataloader = DataLoader(dataset, batch_size=batch_size)

    model.to(device)
    model.eval()
    predictions = []
    image_paths = []
    try:
        with torch.no_grad():
            # Distinct loop names: the original shadowed the img_paths argument.
            for images, batch_paths in tqdm(dataloader):
                logits = model(images.to(device))
                predictions.extend(logits.argmax(dim=1).cpu().numpy())
                image_paths.extend(batch_paths)
    finally:
        # Hand the model back on the CPU even if a batch failed.
        model.cpu()

    return predictions, image_paths

In [90]:
# Report the GPU when one is available; CUDA holds the device string.
CUDA = "cuda" if torch.cuda.is_available() else "cpu"
if CUDA == "cuda":
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")

# NOTE(review): get_efficientnetb0 is neither defined nor imported in the
# visible cells - presumably it lives in the project package made importable
# through sys.path; confirm and import it explicitly.
model = get_efficientnetb0(NUM_CLASSES, num_channels=1)
# map_location lets a checkpoint saved on a GPU load on CPU-only hosts.
state_dict = torch.load(WEIGHTS_PATH, map_location="cpu")
model.load_state_dict(state_dict["model_state_dict"])

# The submission cells read `predictions` and `image_paths`, so they are
# produced here. sorted() makes the processing order reproducible.
test_img_paths = sorted(glob.glob(TEST_IMG_DIR))
predictions, image_paths = inference(model, test_img_paths)



Using GPU: NVIDIA GeForce RTX 3070 Laptop GPU


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1263/1263 [02:53<00:00,  7.28it/s]


# Kaggle submission

In [96]:
# Lookup from "<image id>_image" keys to the study each image belongs to.
with open(TEST_IMG2STUDY_PATH, 'r') as mapping_file:
    image2study = json.load(mapping_file)

# Example submission that serves as the template to be filled in.
submission = pd.read_csv(EXAMPLE_SUBMISSION_PATH)

In [97]:
# Gather the prediction strings per study in one pass and write them to the
# frame with a single assignment, instead of one boolean-mask update per image.
study_counter = dict()   # study id -> number of images seen for that study
study_strings = dict()   # study id -> concatenated prediction strings
for img_path, class_idx in zip(image_paths, predictions):
    # splitext removes only the extension; str.replace would also strip a
    # ".jpg" that happens to occur elsewhere in the file name.
    image_stem = os.path.splitext(os.path.basename(img_path))[0]
    image_id = image_stem + "_image"
    study_id = image2study[image_id]

    # Strings of images that share a study are concatenated in processing
    # order, each ending with the same fixed " 1 0 0 1 1 " suffix.
    prediction_string = f"{label_names[class_idx]} 1 0 0 1 1 "
    study_strings[study_id] = study_strings.get(study_id, "") + prediction_string
    study_counter[study_id] = study_counter.get(study_id, 0) + 1

# Rows whose id received no prediction keep the template's PredictionString.
has_prediction = submission["id"].isin(list(study_strings))
submission.loc[has_prediction, "PredictionString"] = (
    submission.loc[has_prediction, "id"].map(study_strings)
)

submission.head(20)

amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk


Unnamed: 0,id,PredictionString
0,00188a671292_study,negative 1 0 0 1 1
1,004bd59708be_study,typical 1 0 0 1 1
2,00508faccd39_study,negative 1 0 0 1 1
3,006486aa80b2_study,typical 1 0 0 1 1
4,00655178fdfc_study,typical 1 0 0 1 1
5,00a81e8f1051_study,negative 1 0 0 1 1
6,00be7de16711_study,typical 1 0 0 1 1
7,00c7a3928f0f_study,negative 1 0 0 1 1
8,00d63957bc3a_study,typical 1 0 0 1 1
9,0107f2d291d6_study,negative 1 0 0 1 1


In [98]:
# Write the filled-in template to the working directory; index=False keeps the
# DataFrame's row index out of the CSV.
submission.to_csv("submission.csv", index=False)