Before running this notebook, define `PATH_TO_IMAGES`: the directory on your local machine that contains the `.png` images.

In [1]:
# --- Model / preprocessing -------------------------------------------------
kernel_type = 'tf_efficientnet_b4_ns'  # architecture name handed to timm.create_model
image_size = 512                       # images are resized to image_size x image_size

# --- Runtime ---------------------------------------------------------------
seed = 42
batch_size = 1
num_workers = 24

# --- Paths -----------------------------------------------------------------
# PATH_TO_IMAGES is NOT defined anywhere in this notebook: define it (the local
# image directory) before running this cell, otherwise this line raises NameError.
data_dir = PATH_TO_IMAGES
model_dir = 'weights/'

In [2]:
import pandas as pd
import numpy as np
import sys
import os
import time
import cv2
import PIL.Image
import random
from sklearn.metrics import accuracy_score, average_precision_score, roc_auc_score
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR 
from warmup_scheduler import GradualWarmupScheduler
import albumentations as A
from albumentations import *
from tqdm import tqdm
import matplotlib.pyplot as plt
import gc
from sklearn.metrics import roc_auc_score
import seaborn as sns
from pylab import rcParams
import timm
from warnings import filterwarnings
from albumentations.pytorch import ToTensorV2
import torch.nn.functional as F
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau, CosineAnnealingWarmRestarts

from warnings import filterwarnings
filterwarnings("ignore")

In [3]:
class CovidEffnetModel(nn.Module):
    """timm backbone with its own pooling/classifier removed, plus a custom head.

    The backbone's ``global_pool`` and ``classifier`` are replaced with
    ``nn.Identity`` so that ``self.model`` returns its unpooled feature map.
    That map is average-pooled to 1x1 and fed to a single linear layer.

    Attribute names (``model``, ``pooling``, ``fc``) are part of the checkpoint
    format (state-dict keys) and must not be renamed.

    Args:
        model_name: architecture name passed to ``timm.create_model``. The
            created model must expose ``classifier`` and ``global_pool``.
        pretrained: whether timm should load pretrained backbone weights.
        num_classes: number of output logits (defaults to 4, the value that
            was previously hard-coded in two places).
    """

    def __init__(self, model_name, pretrained=True, num_classes=4):
        super().__init__()
        self.model = timm.create_model(
            model_name, pretrained=pretrained, num_classes=num_classes, in_chans=3
        )
        # Read the feature width BEFORE the classifier is swapped out below.
        n_features = self.model.classifier.in_features
        self.model.global_pool = nn.Identity()
        self.model.classifier = nn.Identity()
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(n_features, num_classes)

    def forward(self, x):
        """Return raw (pre-sigmoid) logits with one row per input image."""
        features = self.model(x)
        # (B, C, 1, 1) -> (B, C)
        pooled_features = self.pooling(features).flatten(1)
        return self.fc(pooled_features)

In [4]:
class CovidDataset(Dataset):
    """Dataset yielding ``(image_tensor, image_id)`` pairs read from disk.

    Args:
        df: DataFrame with at least the columns ``image_id`` and ``file_path``.
            Its index is reset so that positional indices 0..len-1 are valid.
        mode: stored on the instance; not used by this class itself.
        transform: optional callable invoked as ``transform(image=img)`` and
            expected to return a mapping with an ``'image'`` entry
            (albumentations-style).
    """

    def __init__(self, df, mode, transform=None):
        self.df = df.reset_index(drop=True)
        self.mode = mode
        self.transform = transform

    def __len__(self):
        return len(self.df)

    @staticmethod
    def _to_float_tensor(img):
        """Return ``img`` as a float32 tensor divided by 255.

        A tensor is used as-is (it is assumed to already be channel-first).
        A numpy array is treated as HWC and permuted to CHW, so the dataset
        also works when no tensor-producing transform is supplied.
        """
        if not torch.is_tensor(img):
            img = torch.from_numpy(np.ascontiguousarray(img)).permute(2, 0, 1).contiguous()
        return img.float() / 255.0

    def __getitem__(self, index):
        """Load one image, apply the transform, and scale it by 1/255.

        Raises:
            FileNotFoundError: if ``cv2.imread`` could not read the file.
        """
        row = self.df.loc[index]
        image_id = row.image_id

        img = cv2.imread(row.file_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Image is missing or unreadable: {row.file_path}')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            img = self.transform(image=img)['image']

        return self._to_float_tensor(img), image_id

In [5]:
# Deterministic inference-time preprocessing: resize to the square model input,
# then convert to a tensor. No random augmentation is applied here.
valid_transform = A.Compose(
    [A.Resize(image_size, image_size, p=1.0), ToTensorV2()],
    p=1.0,
)

# Build the on-disk path of every image: "<id>_image" -> "<id>.png" under data_dir.
df_train = pd.read_csv('../cross_validation_scheme/crossval_info/original_data/multi_label_problem.csv')
df_train['image_name'] = df_train['image_id'].map(lambda image_id: image_id.replace('_image', '.png'))
df_train['file_path'] = df_train['image_name'].map(lambda name: os.path.join(data_dir, f'{name}'))

# shuffle=False keeps the loader order aligned with df_train.
dataset = CovidDataset(df_train, 'train', transform=valid_transform)
test_loader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True,
)

In [6]:
def tta_inference_func(model, test_loader, device=None):
    """Run horizontal-flip test-time augmentation and return per-image scores.

    Each batch is concatenated with its horizontally flipped copy, both halves
    go through ``model`` in one forward pass, the two logit sets of every
    image are averaged, and a sigmoid is applied to the averaged logits.

    Args:
        model: module returning 4 logits per image, in the order
            negative / typical / indeterminate / atypical.
        test_loader: iterable of ``(images, image_ids)`` batches where
            ``images`` is a ``(B, C, H, W)`` tensor and ``image_ids`` is a
            sequence of length ``B``.
        device: device the batches are moved to. When omitted it is taken
            from the model's first parameter (CPU if the model has none).

    Returns:
        pandas.DataFrame with columns ``image_id``, ``negative``, ``typical``,
        ``indeterminate`` and ``atypical``; one row per image, in loader order.
    """
    class_names = ['negative', 'typical', 'indeterminate', 'atypical']

    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    batch_probs = []
    image_ids = []

    with torch.no_grad():
        for images, image_id in tqdm(test_loader):
            x = images.to(device)
            # Use the real batch size: the last batch may be smaller.
            n_images = x.size(0)
            # Row layout after cat: [orig_0..orig_{B-1}, flip_0..flip_{B-1}].
            x = torch.cat([x, x.flip(-1)], dim=0)
            logits = model(x)
            # (2B, K) -> (2, B, K): axis 0 separates original from flipped, so
            # mean(0) averages each image with ITS OWN flip. Reshaping to
            # (B, 2, K) would instead pair two different images when B > 1.
            logits = logits.reshape(2, n_images, -1).mean(0)
            batch_probs.append(logits.sigmoid().cpu())
            image_ids.extend(image_id)

    if batch_probs:
        probs = torch.cat(batch_probs).numpy()
    else:
        # Empty loader: return a well-formed, zero-row frame.
        probs = np.empty((0, len(class_names)), dtype=np.float32)

    result = {'image_id': image_ids}
    for column, name in enumerate(class_names):
        result[name] = probs[:, column]
    return pd.DataFrame(result)

In [7]:
# Prefer the GPU when one is available.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# One checkpoint per cross-validation fold.
MODELS_EFFNET = [
    'weights/tf_efficientnet_b4_ns_fold0_best_AP.pth',
    'weights/tf_efficientnet_b4_ns_fold1_best_AP.pth',
    'weights/tf_efficientnet_b4_ns_fold2_best_AP.pth',
    'weights/tf_efficientnet_b4_ns_fold3_best_AP.pth',
    'weights/tf_efficientnet_b4_ns_fold4_best_AP.pth',
]

modelsEffnet = []
for path in MODELS_EFFNET:
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    # Weights are read onto the CPU first and moved to `device` with the model.
    state_dict = torch.load(path, map_location=torch.device('cpu'))
    # pretrained=False: every weight comes from the checkpoint.
    model = CovidEffnetModel(kernel_type, pretrained=False)
    model.load_state_dict(state_dict)
    model.eval()
    model.to(device)
    modelsEffnet.append(model)

# The models hold their own copy of the weights now; release the last dict.
del state_dict

In [8]:
# One prediction frame per fold model, all computed over the same loader.
df_list = [tta_inference_func(model, test_loader) for model in modelsEffnet]

100%|██████████| 6334/6334 [01:49<00:00, 57.68it/s]
100%|██████████| 6334/6334 [01:49<00:00, 57.83it/s]
100%|██████████| 6334/6334 [01:49<00:00, 57.75it/s]
100%|██████████| 6334/6334 [01:50<00:00, 57.31it/s]
100%|██████████| 6334/6334 [01:48<00:00, 58.18it/s]


In [9]:
# Fold ensemble: stack the per-fold frames and average each class score per
# image. The result is indexed by image_id.
df_result = (
    pd.concat(df_list)
    .groupby('image_id')
    .mean()
)
df_result

Unnamed: 0_level_0,negative,typical,indeterminate,atypical
image_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
000a312787f2_image,0.083742,0.734340,0.154893,0.031727
000c3a3f293f_image,0.312830,0.410526,0.223395,0.079522
0012ff7358bc_image,0.189476,0.530153,0.223398,0.064374
001398f4ff4f_image,0.860194,0.049412,0.086627,0.021978
001bd15d1891_image,0.274183,0.532782,0.182322,0.049640
...,...,...,...,...
ffcc6edd9445_image,0.233897,0.511317,0.219824,0.074134
ffd91a2c4ca0_image,0.308624,0.512546,0.150411,0.055218
ffd9b6cf2961_image,0.673700,0.141603,0.163437,0.048110
ffdc682f7680_image,0.097647,0.722438,0.166575,0.031314


In [10]:
# Turn image_id back into a regular column so it can serve as the join key.
df_result = df_result.reset_index()

# Attach the study each image belongs to.
meta = df_train.loc[:, ['image_id', 'study_id']]
df_result = pd.merge(meta, df_result, on='image_id')
df_result

Unnamed: 0,image_id,study_id,negative,typical,indeterminate,atypical
0,000a312787f2_image,5776db0cec75_study,0.083742,0.734340,0.154893,0.031727
1,000c3a3f293f_image,ff0879eb20ed_study,0.312830,0.410526,0.223395,0.079522
2,0012ff7358bc_image,9d514ce429a7_study,0.189476,0.530153,0.223398,0.064374
3,001398f4ff4f_image,28dddc8559b2_study,0.860194,0.049412,0.086627,0.021978
4,001bd15d1891_image,dfd9fdd85a3e_study,0.274183,0.532782,0.182322,0.049640
...,...,...,...,...,...,...
6329,ffcc6edd9445_image,7e6c68462e06_study,0.233897,0.511317,0.219824,0.074134
6330,ffd91a2c4ca0_image,8332bdaddb6e_study,0.308624,0.512546,0.150411,0.055218
6331,ffd9b6cf2961_image,7eed9af03814_study,0.673700,0.141603,0.163437,0.048110
6332,ffdc682f7680_image,a0cb0b96fb3d_study,0.097647,0.722438,0.166575,0.031314


In [11]:
# Study-level score = maximum over the study's images, for every class.
# A single built-in 'max' transform over all four columns replaces four
# copy-pasted Python-lambda transforms (same values, one groupby pass).
class_columns = ['negative', 'typical', 'indeterminate', 'atypical']
df_result[class_columns] = df_result.groupby('study_id')[class_columns].transform('max')

In [12]:
# Keep only the study key and the four class scores (image_id is dropped).
study_columns = ['study_id', 'negative', 'typical', 'indeterminate', 'atypical']
df_result = df_result.loc[:, study_columns]

In [13]:
# Spot check: every row of a multi-image study should carry identical scores.
df_result.loc[df_result['study_id'].eq('00f9e183938e_study')]

Unnamed: 0,study_id,negative,typical,indeterminate,atypical
2641,00f9e183938e_study,0.668704,0.142875,0.148447,0.046062
2642,00f9e183938e_study,0.668704,0.142875,0.148447,0.046062


In [14]:
# Rows of the same study are now exact duplicates; keep the first of each.
# The original row labels are retained (the index is not reset).
is_repeat = df_result.duplicated(keep='first')
df_result = df_result[~is_repeat]
df_result

Unnamed: 0,study_id,negative,typical,indeterminate,atypical
0,5776db0cec75_study,0.083742,0.734340,0.154893,0.031727
1,ff0879eb20ed_study,0.312830,0.410526,0.223395,0.079522
2,9d514ce429a7_study,0.189476,0.530153,0.223398,0.064374
3,28dddc8559b2_study,0.860194,0.049412,0.086627,0.021978
4,dfd9fdd85a3e_study,0.274183,0.532782,0.182322,0.049640
...,...,...,...,...,...
6329,7e6c68462e06_study,0.233897,0.511317,0.219824,0.074134
6330,8332bdaddb6e_study,0.308624,0.512546,0.150411,0.055218
6331,7eed9af03814_study,0.673700,0.141603,0.163437,0.048110
6332,a0cb0b96fb3d_study,0.097647,0.722438,0.166575,0.031314


In [15]:
def prepare_data(df):
    """Build the per-study ``PredictionString`` column.

    For every row the four class scores are rendered as
    ``"<class> <score> 0 0 1 1"`` and joined with single spaces, in the order
    negative, typical, indeterminate, atypical. Scores are stringified with
    ``astype(str)``.

    The input frame is NOT modified, so the function can safely be called
    more than once on the same frame (or on a slice of another frame).

    Args:
        df: DataFrame with columns ``study_id``, ``negative``, ``typical``,
            ``indeterminate`` and ``atypical``.

    Returns:
        A new DataFrame with columns ``study_id`` and ``PredictionString``
        that keeps the index of ``df``.
    """
    class_names = ('negative', 'typical', 'indeterminate', 'atypical')

    prediction = None
    for name in class_names:
        chunk = name + ' ' + df[name].astype(str) + ' 0 0 1 1'
        prediction = chunk if prediction is None else prediction + ' ' + chunk

    # df[['study_id']] is a fresh frame, so assign() never touches the caller's data.
    return df[['study_id']].assign(PredictionString=prediction)

In [16]:
# Replace the numeric score columns with the formatted PredictionString column.
# Re-running this cell alone fails: after the first run df_result no longer
# has the score columns that prepare_data reads.
df_result = prepare_data(df_result)

In [17]:
# Display the final frame (study_id, PredictionString).
df_result

Unnamed: 0,study_id,PredictionString
0,5776db0cec75_study,negative 0.083741985 0 0 1 1 typical 0.7343395...
1,ff0879eb20ed_study,negative 0.31282985 0 0 1 1 typical 0.410526 0...
2,9d514ce429a7_study,negative 0.18947591 0 0 1 1 typical 0.5301529 ...
3,28dddc8559b2_study,negative 0.8601944 0 0 1 1 typical 0.04941217 ...
4,dfd9fdd85a3e_study,negative 0.27418283 0 0 1 1 typical 0.5327821 ...
...,...,...
6329,7e6c68462e06_study,negative 0.23389703 0 0 1 1 typical 0.51131654...
6330,8332bdaddb6e_study,negative 0.30862394 0 0 1 1 typical 0.5125459 ...
6331,7eed9af03814_study,negative 0.6736997 0 0 1 1 typical 0.1416035 0...
6332,a0cb0b96fb3d_study,negative 0.09764711 0 0 1 1 typical 0.7224384 ...
