In [2]:
import argparse
from pathlib import Path

import pandas as pd

import timm

import torch
from torch.utils.data import DataLoader
from torchvision import transforms

import wandb

from src import set_device, SubstrateDataset

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
class TestTransforms():
    """Callable wrapper around the preprocessing applied to test images.

    The pipelines are kept in ``self.data_transform``, a dict keyed by phase
    name; only the ``'test'`` phase is defined.

    Args:
        image_size: Edge length used for both height and width when resizing.
    """

    def __init__(self, image_size):
        target_size = (image_size, image_size)

        # Resize -> tensor -> per-channel normalisation. The mean/std values
        # match the widely used ImageNet statistics.
        test_pipeline = transforms.Compose([
            transforms.Resize(target_size),
            transforms.ToTensor(),
            transforms.Normalize(
                [0.485, 0.456, 0.406],
                [0.229, 0.224, 0.225]
            ),
        ])

        self.data_transform = {'test': test_pipeline}

    def __call__(self, phase, img):
        """Apply the pipeline registered for ``phase`` to ``img``.

        Raises:
            KeyError: If ``phase`` has no registered pipeline.
        """
        pipeline = self.data_transform[phase]
        return pipeline(img)

In [5]:
def predict_classes(model, test_dataloader, device):
    """Predict a class index for every sample yielded by ``test_dataloader``.

    Args:
        model: Network called as ``model(images)``; its output is reduced with
            ``argmax`` over dimension 1.
        test_dataloader: Iterable of ``(images, label)`` batches. The label is
            ignored.
        device: Device the image batches are moved to before the forward pass.

    Returns:
        list: One predicted class index per sample, in iteration order.
    """
    preds = []

    # Evaluation mode only needs to be set once, not on every batch.
    model.eval()

    # Inference needs no gradients; disabling autograd avoids building a graph
    # for every batch.
    with torch.no_grad():
        for images, _ in test_dataloader:
            images = images.to(device)

            outputs = model(images)
            pred = torch.argmax(outputs, dim=1)
            pred = pred.to('cpu').numpy()

            preds.extend(pred)

            # Progress message whenever the running total is a multiple of 100.
            if len(preds) % 100 == 0:
                print(f'{len(preds)} predictions done!')

    return preds

In [10]:
def predict(model, test_dataloader, device):
    """Collect the raw model outputs for every sample in ``test_dataloader``.

    No softmax or other post-processing is applied; each entry is exactly what
    ``model(images)`` produced for that sample.

    Args:
        model: Network called as ``model(images)``.
        test_dataloader: Iterable of ``(images, label)`` batches. The label is
            ignored.
        device: Device the image batches are moved to before the forward pass.

    Returns:
        list: One NumPy array of model outputs per sample, in iteration order.
    """
    outputs_list = []

    # Evaluation mode only needs to be set once, not on every batch.
    model.eval()

    # Inference needs no gradients; disabling autograd avoids building a graph
    # for every batch, so no detach() is needed before converting to NumPy.
    with torch.no_grad():
        for images, _ in test_dataloader:
            images = images.to(device)

            outputs = model(images)
            outputs = outputs.to('cpu').numpy()

            outputs_list.extend(outputs)

            # Progress message whenever the running total is a multiple of 100.
            if len(outputs_list) % 100 == 0:
                print(f'{len(outputs_list)} predictions done!')

    return outputs_list

In [7]:
# Device used for inference (gpu or cpu), as chosen by the project helper.
device = set_device()

# The sample submission is a headerless TSV: column 0 is used as the image
# name list and column 1 as a dummy label list.
test = pd.read_csv('input/sample_submit.tsv', header=None, sep='\t')
x_test = test[0].values
dummy = test[1].values

# Dataset over the test images, preprocessed at 224x224.
test_transform = TestTransforms(image_size=224)
test_dataset = SubstrateDataset(
    x_test,
    dummy,
    img_dir='input/v4/test',
    transform=test_transform,
    phase='test',
)

# One image per batch, unshuffled, so predictions stay in the order of `test`.
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=1)

device: cpu


### Hard voting

In [13]:
# Collect, for each of the 5 fold checkpoints, the predicted class of every
# test image. preds_list[i] holds the predictions of fold i.
ckpt_key_prefix = 'model.'

preds_list = []
for i in range(5):
    # load model
    model = timm.create_model('convnext_base', pretrained=False, num_classes=2)

    # restore k-fold model in wandb
    best_model = wandb.restore(f'sleek-shape-45-{i}.ckpt', run_path='insilicomab/substrate/366jyvg6')

    # NOTE(review): torch.load unpickles the file, so only restore checkpoints
    # from a trusted run.
    state_dict = torch.load(best_model.name, map_location=torch.device(device))['state_dict']

    # Remove the leading 'model.' prefix from each key. str.lstrip('model.')
    # must not be used for this: it strips any leading run of the characters
    # m/o/d/e/l/. and would turn e.g. 'model.downsample.weight' into
    # 'wnsample.weight'.
    new_state_dict = {
        (k[len(ckpt_key_prefix):] if k.startswith(ckpt_key_prefix) else k): v
        for k, v in state_dict.items()
    }
    model.load_state_dict(new_state_dict, strict=True)

    # The image batches are moved to `device` during inference, so the model
    # has to live on the same device.
    model = model.to(device)

    # inference
    preds = predict_classes(model, test_dataloader, device)
    preds_list.append(preds)

100 predictions done!
200 predictions done!
100 predictions done!
200 predictions done!
100 predictions done!
200 predictions done!
100 predictions done!
200 predictions done!
100 predictions done!
200 predictions done!


In [42]:
# preds_list holds one list of predictions per fold; transposing gives one row
# per test image and one column per fold model.
fold_votes = pd.DataFrame(preds_list)
preds = fold_votes.transpose()
preds

Unnamed: 0,0,1,2,3,4
0,1,0,1,1,1
1,0,0,0,0,0
2,0,0,0,0,0
3,0,0,0,0,0
4,1,1,1,1,1
...,...,...,...,...,...
208,1,1,1,1,1
209,0,0,0,0,0
210,0,0,0,0,0
211,1,1,1,1,1


### Soft voting

In [11]:
# Collect, for each of the 5 fold checkpoints, the raw model outputs for every
# test image. preds_list[i] holds the outputs of fold i.
ckpt_key_prefix = 'model.'

preds_list = []
for i in range(5):
    # load model
    model = timm.create_model('convnext_base', pretrained=False, num_classes=2)

    # restore k-fold model in wandb
    best_model = wandb.restore(f'sleek-shape-45-{i}.ckpt', run_path='insilicomab/substrate/366jyvg6')

    # NOTE(review): torch.load unpickles the file, so only restore checkpoints
    # from a trusted run.
    state_dict = torch.load(best_model.name, map_location=torch.device(device))['state_dict']

    # Remove the leading 'model.' prefix from each key. str.lstrip('model.')
    # must not be used for this: it strips any leading run of the characters
    # m/o/d/e/l/. and would turn e.g. 'model.downsample.weight' into
    # 'wnsample.weight'.
    new_state_dict = {
        (k[len(ckpt_key_prefix):] if k.startswith(ckpt_key_prefix) else k): v
        for k, v in state_dict.items()
    }
    model.load_state_dict(new_state_dict, strict=True)

    # The image batches are moved to `device` during inference, so the model
    # has to live on the same device.
    model = model.to(device)

    # inference
    preds = predict(model, test_dataloader, device)
    preds_list.append(preds)

100 predictions done!
200 predictions done!
100 predictions done!
200 predictions done!
100 predictions done!
200 predictions done!
100 predictions done!
200 predictions done!
100 predictions done!
200 predictions done!


In [25]:
import numpy as np

# Stack the per-fold outputs and average them across folds (axis 0), then take
# the highest-scoring class for each test image.
# NOTE(review): the averaged values are the raw outputs returned by predict();
# no softmax is applied before averaging - confirm this is intended.
fold_outputs = np.asarray(preds_list)
preds_mean = fold_outputs.mean(axis=0)
pred = np.argmax(preds_mean, axis=1)

In [26]:
# submit
submission_path = Path('submit/submission_soft.tsv')

# to_csv does not create missing directories, so make sure the output folder
# exists before writing.
submission_path.parent.mkdir(parents=True, exist_ok=True)

# Replace the dummy label column with the ensemble prediction.
test[1] = pred

# Headerless, index-less TSV, mirroring the sample submission that was read
# with header=None.
test.to_csv(
    submission_path,
    sep='\t',
    header=False,
    index=False
)