### Imports

In [1]:
%load_ext autoreload
%autoreload 2

#### Independent Imports

In [2]:
import random
from collections import OrderedDict
import sys
import os
import cv2
import skimage.io
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from pathlib import Path

#### Location Dependent Imports

In [3]:
# Detect the author's workstation from the name of the home directory.
# The previous check used Path.group(), i.e. the *group* owning the home
# directory, which only equals the user name on systems that create per-user
# groups and is not supported on Windows.
local = Path.home().name == 'ademyanchuk'

In [4]:
# Local run: take the dataset locations from the chestxray project config.
if local:
    from chestxray.config import PANDA_PATH, PANDA_IMGS
    
    # Locally, the images used for inference are read from PANDA_IMGS.
    TEST_PATH = PANDA_IMGS
    # Directory that is appended to sys.path and that holds the model weights.
    # NOTE(review): absolute, machine-specific path.
    ADD_DIR = Path("/home/ademyanchuk/Projects/ChestXRay/submit_panda")

In [5]:
# Non-local run: use directories relative to the working directory instead.
if not local:
    ADD_DIR = Path("../input/panda0")
    # NOTE(review): "TODO" looks like a placeholder; presumably this should be
    # the directory containing the efficientnet_pytorch package - confirm.
    EFFNET_DIR = "../input/TODO"
    sys.path.append(EFFNET_DIR)

In [6]:
# Make the modules located in ADD_DIR importable.
sys.path.append(str(ADD_DIR))

In [26]:
# Imports from dataset
from submit_patch import PatchModel, aggregate
from submit_patch import CFG
from submit_patch import PatchTestDataset

In [8]:
from efficientnet_pytorch import EfficientNet

In [9]:
if not local:
    from submit_patch import TEST_PATH, PANDA_PATH

### Setup

In [10]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [11]:
def seed_torch(seed=1982):
    """Seed every random number generator used by this notebook.

    Exports ``PYTHONHASHSEED``, seeds Python's ``random``, NumPy and PyTorch
    (including ``torch.cuda.manual_seed``) and turns on deterministic cuDNN.

    Args:
        seed: integer seed applied to all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True


seed_torch()

In [12]:
os.listdir(PANDA_PATH)

['train_label_masks',
 'sample_submission.csv',
 'folds_dedup.csv',
 'test.csv',
 'train_images',
 'folds.csv',
 'folds_db.csv',
 'folds_cleaned.csv',
 'jpeg',
 'PANDA_Suspicious_Slides.csv',
 'hashes.npy',
 'hdf5',
 'train.csv']

In [13]:
TEST_PATH.exists()

True

### Data Loading

In [14]:
# Read the train, test and sample-submission CSV tables from PANDA_PATH.
train = pd.read_csv(PANDA_PATH/'train.csv')
test = pd.read_csv(PANDA_PATH/'test.csv')
sample = pd.read_csv(PANDA_PATH/'sample_submission.csv')

In [15]:
train.head()

Unnamed: 0,image_id,data_provider,isup_grade,gleason_score
0,0005f7aaab2800f6170c399693a96917,karolinska,0,0+0
1,000920ad0b612851f8e01bcc880d9b3d,karolinska,0,0+0
2,0018ae58b01bdadc8e347995b69f99aa,radboud,4,4+4
3,001c62abd11fa4b57bf7a6c603a11bb9,karolinska,4,4+4
4,001d865e65ef5d2579c190a0e0350d8f,karolinska,0,0+0


In [16]:
test.head()

Unnamed: 0,image_id,data_provider
0,005700be7e06878e6605e7a5a39de1b2,radboud
1,005c6e8877caf724c600fdce5d417d40,karolinska
2,0104f76634ff89bfff1ef0804a95c380,radboud


In [17]:
sample.head()

Unnamed: 0,image_id,isup_grade
0,005700be7e06878e6605e7a5a39de1b2,0
1,005c6e8877caf724c600fdce5d417d40,0
2,0104f76634ff89bfff1ef0804a95c380,0


### Class Definitions

In [18]:
class PatchEnetModel(nn.Module):
    """EfficientNet feature extractor applied per patch, with a custom head.

    The backbone's own average pooling is removed; pooling across patches is
    delegated to ``aggregate`` and the result is passed through the backbone's
    dropout and a replacement ``_fc`` head selected by ``CFG.model_cls``.

    Args:
        backbone: EfficientNet variant name; only ``"efficientnet-b0"`` is accepted.
        n: number of outputs; reduced by one when ``CFG.loss == "bce"``.
        pretrained: if True build the backbone with ``EfficientNet.from_pretrained``,
            otherwise with ``EfficientNet.from_name``.

    Raises:
        AssertionError: if ``backbone`` is not supported.
        ValueError: if ``CFG.model_cls`` is neither ``"deep"`` nor ``"one_layer"``.
    """

    def __init__(self, backbone="efficientnet-b0", n=CFG.target_size, pretrained=True):
        super().__init__()
        assert backbone in ["efficientnet-b0"]

        # if we use BCE loss, need n-1 outputs
        if CFG.loss == "bce":
            n -= 1

        if pretrained:
            self.model = EfficientNet.from_pretrained(backbone)
        else:
            self.model = EfficientNet.from_name(backbone)

        num_ftrs = self.model._fc.in_features
        # aggregate uses concat pooling, so every head takes 2 * num_ftrs inputs
        if CFG.model_cls == "deep":
            self.model._fc = nn.Sequential(
                OrderedDict(
                    [
                        ("cls_fc", nn.Linear(2 * num_ftrs, 512)),
                        ("cls_bn", nn.BatchNorm1d(512)),
                        ("cls_relu", nn.ReLU(inplace=True)),
                        ("cls_logit", nn.Linear(512, n)),
                    ]
                )
            )
        elif CFG.model_cls == "one_layer":
            self.model._fc = nn.Sequential(
                OrderedDict([("cls_logit", nn.Linear(2 * num_ftrs, n))])
            )
        else:
            # Previously an unknown value silently kept the stock _fc layer,
            # whose input width (num_ftrs) does not match the 2 * num_ftrs
            # features the heads above are built for.
            raise ValueError(
                f"Unsupported CFG.model_cls: {CFG.model_cls!r}; "
                "expected 'deep' or 'one_layer'"
            )
        del self.model._avg_pooling  # use pooling in aggregate func

    def forward(self, x):
        """Compute outputs for a batch of patch stacks.

        Args:
            x: tensor of shape (batch_size, num_patch, C, H, W).

        Returns:
            The output of the ``_fc`` head applied to the aggregated features.
        """
        batch_size, num_patch, C, H, W = x.shape

        x = x.view(-1, C, H, W)  # x -> bs*num_patch x C x H x W
        x = self.model.extract_features(
            x
        )  # x -> bs*num_patch x C(Maps) x H(Maps) x W(Maps)

        # Pool the per-patch feature maps back to one vector per sample.
        x = aggregate(x, batch_size, num_patch)
        x = self.model._dropout(x)
        x = self.model._fc(x)
        return x

### Model

In [19]:
ls {ADD_DIR}/debug_2_loss.pth

/home/ademyanchuk/Projects/ChestXRay/submit_panda/debug_2_loss.pth


In [20]:
def init_model(arch=CFG.arch, pretrained=False):
    """Build the patch model for the requested architecture.

    Args:
        arch: architecture name; ``"efnet"`` selects ``PatchEnetModel``, any
            other value is forwarded to ``PatchModel``.
        pretrained: passed through to the model constructor.

    Returns:
        The constructed model instance.
    """
    # separate Patch Model class for EfficientNet
    # no bias init as in the 0.87 kernel so use model as in kernel
    # and everything else as in my pipeline
    if arch == "efnet":
        model_ft = PatchEnetModel(pretrained=pretrained)
    else:
        # Forward the requested arch; CFG.arch used to be passed here
        # unconditionally, which silently ignored the argument.
        model_ft = PatchModel(arch=arch, pretrained=pretrained)
    return model_ft

In [21]:
# Checkpoint file names, resolved relative to ADD_DIR; one model is built per file.
model_fnames = [
    "debug_2_loss.pth",
]
# Load Model/s
models = []
for name in model_fnames:
    model = init_model()

    weights_path = ADD_DIR/name
    # map_location remaps the stored tensors onto the selected device.
    checkpoint = torch.load(weights_path, map_location=device)
    # The checkpoint is indexed by key; the weights are under 'model_state_dict'.
    model.load_state_dict(checkpoint['model_state_dict'])
    models.append(model)
    
# Display how many models were loaded.
len(models)

1

### Inference

In [22]:
def compute_preds(outputs, loss_type=CFG.loss):
    """Turn raw model outputs into integer class predictions.

    For ``loss_type == "bce"`` the sigmoids of the outputs are summed along
    dim 1 and rounded; for any other loss type the argmax along dim 1 is used.

    Args:
        outputs: tensor of model outputs, one row per sample.
        loss_type: name of the loss the model was trained with.

    Returns:
        NumPy integer array with one prediction per row of ``outputs``.
    """
    if loss_type != "bce":
        class_ids = outputs.data.to("cpu").numpy().argmax(1)
        return class_ids.astype(int)

    summed = outputs.sigmoid().sum(1)
    rounded = summed.data.cpu().round().numpy()
    return rounded.astype(int)

In [23]:
# Width of the model output: one less than CFG.target_size under BCE loss.
TARGET_SZ = CFG.target_size - 1 if CFG.loss == "bce" else CFG.target_size

def inference(models, test_loader, device):
    """Average the outputs of all models and convert them to predictions.

    Args:
        models: non-empty sequence of models; each is moved to ``device`` and
            switched to eval mode.
        test_loader: iterable yielding batches of images (tensors).
        device: device on which inference is run.

    Returns:
        NumPy integer array with the predictions of all batches concatenated;
        an empty integer array when ``test_loader`` yields no batches.

    Raises:
        ValueError: if ``models`` is empty (the average would divide by zero
            and silently produce NaN predictions).
    """
    if not models:
        raise ValueError("inference() requires at least one model")

    for model in models:
        model.to(device)
        model.eval()

    preds = []

    for images in test_loader:
        images = images.to(device)
        # Accumulator for the summed model outputs of this batch.
        y_preds = torch.zeros((len(images), TARGET_SZ), device=device)

        with torch.no_grad():
            for model in models:
                y_preds += model(images)
            y_preds = y_preds / len(models)

        preds.append(compute_preds(y_preds))

    # np.concatenate raises on an empty list, so handle the no-batch case.
    if not preds:
        return np.empty(0, dtype=int)

    return np.concatenate(preds)

In [24]:
def submit(sample):
    """Fill ``sample['isup_grade']`` with predictions when test images exist.

    When ``TEST_PATH`` does not exist the frame is returned unchanged.
    Otherwise the column is overwritten in place on the passed frame.

    Args:
        sample: DataFrame handed to ``PatchTestDataset``.

    Returns:
        The same ``sample`` object.
    """
    if not TEST_PATH.exists():
        return sample

    loader = DataLoader(
        PatchTestDataset(sample, img_path=TEST_PATH),
        batch_size=CFG.batch_size,
        shuffle=False,
        num_workers=2,
    )
    sample['isup_grade'] = inference(models, loader, device)
    return sample

In [27]:
# Locally, predict on a random sample of 50 training rows instead of the
# sample submission table.
if local:
    sample = train.sample(50)
submission = submit(sample)
# Store the grades as integers.
submission['isup_grade'] = submission['isup_grade'].astype(int)
submission.head()

Unnamed: 0,image_id,data_provider,isup_grade,gleason_score
10384,fae4428cb40950b27cf533531356ffab,karolinska,1,3+3
3649,5ad2c7349c24ed5c4901522bec9d3077,karolinska,1,0+0
3946,6232504a502236dc3e38f9fe46619308,radboud,2,4+3
5815,8ea5bb9e2220e1906653030284775e1a,radboud,2,4+3
2288,38e41caa38bdc405984285a96cd11436,karolinska,2,4+4


In [None]:
# Optional step (off by default): shuffle the predicted grades among the rows
# whose prediction is 2, 3 or 4.
permute = False
if not (TEST_PATH.exists() and permute):
    print("No hard class permutation")
else:
    hard_rows = submission.isup_grade.isin([2,3,4])
    hard_cls_values = submission.loc[hard_rows, "isup_grade"].copy().values
    np.random.shuffle(hard_cls_values)
    submission.loc[hard_rows, "isup_grade"] = hard_cls_values

In [None]:
# Write the submission table to submission.csv without the index column.
submission.to_csv('submission.csv', index=False)