### Imports

In [1]:
# Execution-environment switch read by the cells below:
#   True  -> config and data paths are imported from the local `chestxray` package,
#   False -> the inline CFG class and the ../input/... paths are used instead.
local = True

In [2]:
import random
import sys
import os
import cv2
import skimage.io
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from pathlib import Path

import albumentations as A

In [3]:
if local:
    # Local run: reuse the project's own configuration object and data locations.
    from chestxray.config import CFG, PANDA_IMGS, PANDA_PATH

    # Directory holding the extra inference code and the model checkpoint.
    # NOTE(review): machine-specific absolute path; adjust when running elsewhere.
    ADD_DIR = Path("/home/ademyanchuk/Projects/ChestXRay/submit_panda")
    # Locally, images are read from PANDA_IMGS instead of a separate test folder.
    TEST_PATH = PANDA_IMGS

In [4]:
# Non-local run: the `chestxray` package is not imported, so the configuration is
# declared inline and the data paths point at the ../input/... directories.
if not local:
    # Plain namespace of settings, accessed as CFG.<name>.
    # The visible cells of this notebook read only: img_id_col, target_col,
    # tiff_layer, num_tiles, tile_sz and batch_size.
    # NOTE(review): the remaining fields presumably mirror the training config - confirm.
    class CFG:
        # overall
        debug = False
        seed = 1982
        # data
        img_height = 1024
        img_width = 1024
        target_size = 6
        img_id_col = "image_id"
        target_col = "isup_grade"
        tiff_layer = 1
        stoch_sample = True
        num_tiles = 16
        tile_sz = 256
        batch_size = 4
        accum_step = 16  # effective batch size will be batch_size * accum_step
        dataset = "hdf5"  # "patch", "tiles", "lazy", "hdf5"
        aug_type = "light"  # "light" or "heavy"
        # model
        finetune = False  # or "1stage"
        model_cls = "one_layer"  # "one_layer" or "deep"
        schedule_type = "none"  # "one_cycle", "reduce_on_plateau" or "cawr"
        cawr_T = 1
        cawr_Tmult = 2
        # loss
        loss = "cce"  # "cce" or "ls_soft_ce"
        # optim
        optim = "adam"  # "adam", "sgd" or "radam"
        # evaluated once, when the class body runs, from the `optim` value just above
        lr = 0.001 if optim == "sgd" else 3e-4
        # schedule
        rlopp = 3  # learning rate on plateau scheduler patience
        # training
        epoch = 50
        n_fold = 4
        use_amp = True

    # Dataset root, directory of images to predict on, and the directory that
    # holds the extra inference code and the model checkpoint.
    PANDA_PATH = Path('../input/prostate-cancer-grade-assessment')
    TEST_PATH = Path('../input/prostate-cancer-grade-assessment/test_images')
    ADD_DIR = Path("../input/panda0")

In [5]:
# Make the modules stored in ADD_DIR importable (submit_patch is imported from there).
# Guarded so that re-running this cell does not append the same entry again.
if str(ADD_DIR) not in sys.path:
    sys.path.append(str(ADD_DIR))

In [6]:
# Imports from dataset
from submit_patch import PatchModel

### Setup

In [7]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

In [8]:
def seed_torch(seed=1982):
    """Seed the random number generators used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (``torch.manual_seed`` and ``torch.cuda.manual_seed``), stores the seed in
    the ``PYTHONHASHSEED`` environment variable and switches cuDNN to
    deterministic mode.

    Args:
        seed: integer seed shared by all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed)
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True


seed_torch()

In [9]:
# Sanity check: show what is available directly under the dataset root.
os.listdir(PANDA_PATH)

['train_label_masks',
 'sample_submission.csv',
 'test.csv',
 'train_images',
 'folds.csv',
 'folds_db.csv',
 'hdf5',
 'train.csv']

In [10]:
# Confirm the image directory exists; submit() only runs inference when it does.
TEST_PATH.exists()

True

### Data Loading

In [11]:
# Load the three metadata tables stored at the dataset root.
train, test, sample = (
    pd.read_csv(PANDA_PATH / csv_name)
    for csv_name in ('train.csv', 'test.csv', 'sample_submission.csv')
)

In [12]:
# Preview the first rows of the training table.
train.head()

Unnamed: 0,image_id,data_provider,isup_grade,gleason_score
0,0005f7aaab2800f6170c399693a96917,karolinska,0,0+0
1,000920ad0b612851f8e01bcc880d9b3d,karolinska,0,0+0
2,0018ae58b01bdadc8e347995b69f99aa,radboud,4,4+4
3,001c62abd11fa4b57bf7a6c603a11bb9,karolinska,4,4+4
4,001d865e65ef5d2579c190a0e0350d8f,karolinska,0,0+0


In [13]:
# Preview the first rows of the test table.
test.head()

Unnamed: 0,image_id,data_provider
0,005700be7e06878e6605e7a5a39de1b2,radboud
1,005c6e8877caf724c600fdce5d417d40,karolinska
2,0104f76634ff89bfff1ef0804a95c380,radboud


In [14]:
# Preview the sample submission; this frame is later passed to submit() when not running locally.
sample.head()

Unnamed: 0,image_id,isup_grade
0,005700be7e06878e6605e7a5a39de1b2,0
1,005c6e8877caf724c600fdce5d417d40,0
2,0104f76634ff89bfff1ef0804a95c380,0


### Dataset

In [15]:
def make_patch(image, patch_size, num_patch):
    """Cut ``image`` into square tiles and keep the ``num_patch`` tiles with the lowest pixel sum.

    The image is padded with 255 on the bottom and right so both sides become
    multiples of ``patch_size``, split into a regular grid, and the tiles are
    ranked by the sum of their pixel values (lowest, i.e. least white, first).
    If the grid holds fewer than ``num_patch`` tiles, the result is topped up
    with all-255 filler tiles.

    Args:
        image: array of shape (H, W, C). Any channel count is accepted.
        patch_size: side length of each square tile, in pixels.
        num_patch: number of tiles to return.

    Returns:
        patch: array of shape (num_patch, patch_size, patch_size, C), ordered by
            ascending pixel sum, filler tiles last.
        coord: integer array of shape (num_patch, 4) holding (x0, y0, x1, y1) of
            each tile in the padded image; filler tiles get a row of -1.

    Raises:
        ValueError: if ``image`` is not 3-dimensional.
    """
    if image.ndim != 3:
        raise ValueError(
            f"make_patch expects an (H, W, C) image, got shape {image.shape}"
        )
    s = patch_size
    h, w, channels = image.shape

    # Bottom/right padding needed to reach the next multiple of the tile size.
    pad_y = -h % s
    pad_x = -w % s
    image = np.pad(
        image, ((0, pad_y), (0, pad_x), (0, 0)), mode="constant", constant_values=255
    )
    h, w = image.shape[:2]
    cols = w // s

    # (rows, s, cols, s, C) -> (rows, cols, s, s, C) -> (rows * cols, s, s, C);
    # tile k sits at grid row k // cols and grid column k % cols.
    patch = image.reshape(h // s, s, cols, s, channels)
    patch = patch.transpose(0, 2, 1, 3, 4).reshape(-1, s, s, channels)

    n = len(patch)
    index = np.argsort(patch.reshape(n, -1).sum(-1))[:num_patch]

    y = s * (index // cols)
    x = s * (index % cols)
    coord = np.stack([x, y, x + s, y + s]).T

    patch = patch[index]
    if len(patch) < num_patch:
        # Not enough tiles in the grid: append white filler tiles with -1 coordinates.
        n = num_patch - len(patch)
        patch = np.concatenate(
            [patch, np.full((n, s, s, channels), 255, dtype=np.uint8)], 0
        )
        coord = np.concatenate([coord, np.full((n, 4), -1)], 0)
    return patch, coord

In [16]:
# Channel-wise normalisation applied to the tile stack inside the dataset.
# NOTE(review): these mean/std values look like the usual ImageNet statistics -
# confirm they match what the model was trained with.
normalize = A.Compose([
    A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

In [17]:
class PatchTestDataset(Dataset):
    """Inference dataset that turns each image into a fixed-size stack of tiles.

    For every row of ``df`` the image is read from ``TEST_PATH``, cut into
    ``CFG.num_tiles`` tiles of side ``CFG.tile_sz`` with ``make_patch``,
    optionally augmented tile by tile, normalised with ``normalize`` and
    returned in (tiles, channels, height, width) layout.

    Args:
        df: DataFrame with a ``CFG.img_id_col`` column. A ``CFG.target_col``
            column is optional; when present its values are kept in ``labels``.
        transform: optional callable invoked as ``transform(image=tile)`` that
            returns a mapping with the augmented tile under ``"image"``.
        suffix: ``"tiff"`` (layer ``CFG.tiff_layer`` of ``<id>.tiff``) or
            ``"jpeg"`` (``<id>_1.jpeg``).
    """

    def __init__(self, df, transform=None, suffix="tiff"):
        self.df = df
        # Extract the id column once instead of on every __getitem__ call.
        self.image_ids = df[CFG.img_id_col].values
        # Labels are never used for inference, so frames without a target
        # column (for example the test table) are accepted as well.
        self.labels = (
            df[CFG.target_col].values if CFG.target_col in df.columns else None
        )
        self.transform = transform
        self.suffix = suffix

    def __len__(self):
        return len(self.df)

    def _load_image(self, file_id):
        """Read the image for ``file_id`` according to ``self.suffix``."""
        if self.suffix == "tiff":
            file_path = f"{TEST_PATH}/{file_id}.{self.suffix}"
            return skimage.io.MultiImage(file_path)[CFG.tiff_layer]
        if self.suffix == "jpeg":
            file_path = f"{TEST_PATH}/{file_id}_1.{self.suffix}"
            image = cv2.imread(file_path)
            if image is None:
                # cv2.imread signals failure by returning None rather than raising.
                raise OSError(f"cv2.imread could not read {file_path!r}")
            return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        raise ValueError(
            f"Unsupported suffix {self.suffix!r}; expected 'tiff' or 'jpeg'"
        )

    def __getitem__(self, idx):
        image = self._load_image(self.image_ids[idx])

        patch, coord = make_patch(
            image, patch_size=CFG.tile_sz, num_patch=CFG.num_tiles
        )
        # augment sequence: every tile is transformed independently
        if self.transform:
            for i in range(len(patch)):
                augmented = self.transform(image=patch[i])
                patch[i] = augmented["image"]

        normalized = normalize(image=patch)
        patch = normalized["image"]

        # (tiles, H, W, C) -> (tiles, C, H, W)
        patch = patch.transpose(0, 3, 1, 2)
        patch = np.ascontiguousarray(patch)

        return patch

### Model

In [18]:
# IPython shell shortcut: check that the checkpoint file is present in ADD_DIR.
ls {ADD_DIR}/24-05-2020-15-51_loss.pth

/home/ademyanchuk/Projects/ChestXRay/submit_panda/24-05-2020-15-51_loss.pth


In [19]:
# Read the checkpoint onto the selected device first, then build the network
# and copy the stored weights into it.
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
weights_path = ADD_DIR/"24-05-2020-15-51_loss.pth"
checkpoint = torch.load(weights_path, map_location=device)

# pretrained=False: the weights come from the checkpoint loaded above.
model = PatchModel(pretrained=False)
model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

### Inference

In [20]:
def inference(model, test_loader, device):
    """Run ``model`` over ``test_loader`` and return the predicted class per sample.

    Args:
        model: torch module whose output has the class scores along dimension 1.
        test_loader: iterable yielding batches of input tensors.
        device: device the model and every batch are moved to.

    Returns:
        1-D NumPy integer array with the argmax class index of every sample, in
        loader order. Empty when the loader yields no batches.
    """
    model.to(device)
    model.eval()

    preds = []

    # Gradients are never needed here, so disable them for the whole loop.
    with torch.no_grad():
        for images in test_loader:
            images = images.to(device)
            y_preds = model(images)
            preds.append(y_preds.to('cpu').numpy().argmax(1))

    if not preds:
        # np.concatenate raises on an empty list; return an empty result instead.
        return np.empty(0, dtype=np.int64)

    return np.concatenate(preds)

In [21]:
def submit(sample):
    """Fill ``sample['isup_grade']`` with model predictions and return ``sample``.

    When ``TEST_PATH`` does not exist the frame is returned untouched. Otherwise
    the images listed in ``sample`` are run through the module-level ``model``
    on ``device`` and the predicted classes are written into the frame in place.

    Args:
        sample: DataFrame accepted by ``PatchTestDataset``.

    Returns:
        The same DataFrame object that was passed in.
    """
    if not TEST_PATH.exists():
        return sample

    loader = DataLoader(
        PatchTestDataset(sample),
        batch_size=CFG.batch_size,
        shuffle=False,  # keep predictions aligned with the rows of `sample`
        num_workers=2,
    )
    sample['isup_grade'] = inference(model, loader, device)
    return sample

In [22]:
# Local runs score a random 50-row sample of the training table (TEST_PATH points
# at PANDA_IMGS there); otherwise `sample` is the sample-submission frame loaded earlier.
if local:
    sample = train.sample(50)
# submit() writes the predictions into the frame it receives and returns that same
# object, so `submission` and `sample` refer to one DataFrame.
submission = submit(sample)
# Store the predicted grade as a plain integer column.
submission['isup_grade'] = submission['isup_grade'].astype(int)
submission.head()

Unnamed: 0,image_id,data_provider,isup_grade,gleason_score
5515,87c639ec5365c2d6d61581be6407c8b6,radboud,5,4+5
7977,c10eb86a927f359f5d1e573927f19945,radboud,1,3+3
9742,ec11cdbfe083f766ebc6a851353a1662,karolinska,1,3+4
611,0f83710916fcc2edf1a7a9a103fd48ac,radboud,2,3+4
1837,2e53a31a0727921e823b781914782036,karolinska,1,3+3


In [None]:
# Write the predictions to submission.csv in the working directory, without the index column.
submission.to_csv('submission.csv', index=False)