In [10]:
import os
import skimage.io
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm
from glob import glob
import sys
sys.path.append('../input/sources')
from preprocess_model import load_models as prep_models
from preprocess_load_data import inference_PANDADataset as prep_panda
from torch.utils.data import DataLoader

In [2]:
crop_size=512
image_size = 512
data_dir = '../input/prostate-cancer-grade-assessment/'
df_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
df_test = pd.read_csv(os.path.join(data_dir, 'test.csv'))

image_folder = os.path.join(data_dir, 'test_images/')
is_test = os.path.exists(image_folder)  # IF test_images is not exists, we will use some train images.
image_folder = image_folder if is_test else os.path.join(data_dir, 'train_images/')

model_dir='../result/'

df = df_test if is_test else df_train.loc[:9]


In [3]:

def inference_gleason():

    batch_size = 12
    num_workers = 12
    out_dim = 6

    backborn='efficientnet-b0'

    device = torch.device('cuda')

    img_list=[]
    for img_id in tqdm(df['image_id'].values):
        img=skimage.io.MultiImage(os.path.join(image_folder,img_id)+'.tiff')[1]
        for i in range(img.shape[0]//crop_size+1):
            for j in range(img.shape[1]//crop_size+1):
                img_list.append(img_id+'_'+str(i)+'_'+str(j))

    df_cropped=pd.DataFrame({'image_id':img_list})

    model_files = [
        'efficientnet-b0_preprocess_512to512_best_fold0.pth'
    ]

    models = prep_models(model_files,model_dir,out_dim,device,backborn)

    dataset = prep_panda(df_cropped, image_size,crop_size,image_folder)
    loader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, shuffle=False)

    LOGITS = []
    PREDS=[]
    with torch.no_grad():
        for data in tqdm(loader):
            data = data.to(device)
            logits = models[0](data)
            LOGITS.append(logits)
            pred=logits.argmax(1).detach()
            PREDS.append(pred)

    LOGITS = torch.cat(LOGITS).cpu().numpy()
    PREDS = torch.cat(PREDS).cpu().numpy()

    imgid=[imgid.split('_')[0] for imgid in df_cropped['image_id'].values]
    idx1=[imgid.split('_')[1] for imgid in df_cropped['image_id'].values]
    idx2=[imgid.split('_')[2] for imgid in df_cropped['image_id'].values]

    tmp=pd.DataFrame({'image_id':imgid,
                'idx1':np.array(idx1).astype(int),
                'idx2':np.array(idx2).astype(int),
                'gleason':PREDS})

    return tmp

In [4]:
df_gleason=inference_gleason()

100%|██████████| 10/10 [00:00<00:00, 12.24it/s]
  0%|          | 0/128 [00:00<?, ?it/s]

../result/efficientnet-b0_preprocess_512to512_best_fold0.pth loaded!


100%|██████████| 128/128 [00:13<00:00,  9.53it/s]


In [8]:
from main_model import load_models as inference_model
from main_load_data import PANDADataset

crop_size=512
n_split=4 # square number
n_tiles=36
tile_size=256
image_size = tile_size*int(np.sqrt(n_tiles))
batch_size = 4
num_workers = 4
out_dim = 5
gpu_id =0
device = torch.device(f'cuda:{gpu_id}')

enet_type = 'efficientnet-b0'
kernel_type = enet_type+'_train_'+str(crop_size)+'to'+str(image_size)

model_files = [
            'efficientnet-b0_train_512to1536_best_fold0.pth'
            ]
models = inference_model(model_files,model_dir,out_dim,device,enet_type)

dataset = PANDADataset(df , df_gleason, image_size, n_tiles,image_folder,
                                tile_size,crop_size,n_split,prediction=True,transform=None)
loader=DataLoader(dataset, batch_size=batch_size, num_workers=num_workers)

LOGITS = []
PREDS=[]
with torch.no_grad():
    for data in tqdm(loader):
        data = data.to(device)
        logits = models[0](data)
        LOGITS.append(logits)

LOGITS = torch.cat(LOGITS).sigmoid().cpu() 
PREDS = LOGITS.sum(1).round().numpy()


df['isup_grade'] = PREDS.astype(int)

  0%|          | 0/3 [00:00<?, ?it/s]

../result/efficientnet-b0_train_512to1536_best_fold0.pth loaded!


100%|██████████| 3/3 [00:01<00:00,  1.75it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [9]:
df[['image_id', 'isup_grade']]
# df[['image_id', 'isup_grade']].to_csv('submission.csv', index=False)

Unnamed: 0,image_id,isup_grade
0,0005f7aaab2800f6170c399693a96917,0
1,000920ad0b612851f8e01bcc880d9b3d,0
2,0018ae58b01bdadc8e347995b69f99aa,4
3,001c62abd11fa4b57bf7a6c603a11bb9,4
4,001d865e65ef5d2579c190a0e0350d8f,0
5,002a4db09dad406c85505a00fb6f6144,1
6,003046e27c8ead3e3db155780dc5498e,1
7,0032bfa835ce0f43a92ae0bbab6871cb,2
8,003a91841da04a5a31f808fb5c21538a,1
9,003d4dd6bd61221ebc0bfb9350db333f,0
