In [4]:
import numpy as np 
import pandas as pd
import os
import warnings
import random
import matplotlib.pyplot as plt
import cv2
import pytorch_lightning
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import shapely
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from tqdm import tqdm

import sys
sys.path.append('../../../')
from utils.LungSet import LungSet
from utils.utils import train_model, evaluate_model, generate_preds, build_model, get_available_devices, generate_preds_probas
from utils.utils import get_pred, retrieve_rois

# Silence library warnings so the cell outputs stay readable.
warnings.filterwarnings("ignore")

# Seed every random number generator used in this notebook (Python, NumPy,
# PyTorch CPU and all CUDA devices) so that repeated runs are comparable.
seed=2024
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
# Ask cuDNN for deterministic kernels and turn off its autotuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False


# Detect available devices
devices = get_available_devices()
# Use the first detected device. The generic CUDA/CPU choice used to be
# assigned first and then silently overwritten; it is now an explicit fallback
# for the case where the helper reports no device at all.
device = (
    devices[0]
    if len(devices) > 0
    else torch.device("cuda" if torch.cuda.is_available() else "cpu")
)
device

Number of GPUs available: 2
GPU 0: NVIDIA A100-SXM4-80GB
GPU 1: NVIDIA A100-SXM4-80GB


device(type='cuda', index=0)

In [5]:
# Number of samples per batch handed to the validation DataLoader.
BATCH_SIZE = 1024

In [6]:
# Load the selected validation patches, order them by patient, and prefix each
# stored patch path with one extra parent directory.
valid_df = (
    pd.read_csv('../../../data/processed/dataframe_validation_selected.csv')
    .sort_values(by=['patient'])
    .assign(path=lambda frame: '../' + frame['path'])
)
valid_df

Unnamed: 0,patient,filename,image,path
662428,0Rv3MjnLWH,0Rv3MjnLWH_a.tif,0Rv3MjnLWH_a_26560_87040.jpg,../../data/processed/val/patches_as_jpg_full/0...
662093,0Rv3MjnLWH,0Rv3MjnLWH_a.tif,0Rv3MjnLWH_a_64448_184768.jpg,../../data/processed/val/patches_as_jpg_full/0...
662092,0Rv3MjnLWH,0Rv3MjnLWH_a.tif,0Rv3MjnLWH_a_71136_33728.jpg,../../data/processed/val/patches_as_jpg_full/0...
662091,0Rv3MjnLWH,0Rv3MjnLWH_a.tif,0Rv3MjnLWH_a_54752_21696.jpg,../../data/processed/val/patches_as_jpg_full/0...
662090,0Rv3MjnLWH,0Rv3MjnLWH_a.tif,0Rv3MjnLWH_a_24256_30304.jpg,../../data/processed/val/patches_as_jpg_full/0...
...,...,...,...,...
195602,zmvPBONP7H,zmvPBONP7H_a.tif,zmvPBONP7H_a_71968_29408.jpg,../../data/processed/val/patches_as_jpg_full/z...
195603,zmvPBONP7H,zmvPBONP7H_a.tif,zmvPBONP7H_a_22432_132768.jpg,../../data/processed/val/patches_as_jpg_full/z...
195604,zmvPBONP7H,zmvPBONP7H_a.tif,zmvPBONP7H_a_20896_131744.jpg,../../data/processed/val/patches_as_jpg_full/z...
195606,zmvPBONP7H,zmvPBONP7H_a.tif,zmvPBONP7H_a_58496_70592.jpg,../../data/processed/val/patches_as_jpg_full/z...


In [8]:
# The only preprocessing step is the conversion of each image to a tensor.
transformations = transforms.Compose([transforms.ToTensor()])

# The validation frame is used as the feature table; the label table is a
# single all-zero column of the same length.
X_valid = valid_df
y_valid = pd.DataFrame(np.zeros(len(valid_df)))

validset = LungSet(X_valid, y_valid, transforms=transformations)
# shuffle=False keeps batches in dataset order.
validloader = DataLoader(
    validset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [6]:
# Detect available devices
devices = get_available_devices()
device = devices[0]
device_ids = list(range(len(devices)))

# Backbone checkpoint handed to build_model for every fold (loop-invariant).
# NOTE(review): this path goes up two directories while the fine-tuned fold
# checkpoints below go up three -- confirm both locations are intended.
OZENCIGA_MODEL_PATH = '../../models/ozenciga_model/tenpercent_resnet18.ckpt'


def _match_checkpoint_keys(state_dict, expected_keys):
    """Re-key a checkpoint so that it fits the parameter names of the target model.

    ``nn.DataParallel`` prefixes every parameter name with ``module.`` (once per
    wrapping level). This helper removes leading ``module.`` prefixes from a
    checkpoint key only for as long as the key is not already a name the target
    model expects, so checkpoints saved from a bare, singly wrapped or doubly
    wrapped model can all be loaded into the same model.

    Args:
        state_dict: mapping of parameter name -> tensor, as returned by ``torch.load``.
        expected_keys: iterable of parameter names of the model that will load the weights.

    Returns:
        A new dict with the same values and the adjusted keys. Keys that cannot
        be matched are kept (possibly stripped), so ``load_state_dict`` still
        reports them as unexpected instead of silently dropping them.
    """
    expected = set(expected_keys)
    prefix = 'module.'
    aligned = {}
    for key, tensor in state_dict.items():
        while key not in expected and key.startswith(prefix):
            key = key[len(prefix):]
        aligned[key] = tensor
    return aligned


all_preds, all_probas = [], []
for fold in range(5):
    print(f'Fold {fold}')
    model = build_model(OZENCIGA_MODEL_PATH, device)

    # Load the fine-tuned weights of this fold into the bare model first.
    # The original cell wrapped the model in nn.DataParallel twice, which
    # forces a 'module.module.' key prefix and nests scatter/gather calls.
    MODEL_PATH = f'../../../models/digilut_model_fine_tuned_with_ozenciga_normalized_fold{fold}.pt'
    model_dict = torch.load(MODEL_PATH, map_location=device)
    model.load_state_dict(_match_checkpoint_keys(model_dict, model.state_dict().keys()))

    # Wrap exactly once for multi-GPU inference.
    model = nn.DataParallel(model, device_ids=device_ids)
    model = model.to(device)
    model.eval()

    # Collect the per-fold predictions and probabilities for the ensemble step.
    y_valid_preds, y_valid_probas = generate_preds_probas(model, validloader, device)
    all_preds.append(y_valid_preds)
    all_probas.append(y_valid_probas)

Number of GPUs available: 1
GPU 0: NVIDIA A100-SXM4-80GB
Fold 0


                                                   

Fold 0 Done!
Fold 1


                                                   

Fold 1 Done!
Fold 2


                                                   

Fold 2 Done!
Fold 3


                                                   

Fold 3 Done!
Fold 4


                                                   

Fold 4 Done!


In [None]:
from utils.utils import majority_voting, arith_mean, geo_mean, harm_mean

In [None]:
# Unpack the five per-fold outputs (this also enforces that exactly five folds
# were evaluated) and stack them along a new leading "fold" axis.
y_proba_0,y_proba_1,y_proba_2,y_proba_3,y_proba_4 = all_probas

yhat0,yhat1,yhat2,yhat3,yhat4 = all_preds

probs = torch.stack([y_proba_0, y_proba_1, y_proba_2, y_proba_3, y_proba_4], dim=0)
preds = np.stack([yhat0, yhat1, yhat2, yhat3, yhat4], axis=0)

# Combine the folds with each of the available ensembling strategies.
majority_preds = majority_voting(preds)
final_arith_preds = arith_mean(probs)
final_geo_preds = geo_mean(probs)
final_harm_preds = harm_mean(probs)

# Only the arithmetic-mean ensemble is used for this submission.
valid_df['preds'] = final_arith_preds.numpy()
display(valid_df)


df_val  = pd.read_csv("../../../data/raw/validation.csv")
pred_csv = valid_df

filenames = df_val.filename.unique()

# Collect one record per requested ROI and build the DataFrame once at the end;
# concatenating a one-row frame per iteration copies the whole frame each time.
submission_columns = ["trustii_id", "filename", "x1", "x2", "y1", "y2"]
submission_records = []

for slide in tqdm(filenames):
    df_val_slide = df_val[df_val.filename==slide]
    trustii_ids = df_val_slide.trustii_id.values
    nb_roi = len(df_val_slide)  # one box must be submitted per validation row of the slide

    # `slide_preds` (not `preds`) so the stacked per-fold array above is not shadowed.
    coords, slide_preds = get_pred(pred_csv, slide)
    # presumably 512 is the patch side length in pixels -- TODO confirm against retrieve_rois
    rois = retrieve_rois(coords[slide_preds==1], 512, nb_roi)

    # Pad with empty boxes when fewer ROIs than required were found.
    if len(rois) < nb_roi:
        rois = list(rois)
        rois.extend([shapely.box(0,0,0,0)]*(nb_roi-len(rois)))
        rois = np.array(rois)

    # Keep the nb_roi ROIs with the largest area.
    rois_area = [roi.area for roi in rois]
    selected_rois = rois[np.argpartition(rois_area, -nb_roi)[-nb_roi:]]

    for trustii_id, selected_roi in zip(trustii_ids, selected_rois):
        bbox = selected_roi.bounds
        # NOTE(review): shapely `bounds` is (minx, miny, maxx, maxy), so this writes
        # miny into "x2" and maxx into "y1". The mapping is kept unchanged here --
        # confirm it against the expected submission schema.
        submission_records.append({
            "trustii_id": trustii_id,
            "filename": slide,
            "x1": int(bbox[0]),
            "x2": int(bbox[1]),
            "y1": int(bbox[2]),
            "y2": int(bbox[3]),
        })

submission_df = pd.DataFrame(submission_records, columns=submission_columns)
submission_df = submission_df.sort_values(by="trustii_id")
display(submission_df)
submission_df.to_csv('../../../data/predictions/submission_no_normalization_CV_arithmean.csv', index=False, encoding='UTF-8')