In [1]:
import numpy as np 
import pandas as pd
import os
import warnings
import random
import matplotlib.pyplot as plt
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
import shapely

import sys
sys.path.append('../../')
from utils.LungSet import LungSet
from utils.utils import train_model, evaluate_model, generate_preds, build_model, get_available_devices
from utils.utils import get_pred, retrieve_rois

# Silence library warnings so the cell outputs stay readable.
warnings.filterwarnings("ignore")

# Seed every random number generator used in this notebook (Python, NumPy,
# PyTorch CPU and CUDA) and force deterministic cuDNN kernels so that runs
# are repeatable.
seed = 2024
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Detect available devices; the first entry is used as the primary device.
# (A former `device = torch.device(...)` assignment was removed: it was
# always overwritten here before being used.)
devices = get_available_devices()
device = devices[0]
device

Number of GPUs available: 2
GPU 0: NVIDIA A100-SXM4-80GB
GPU 1: NVIDIA A100-SXM4-80GB


device(type='cuda', index=0)

In [2]:
# Number of samples per batch handed to the validation DataLoader.
BATCH_SIZE = 2048

In [9]:
# Load the index of selected validation patches, ordered by patient, and
# prepend '../' to every stored path (presumably because the paths were saved
# relative to a directory one level above this notebook - verify).
valid_df = (
    pd.read_csv('../../data/processed/dataframe_validation_selected.csv')
    .sort_values(by=['patient'])
    .assign(path=lambda frame: '../' + frame['path'])
)
valid_df

Unnamed: 0,patient,filename,image,path
662428,0Rv3MjnLWH,0Rv3MjnLWH_a.tif,0Rv3MjnLWH_a_26560_87040.jpg,../../data/processed/val/patches_as_jpg_full/0...
662093,0Rv3MjnLWH,0Rv3MjnLWH_a.tif,0Rv3MjnLWH_a_64448_184768.jpg,../../data/processed/val/patches_as_jpg_full/0...
662092,0Rv3MjnLWH,0Rv3MjnLWH_a.tif,0Rv3MjnLWH_a_71136_33728.jpg,../../data/processed/val/patches_as_jpg_full/0...
662091,0Rv3MjnLWH,0Rv3MjnLWH_a.tif,0Rv3MjnLWH_a_54752_21696.jpg,../../data/processed/val/patches_as_jpg_full/0...
662090,0Rv3MjnLWH,0Rv3MjnLWH_a.tif,0Rv3MjnLWH_a_24256_30304.jpg,../../data/processed/val/patches_as_jpg_full/0...
...,...,...,...,...
195602,zmvPBONP7H,zmvPBONP7H_a.tif,zmvPBONP7H_a_71968_29408.jpg,../../data/processed/val/patches_as_jpg_full/z...
195603,zmvPBONP7H,zmvPBONP7H_a.tif,zmvPBONP7H_a_22432_132768.jpg,../../data/processed/val/patches_as_jpg_full/z...
195604,zmvPBONP7H,zmvPBONP7H_a.tif,zmvPBONP7H_a_20896_131744.jpg,../../data/processed/val/patches_as_jpg_full/z...
195606,zmvPBONP7H,zmvPBONP7H_a.tif,zmvPBONP7H_a_58496_70592.jpg,../../data/processed/val/patches_as_jpg_full/z...


In [10]:
# Convert each image to a tensor, then normalize the three channels with
# mean 0.5 and std 0.5.
transformations = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize([0.5] * 3, [0.5] * 3),
    ]
)

# The labels are an all-zero frame with one row per patch (presumably
# placeholders, since only predictions are needed here - confirm).
X_valid = valid_df
y_valid = pd.DataFrame(np.zeros(len(valid_df)))

validset = LungSet(X_valid, y_valid, transforms=transformations)
# shuffle=False keeps the batches in the same row order as valid_df.
validloader = DataLoader(validset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Build the network from the Ozenciga checkpoint.
OZENCIGA_MODEL_PATH = '../../models/ozenciga_model/tenpercent_resnet18.ckpt'
model = build_model(OZENCIGA_MODEL_PATH, device)

# Wrap in DataParallel over every detected device *before* loading the
# fine-tuned weights (presumably the state dict was saved from a DataParallel
# model and therefore carries 'module.'-prefixed keys - confirm).
model = nn.DataParallel(model, device_ids=list(range(len(devices))))
MODEL_PATH = '../../models/digilut_model_fine_tuned_with_ozenciga.pt'
model_dict = torch.load(MODEL_PATH, map_location=device)
model.load_state_dict(model_dict)
model = model.to(device)
model.eval()

# Run inference on the validation patches and attach the predictions.
# NOTE(review): `.numpy()` requires a CPU tensor - confirm that
# generate_preds returns its predictions on the CPU.
y_valid, y_valid_preds = generate_preds(model, validloader, device)
valid_df['preds'] = y_valid_preds.numpy()
# Fixed typo: this was `dispaly(valid_df)`, which raised NameError after the
# whole inference pass had already run.
display(valid_df)

In [12]:
# Validation rows to predict: each row carries a trustii_id and a slide filename.
df_val = pd.read_csv("../../data/raw/validation.csv")

# Patch-level predictions are read from a previously exported CSV.
# (A former `pred_csv = valid_df` assignment was removed: it was immediately
# overwritten by this read. To use the predictions computed in this notebook
# instead, assign `pred_csv = valid_df` after running the inference cell.)
pred_csv = pd.read_csv("../../first_modeling/validation_dataframe_with_preds.csv")

# Unique slide filenames, and an empty frame for the submission rows.
filenames = df_val.filename.unique()
submission_df = pd.DataFrame()
submission_df

In [13]:
# Build the submission: for every validation slide, emit exactly as many
# bounding boxes as the slide has rows in df_val, one per trustii_id.
SUBMISSION_COLUMNS = ["trustii_id", "filename", "x1", "x2", "y1", "y2"]

# Rows are collected in a list and turned into a DataFrame once at the end;
# concatenating onto a DataFrame inside the loop is quadratic and leaves every
# row with index label 0.
submission_rows = []

for slide in tqdm(filenames):
    df_val_slide = df_val[df_val.filename == slide]
    trustii_ids = df_val_slide.trustii_id.values
    nb_roi = len(df_val_slide)  # number of boxes to output for this slide

    # Keep only the patches predicted as class 1 and turn them into ROIs.
    coords, preds = get_pred(pred_csv, slide)
    rois = retrieve_rois(coords[preds == 1], 512, nb_roi)

    # Pad with zero-area boxes so that nb_roi ROIs can always be selected.
    if len(rois) < nb_roi:
        rois = list(rois)
        rois.extend([shapely.box(0, 0, 0, 0)] * (nb_roi - len(rois)))
        rois = np.array(rois)

    # Select the nb_roi largest ROIs by area (argpartition does not order the
    # selected ROIs among themselves).
    rois_area = [roi.area for roi in rois]
    selected_rois = rois[np.argpartition(rois_area, -nb_roi)[-nb_roi:]]

    for i, selected_roi in enumerate(selected_rois):
        bbox = selected_roi.bounds
        # NOTE(review): shapely's `.bounds` is (minx, miny, maxx, maxy), so
        # "x2" receives miny and "y1" receives maxx. The original mapping is
        # kept unchanged - confirm it matches the expected submission schema.
        submission_rows.append(
            {
                "trustii_id": trustii_ids[i],
                "filename": slide,
                "x1": int(bbox[0]),
                "x2": int(bbox[1]),
                "y1": int(bbox[2]),
                "y2": int(bbox[3]),
            }
        )

# Explicit columns keep the sort valid even when no row was produced.
submission_df = (
    pd.DataFrame(submission_rows, columns=SUBMISSION_COLUMNS)
    .sort_values(by="trustii_id")
    .reset_index(drop=True)
)
submission_df

100%|██████████| 165/165 [00:30<00:00,  5.37it/s]


Unnamed: 0,trustii_id,filename,x1,x2,y1,y2
0,1,hqi5y2OzZy_b.tif,18144,143136,18656,144160
0,2,1xebGQuAM7_b.tif,47328,67264,49888,68800
0,3,8xGdkL0vZt_a.tif,69856,5632,73440,9216
0,4,LQj5lC48hB_a.tif,6112,62944,7648,63968
0,5,9NlPhYAFUH_a.tif,25152,171456,25664,172992
...,...,...,...,...,...,...
0,758,7kiGhyiFBZ_b.tif,71072,39360,72096,41408
0,759,M62FqXX2cW_a.tif,4096,154304,4608,154816
0,760,AIg925SQy8_a.tif,68288,81600,68800,82624
0,761,OXCUbLOBGD_a.tif,10464,121536,11488,123584


In [15]:
# Write the submission without the index column. `to_csv` does not create
# missing parent directories, so make sure the output directory exists first.
SUBMISSION_PATH = '../../data/predictions/submission_no_normalization.csv'
os.makedirs(os.path.dirname(SUBMISSION_PATH), exist_ok=True)
submission_df.to_csv(SUBMISSION_PATH, index=False, encoding='UTF-8')