In [2]:
import os
import sys
sys.path.insert(0, os.path.abspath("../"))

import yaml
import torch
import pytorch_lightning as pl
import pandas as pd

from dataset import PredictionDataset, HEPredDataModule
from model import ImageClassifier3

def load_config(config_path):
    with open(config_path, "r") as f:
        return yaml.safe_load(f)

CONFIG_PATH = "/media/cansu/DiskSpace/Cansu/HE_prediction_improved/configs/config_eff_t5_repeat_1_othermain.yml"
config = load_config(CONFIG_PATH)

pl.seed_everything(42, workers=True)

FOLD = 0
repo_path = os.getcwd()

test_csv = f"{repo_path}/data/test_fold_{FOLD}.csv"
test_df = pd.read_csv(test_csv)

md_path = repo_path + "/data/metadata.csv"
dataset = PredictionDataset(md_path=md_path, mask=True)

df = pd.DataFrame({
    "patient_id": dataset.patient_id,
    "index": dataset.index
})

df["patient_id"] = df["patient_id"].astype(str)
test_df["patient_id"] = test_df["patient_id"].astype(str)

test_indexes = df[df["patient_id"].isin(test_df["patient_id"])]["index"].values

X_test = dataset.index[test_indexes]
y_test = dataset.labels[test_indexes]

dm = HEPredDataModule(
    split_indexes=(None, None, X_test, None, None, y_test),
    filter_slices=True,
    mask=True,
    batch_size=config["BATCH_SIZE"],
    num_workers=8,
    use_2d=True,
    return_type="image",
    md_path=md_path,
    threshold=config["THRESHOLD"],
    roi=config["ROI"],
    problem=config["TASK"],
    image_size=config["INPUT_SIZE"],
    roi_size=config["INPUT_SIZE"],
    lesion=config["LESION"],
    test_type=config["TEST_TYPE"],
    apply_hflip = config["APPLY_HFLIP"],
    apply_affine = config["APPLY_AFFINE"],
    apply_gaussian_blur=config["APPLY_GAUSSIAN_BLUR"],
    affine_degree=config["AFFINE_DEGREE"],
    affine_translate=config["AFFINE_TRANSLATE"],
    affine_scale=config["AFFINE_SCALE"],
    affine_shear=config["AFFINE_SHEAR"], 
    hflip_p = config["HFLIP_P"], affine_p = config["AFFINE_P"]
)

dm.setup(stage="test")

checkpoint_path = '/media/cansu/DiskSpace/Cansu/HE_prediction_improved/checkpoints/org_da_sy_es5_hf05_repeat_2711_1_othermain/0/last_model-v1.ckpt'

model = ImageClassifier3.load_from_checkpoint(
    checkpoint_path,
    model=config["MODEL"],
    learning_rate=config["LR"],          # ignored but required
    optimizer=config["OPTIMIZER"],        # ignored
    weight_decay=config["WEIGHT_DECAY"],
    momentum=config["MOMENTUM"],
    lr_scheduler=config["LR_SCHEDULER"],
    step_size=config["STEP_SIZE"],
    gamma=config["GAMMA"],
    num_classes=1,
    pw_based="mean",
    test_pw_based="mean",
    loss=config["LOSS"],
    fold=FOLD,
    task=config["T"],
    back=config["BACKBONE"],
    name_experiment="inference_only",
    threshold=config["threshold_percentage"],
    save_predictions=True,
)

trainer = pl.Trainer(
    accelerator="gpu",
    devices=1,
    logger=False,
    enable_checkpointing=False,
)

results = trainer.test(model=model, datamodule=dm)
print(results)

Global seed set to 42
Global seed set to 42
  from .autonotebook import tqdm as notebook_tqdm
Global seed set to 42
Global seed set to 42
GPU available: True (cuda), used: True
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
HPU available: False, using: 0 HPUs


Using eff s
Using focal loss


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


number of slices after filtering in test  300
number of slices in the test set:  300
Testing DataLoader 0: 100%|██████████| 75/75 [00:09<00:00,  8.17it/s]self.patient_probs_with_slices after sorting:  {186: {50: 0.2798842489719391, 51: 0.18348172307014465, 52: 0.4362398684024811, 53: 0.5307512283325195, 54: 0.6029362678527832, 55: 0.38344618678092957, 56: 0.7900263667106628, 57: 0.6295958757400513, 58: 0.7625591158866882, 59: 0.8014891147613525, 60: 0.7094277739524841, 61: 0.7710890769958496, 62: 0.6327681541442871, 63: 0.7753778100013733, 64: 0.6813045144081116, 65: 0.7646058797836304, 66: 0.8323580622673035, 67: 0.8139166235923767, 68: 0.8662103414535522, 69: 0.687671422958374, 70: 0.8109976053237915}, 140: {39: 0.04508892446756363, 40: 0.27969086170196533, 41: 0.05185950547456741, 42: 0.027586758136749268, 43: 0.05641629919409752, 44: 0.008557043969631195, 45: 0.0013824669877067208, 46: 0.00453156465664506, 47: 0.0223369300365448, 48: 0.0036700654309242964, 49: 0.00360580044798553, 

  self.patient_probs_with_slices_df = self.patient_probs_with_slices_df.append({"patient_id": patient_id,
  self.patient_probs_with_slices_df = self.patient_probs_with_slices_df.append({"patient_id": patient_id,
  self.patient_probs_with_slices_df = self.patient_probs_with_slices_df.append({"patient_id": patient_id,
  self.patient_probs_with_slices_df = self.patient_probs_with_slices_df.append({"patient_id": patient_id,
  self.patient_probs_with_slices_df = self.patient_probs_with_slices_df.append({"patient_id": patient_id,
  self.patient_probs_with_slices_df = self.patient_probs_with_slices_df.append({"patient_id": patient_id,
  self.patient_probs_with_slices_df = self.patient_probs_with_slices_df.append({"patient_id": patient_id,
  self.patient_probs_with_slices_df = self.patient_probs_with_slices_df.append({"patient_id": patient_id,
  self.patient_probs_with_slices_df = self.patient_probs_with_slices_df.append({"patient_id": patient_id,
  self.patient_probs_with_slices_df = self.pat

[{'test_loss': 0.39712411165237427, 'test_acc': 0.6866666674613953, 'test_roc_auc_score_pw': 0.7142857142857144, 'test_roc_auc_score_iw': 0.6848072562358277, 'test_acc_pw_epoch': 0.7142857313156128, 'test_prec_pw_epoch': 0.800000011920929, 'test_f1_pw_epoch': 0.6666666865348816, 'test_sens_pw_epoch': 0.5714285969734192, 'test_sens_iw_epoch': 0.5918367505073547, 'test_spec_iw_epoch': 0.7777777910232544, 'test_spec_pw_epoch': 0.8571428656578064, 'test_loss_epoch': 0.39712414145469666, 'test_acc_epoch': 0.6866666674613953, 'test_prec_epoch': 0.7190082669258118, 'test_f1_epoch': 0.6492537260055542}]


# Add other datasets to our metadata_dall_data.csv 

In [None]:
md = repo_path + "/data/metadata.csv"
metadata_df = pd.read_csv(md)
metadata_df.columns 

Index(['patient_id', 'image_size', 'ct_ss_path', 'ct_nc_path', 'mask_path',
       'label', 'volume', 'IVH', 'index'],
      dtype='object')

In [None]:
## add hospital name column to the metadata_df
# but just name all the names as Hospital Josep Trueta Hospital Josep Trueta
metadata_df["Hospital name"] = "Hospital Josep Trueta"

In [8]:
excel_file = "/media/cansu/DiskSpace/Cansu/HE_prediction_improved/data/RAINS_vicorob/all_data_inc_trueta.xlsx"
df = pd.read_excel(excel_file)
df

Unnamed: 0,Hospital name,id,"Basal volume, ml","FU volume, ml","Absolute vol diff, ml",Relative vol diff (FU_vol/Basal_vol),HE vicorob,HE real
0,Hospital Clínic,2098,5.02,4.87,-0.150,0.970,0.0,
1,Hospital Clínic,233632,36.07,39.24,3.171,1.088,0.0,
2,Hospital Clínic,261065,9.50,11.41,1.915,1.202,0.0,
3,Hospital Clínic,34333,,,,,,
4,Hospital Clínic,397280,9.37,8.33,-1.038,0.889,0.0,
...,...,...,...,...,...,...,...,...
477,Hospital Josep Trueta,pt217,0.45,22.26,21.800,49.060,1.0,1.0
478,Hospital Josep Trueta,pt218,7.19,7.24,0.050,1.010,0.0,0.0
479,Hospital Josep Trueta,pt219,1.09,1.39,0.300,1.280,0.0,0.0
480,Hospital Josep Trueta,pt220,48.01,46.48,-1.530,0.970,0.0,0.0


In [34]:
# change the column name id to patient_id
# df.rename(columns={"id": "patient_id"}, inplace=True)
# create image_size column but add only nan
# df["image_size"] = [float('nan')] * len(df)
# remove column HE real 
# df = df.drop(columns=["HE real"])
# add column IVH but all with nan
# df["IVH"] = [float('nan')] * len(df)
# change Basal volume, ml column name to  Volume
# df.rename(columns={"Basal volume, ml": "Volume"}, inplace=True) 
# remove columns FU volume, ml	Absolute vol diff, ml	Relative vol diff (FU_vol/Basal_vol)
# df = df.drop(columns=["FU volume, ml", "Absolute vol diff, ml", "Relative vol diff (FU_vol/Basal_vol)"])
# we will remove the cases of we do not have the he vicorob labels 
# remove rows where HE vicorob is NaN
# df = df.dropna(subset=["HE vicorob"])
# change the column name HE vicorob to label 
# df.rename(columns={"HE vicorob": "label"}, inplace=True)

# remove index column if exists
if "index" in df.columns:
    df = df.drop(columns=["index"])

# now we will add a column named index but it will be basically from 0 to len(df)-1
df["index"] = range(len(df))

# change column name from Volume to volume 
df.rename(columns={"Volume": "volume"}, inplace=True)

# add columna named ct_ss_path for the image path 
# each image has the path as /media/cansu/DiskSpace/Cansu/HE_prediction_improved/data/RAINS_vicorob/Hospital Clínic/2098/Basal/CT_SS.nii.gz meaning repo_path + "/data/RAINS_vicorob/" + Hospital name + "/" + patient_id + "/Basal/CT_SS.nii.gz
for index, row in df.iterrows():
    patient_id = row["patient_id"]
    hospital_name = row["Hospital name"]
    image_path = f"{repo_path}/data/RAINS_vicorob/{hospital_name}/{patient_id}/Basal/CT_SS.nii.gz"
    ct_nc_path = f"{repo_path}/data/RAINS_vicorob/{hospital_name}/{patient_id}/Basal/CT_NC.nii.gz"
    mask_path = f"{repo_path}/data/RAINS_vicorob/{hospital_name}/{patient_id}/Basal/hematoma_mask_vicorob_reviewed_reoriented.nii.gz"
    df.at[index, "ct_ss_path"] = image_path
    df.at[index, "ct_nc_path"] = ct_nc_path
    df.at[index, "mask_path"] = mask_path

# convert label column to int
df["label"] = df["label"].astype(int)
df

Unnamed: 0,level_0,Hospital name,patient_id,volume,label,image_size,IVH,ct_ss_path,ct_nc_path,mask_path,index
0,0,Hospital Clínic,2098,5.02,0,,,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,0
1,1,Hospital Clínic,233632,36.07,0,,,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,1
2,2,Hospital Clínic,261065,9.50,0,,,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,2
4,4,Hospital Clínic,397280,9.37,0,,,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,3
6,6,Hospital Clínic,4141022,32.23,0,,,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,4
...,...,...,...,...,...,...,...,...,...,...,...
477,477,Hospital Josep Trueta,pt217,0.45,1,,,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,452
478,478,Hospital Josep Trueta,pt218,7.19,0,,,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,453
479,479,Hospital Josep Trueta,pt219,1.09,0,,,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,454
480,480,Hospital Josep Trueta,pt220,48.01,0,,,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,/media/cansu/DiskSpace/Cansu/HE_prediction_imp...,455


In [35]:
# save dataframe to csv
df.to_csv("/media/cansu/DiskSpace/Cansu/HE_prediction_improved/data/metadata_new_dataset.csv", index=False)

# Inferencing with the new dataset 

In [5]:
import os
import sys
sys.path.insert(0, os.path.abspath("../"))

import yaml
import torch
import pytorch_lightning as pl
import pandas as pd

from dataset import PredictionDataset, HEPredDataModule
from model import ImageClassifier3

def load_config(config_path):
    with open(config_path, "r") as f:
        return yaml.safe_load(f)

CONFIG_PATH = "/media/cansu/DiskSpace/Cansu/HE_prediction_improved/configs/config_eff_t5_repeat_1_othermain.yml"
config = load_config(CONFIG_PATH)

pl.seed_everything(42, workers=True)

FOLD = 0
repo_path = os.getcwd()

test_csv = '/media/cansu/DiskSpace/Cansu/HE_prediction_improved/data/hospital_clinic_trial.csv'
test_df = pd.read_csv(test_csv)

md_path = "/media/cansu/DiskSpace/Cansu/HE_prediction_improved/data/metadata_new_dataset.csv"
dataset = PredictionDataset(md_path=md_path, mask=True)

df = pd.DataFrame({
    "patient_id": dataset.patient_id,
    "index": dataset.index
})

df["patient_id"] = df["patient_id"].astype(str)
test_df["patient_id"] = test_df["patient_id"].astype(str)

test_indexes = df[df["patient_id"].isin(test_df["patient_id"])]["index"].values

X_test = dataset.index[test_indexes]
y_test = dataset.labels[test_indexes]

dm = HEPredDataModule(
    split_indexes=(None, None, X_test, None, None, y_test),
    filter_slices=True,
    mask=True,
    batch_size=config["BATCH_SIZE"],
    num_workers=8,
    use_2d=True,
    return_type="image",
    md_path=md_path,
    threshold=config["THRESHOLD"],
    roi=config["ROI"],
    problem=config["TASK"],
    image_size=config["INPUT_SIZE"],
    roi_size=config["INPUT_SIZE"],
    lesion=config["LESION"],
    test_type=config["TEST_TYPE"],
    apply_hflip = config["APPLY_HFLIP"],
    apply_affine = config["APPLY_AFFINE"],
    apply_gaussian_blur=config["APPLY_GAUSSIAN_BLUR"],
    affine_degree=config["AFFINE_DEGREE"],
    affine_translate=config["AFFINE_TRANSLATE"],
    affine_scale=config["AFFINE_SCALE"],
    affine_shear=config["AFFINE_SHEAR"], 
    hflip_p = config["HFLIP_P"], affine_p = config["AFFINE_P"]
)

dm.setup(stage="test")

checkpoint_path = '/media/cansu/DiskSpace/Cansu/HE_prediction_improved/checkpoints/org_da_sy_es5_hf05_repeat_2711_1_othermain/0/last_model-v1.ckpt'

model = ImageClassifier3.load_from_checkpoint(
    checkpoint_path,
    model=config["MODEL"],
    learning_rate=config["LR"],          # ignored but required
    optimizer=config["OPTIMIZER"],        # ignored
    weight_decay=config["WEIGHT_DECAY"],
    momentum=config["MOMENTUM"],
    lr_scheduler=config["LR_SCHEDULER"],
    step_size=config["STEP_SIZE"],
    gamma=config["GAMMA"],
    num_classes=1,
    pw_based="mean",
    test_pw_based="mean",
    loss=config["LOSS"],
    fold=FOLD,
    task=config["T"],
    back=config["BACKBONE"],
    name_experiment="inference_only",
    threshold=config["threshold_percentage"],
    save_predictions=True,
)

trainer = pl.Trainer(
    accelerator="gpu",
    devices=1,
    logger=False,
    enable_checkpointing=False,
)

results = trainer.test(model=model, datamodule=dm)
print(results)

Global seed set to 42


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Using eff s
Using focal loss
number of slices after filtering in test  160
number of slices in the test set:  160
Testing DataLoader 0: 100%|██████████| 40/40 [00:02<00:00, 17.31it/s]self.patient_probs_with_slices after sorting:  {2098: {13: 0.05093580484390259, 14: 0.024922451004385948, 15: 0.024693427607417107, 16: 0.003608453320339322, 17: 0.03008950687944889}, 233632: {10: 0.04867088794708252, 11: 0.1756911724805832, 12: 0.2408410608768463, 13: 0.7124733328819275, 14: 0.46294113993644714, 15: 0.5204938054084778, 16: 0.06711367517709732, 17: 0.05677546188235283, 18: 0.09729817509651184, 19: 0.058101531118154526}, 261065: {13: 0.06752586364746094, 14: 0.3633178472518921, 15: 0.18352994322776794, 16: 0.11218925565481186, 17: 0.03690936416387558, 18: 0.023360520601272583}, 397280: {14: 0.02731223963201046, 15: 0.0009957184083759785, 16: 0.0004727476043626666, 17: 0.002441821852698922, 18: 0.0007654950022697449, 19: 0.0005463900160975754, 20: 0.0026026442646980286}, 4141022: {10: 0.0402

  self.patient_probs_with_slices_df = self.patient_probs_with_slices_df.append({"patient_id": patient_id,
  self.patient_probs_with_slices_df = self.patient_probs_with_slices_df.append({"patient_id": patient_id,
  self.patient_probs_with_slices_df = self.patient_probs_with_slices_df.append({"patient_id": patient_id,
  self.patient_probs_with_slices_df = self.patient_probs_with_slices_df.append({"patient_id": patient_id,
  self.patient_probs_with_slices_df = self.patient_probs_with_slices_df.append({"patient_id": patient_id,
  self.patient_probs_with_slices_df = self.patient_probs_with_slices_df.append({"patient_id": patient_id,
  self.patient_probs_with_slices_df = self.patient_probs_with_slices_df.append({"patient_id": patient_id,
  self.patient_probs_with_slices_df = self.patient_probs_with_slices_df.append({"patient_id": patient_id,
  self.patient_probs_with_slices_df = self.patient_probs_with_slices_df.append({"patient_id": patient_id,
  self.patient_probs_with_slices_df = self.pat

[{'test_loss': 0.23857226967811584, 'test_acc': 0.8500000238418579, 'test_roc_auc_score_pw': 0.75, 'test_roc_auc_score_iw': 0.7115450496745459, 'test_acc_pw_epoch': 0.9130434989929199, 'test_prec_pw_epoch': 1.0, 'test_f1_pw_epoch': 0.6666666865348816, 'test_sens_pw_epoch': 0.5, 'test_sens_iw_epoch': 0.523809552192688, 'test_spec_iw_epoch': 0.8992805480957031, 'test_spec_pw_epoch': 1.0, 'test_loss_epoch': 0.23857228457927704, 'test_acc_epoch': 0.8500000238418579, 'test_prec_epoch': 0.4399999976158142, 'test_f1_epoch': 0.47826087474823}]
