In [1]:
# Basic imports
import os
import sys
import json

import pandas as pd 
import numpy as np 
from datetime import datetime
sys.path.append(os.path.abspath('../..'))
from utils.utils_constants import COMBINED_CLASSIFICATION_PATH, CROPPED_VESSELS_COMBINED_DIR

import logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
CONFIG_PATH = f"./config_five_classes.json"
CONFIG_PATH

'./config_five_classes.json'

In [10]:
combined_classifications

Unnamed: 0,Image Name,Artery ID,Bounding Box,Artery Type,Arteriosclerosis Severity,Hyalinosis Severity,Biopsy ID
0,Biopsy_004_WSI_001_A01_21747_18330_283_234.png,A01,"21747, 18330, 283, 234",Arterioles,0 - No,0 - No,Biopsy_004
1,Biopsy_004_WSI_001_A02_21639_19547_447_354.png,A02,"21639, 19547, 447, 354",Arterioles,0 - No,0 - No,Biopsy_004
2,Biopsy_004_WSI_001_A03_21966_19848_231_304.png,A03,"21966, 19848, 231, 304",Arterioles,0 - No,0 - No,Biopsy_004
3,Biopsy_004_WSI_001_A04_22082_20088_245_247.png,A04,"22082, 20088, 245, 247",Arterioles,0 - No,0 - No,Biopsy_004
4,Biopsy_004_WSI_001_A05_20649_21144_473_301.png,A05,"20649, 21144, 473, 301",Arterioles,0 - No,1 - Mild,Biopsy_004
...,...,...,...,...,...,...,...
2934,Biopsy_101_WSI_001_A25_133192_31934_487_554.png,A25,"133192, 31934, 487, 554",Arterioles,0 - No,0 - No,Biopsy_101
2937,Biopsy_101_WSI_001_A22_132602_34684_344_183.png,A22,"132602, 34684, 344, 183",Arterioles,0 - No,0 - No,Biopsy_101
2938,Biopsy_101_WSI_001_A21_138923_20246_231_347.png,A21,"138923, 20246, 231, 347",Arterioles,0 - No,1 - Mild,Biopsy_101
2940,Biopsy_101_WSI_001_A19_143025_44246_223_249.png,A19,"143025, 44246, 223, 249",Arterioles,0 - No,0 - No,Biopsy_101


In [2]:
configs = {
    "net": {
        "in_channels": 3,
        "n_classes": 5,
        "padding": True,  # Ensure boolean values are capitalized in Python
        "depth": 5,
        "wf": 4,
        "up_mode": "upconv",
        "batch_norm": True,  # Ensure boolean values are capitalized in Python
    },

    "training": {
        "gpu_id": 0,
        "batch_size": 10,
        "num_epochs": 50,
        "lr": 0.001,
        "edge_weight": 1.2,
        "class_weights": [0.47, 0.53],  # Python lists should be formatted correctly
        "ignore_index": -100
    },

    "data": {
        "base_path": "/DataMount/NEPTUNE/Cropped_Vessels_Combined/",
        "mask_suffix": "_mask.png",
        "resize_to": 256,  # Corrected to include a trailing comma if expecting further entries
        "train_files": [],
        "val_files": []
    },
    "inference": {
        "gpu_id": 1,
        "model_path": "models/None_model_20240701-005141_epoch_50.pth",
        "batch_size": 1,
        "inference_files": [],
    }
}

In [3]:
combined_classifications = pd.read_csv(COMBINED_CLASSIFICATION_PATH)
# combined_classifications = combined_classifications[combined_classifications["Artery Type"] != "Others"]
combined_classifications = combined_classifications[combined_classifications["Artery Type"] == "Arterioles"]

# Generate 'Biopsy ID' by extracting relevant part from 'Image Name'
combined_classifications["Biopsy ID"] = combined_classifications["Image Name"].apply(lambda x: "Biopsy_" + x.split("_")[1])

# Ensure that there is no data leakage between training and validation sets by consistent split
biopsy_ids = combined_classifications["Biopsy ID"].unique()
# np.random.shuffle(biopsy_ids)  # Randomize the order to avoid biased splits

# Assign the first 180 unique Biopsy IDs to training and the rest to validation
train_biopsy_ids = biopsy_ids[:180]
val_biopsy_ids = biopsy_ids[180:]

# Select records for training and validation datasets based on 'Biopsy ID'
train_files = combined_classifications[combined_classifications["Biopsy ID"].isin(train_biopsy_ids)]["Image Name"].values
val_files = combined_classifications[combined_classifications["Biopsy ID"].isin(val_biopsy_ids)]["Image Name"].values

# Update configurations with the lists of training and validation files
configs["data"]["train_files"] = [os.path.join(CROPPED_VESSELS_COMBINED_DIR, x.replace(".png", "_ori.png")) for x in train_files]  # Convert NumPy array to list for JSON compatibility if needed
configs["data"]["val_files"] = [os.path.join(CROPPED_VESSELS_COMBINED_DIR, x.replace(".png", "_ori.png")) for x in val_files]  # Convert NumPy array to list for JSON compatibility if needed


In [4]:
import shutil

for x in val_files:
    img_path = os.path.join(CROPPED_VESSELS_COMBINED_DIR, x.replace(".png", "_ori.png"))
    mask_path = os.path.join(CROPPED_VESSELS_COMBINED_DIR, x.replace(".png", "_mask.png"))
    img_path_dst = img_path.replace(CROPPED_VESSELS_COMBINED_DIR, CROPPED_VESSELS_COMBINED_DIR+"_Test")
    mask_path_dst = mask_path.replace(CROPPED_VESSELS_COMBINED_DIR, CROPPED_VESSELS_COMBINED_DIR+"_Test")
    # Copy files
    shutil.copy(img_path, img_path_dst)
    shutil.copy(mask_path, mask_path_dst)


In [5]:

# Save the updated configuration back to the file
configs["inference"]["inference_files"] = [os.path.join(CROPPED_VESSELS_COMBINED_DIR+"_Test", x.replace(".png", "_ori.png")) for x in val_files]  # Convert NumPy array to list for JSON compatibility if needed

with open(CONFIG_PATH, 'w') as config_file:
    json.dump(configs, config_file, indent=4)

In [7]:
len(configs["inference"]["inference_files"])

279

In [9]:
for x in configs["inference"]["inference_files"]:
    print(os.path.exists(x))

True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
