In [1]:
# Basic imports
import os
import sys
import json

import pandas as pd 
import numpy as np 
from datetime import datetime
sys.path.append(os.path.abspath('../..'))
from utils.utils_constants import COMBINED_CLASSIFICATION_PATH, CROPPED_VESSELS_COMBINED_DIR

import logging
# Root logger: INFO and above, each record prefixed with its timestamp and level.
LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

# Path of the JSON configuration file, relative to the notebook's working directory.
# (A timestamped name via datetime.now().strftime("%Y%m%d_%H%M%S") is currently not used.)
CONFIG_PATH = "./config_five_classes.json"
CONFIG_PATH

'./config_five_classes.json'

In [2]:
# Network settings.
net_settings = dict(
    in_channels=3,
    n_classes=5,
    padding=True,
    depth=5,
    wf=4,
    up_mode="upconv",
    batch_norm=True,
)

# Optimisation / training-loop settings.
training_settings = dict(
    gpu_id=0,
    batch_size=10,
    num_epochs=50,
    lr=0.001,
    edge_weight=1.2,
    # NOTE(review): two weights are listed here while "n_classes" is 5 —
    # confirm this is what the training code expects.
    class_weights=[0.47, 0.53],
    ignore_index=-100,
)

# Dataset location and preprocessing settings.
data_settings = dict(
    base_path="/DataMount/NEPTUNE/Cropped_Vessels_Combined/",
    mask_suffix="_mask.png",
    resize_to=256,
    # Both file lists start empty and are filled in once the
    # train/validation split has been computed.
    train_files=[],
    val_files=[],
)

# Full configuration; the section order is the order written to JSON.
configs = {
    "net": net_settings,
    "training": training_settings,
    "data": data_settings,
}

In [3]:
# Seed for the biopsy-level shuffle, so that Restart & Run All reproduces the same split.
SPLIT_SEED = 42
# Number of unique biopsies assigned to training; every remaining biopsy goes to validation.
N_TRAIN_BIOPSIES = 180


def _to_ori_path(image_name):
    """Return the path of the "_ori.png" file for one 'Image Name' entry.

    The ".png" in the name is replaced by "_ori.png" and the result is joined
    onto CROPPED_VESSELS_COMBINED_DIR.
    """
    return os.path.join(CROPPED_VESSELS_COMBINED_DIR, image_name.replace(".png", "_ori.png"))


# Load the classification table and drop rows whose artery type is "Others".
# .copy() makes the filtered frame independent, so adding a column below does
# not operate on a slice of the original (avoids SettingWithCopyWarning).
combined_classifications = pd.read_csv(COMBINED_CLASSIFICATION_PATH)
combined_classifications = combined_classifications[
    combined_classifications["Artery Type"] != "Others"
].copy()

# 'Biopsy ID' is "Biopsy_" followed by the second underscore-separated token of 'Image Name'.
combined_classifications["Biopsy ID"] = combined_classifications["Image Name"].apply(
    lambda name: "Biopsy_" + name.split("_")[1]
)

# Split by biopsy rather than by image so that images from one biopsy never
# end up in both sets. The IDs are sorted before the seeded permutation so the
# result does not depend on the row order of the CSV.
biopsy_ids = sorted(combined_classifications["Biopsy ID"].unique())
biopsy_ids = np.random.default_rng(SPLIT_SEED).permutation(biopsy_ids)

# Fail loudly instead of silently writing a config with an empty validation set.
if len(biopsy_ids) <= N_TRAIN_BIOPSIES:
    raise ValueError(
        f"Found {len(biopsy_ids)} unique biopsies; need more than "
        f"{N_TRAIN_BIOPSIES} to leave any for validation."
    )

train_biopsy_ids = biopsy_ids[:N_TRAIN_BIOPSIES]
val_biopsy_ids = biopsy_ids[N_TRAIN_BIOPSIES:]

# Select the image names belonging to each side of the split.
in_train = combined_classifications["Biopsy ID"].isin(train_biopsy_ids)
in_val = combined_classifications["Biopsy ID"].isin(val_biopsy_ids)
train_files = combined_classifications.loc[in_train, "Image Name"].values
val_files = combined_classifications.loc[in_val, "Image Name"].values

# Store plain Python lists of path strings so the config is JSON-serialisable.
configs["data"]["train_files"] = [_to_ori_path(x) for x in train_files]
configs["data"]["val_files"] = [_to_ori_path(x) for x in val_files]

logging.info(
    "Split %d biopsies into %d training / %d validation images",
    len(biopsy_ids), len(train_files), len(val_files),
)

# Write the full configuration, including the file lists, to CONFIG_PATH.
with open(CONFIG_PATH, 'w') as config_file:
    json.dump(configs, config_file, indent=4)