In [1]:
# Basic imports
import json
import logging
import os
import random
import shutil
import sys
from datetime import datetime

import cv2
import numpy as np
import pandas as pd

# Put the directory two levels up on sys.path so the local `utils` package can be imported.
sys.path.append(os.path.abspath('../..'))
from utils.utils_constants import (COMBINED_CLASSIFICATION_PATH, 
                                   CROPPED_VESSELS_COMBINED_DIR, 
                                   CLASSIFICATION_SEVERITY_MAPPING)

# Emit INFO-and-above log records prefixed with a timestamp and the level name.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
# Location the generated configuration is written to at the end of the notebook.
# Plain string literal: nothing is interpolated, so no f-string prefix is needed.
CONFIG_PATH = "./config_hyalinosis.json"
CONFIG_PATH

'./config_hyalinosis.json'

In [2]:
# combined_classifications

In [3]:
# Settings under the "net" key.
net_settings = {
    "in_channels": 3,
    "n_classes": 2,
    "padding": True,
    "depth": 5,
    "wf": 4,
    "up_mode": "upconv",
    "batch_norm": True,
}

# Settings under the "training" key.
training_settings = {
    "gpu_id": 0,
    "batch_size": 10,
    "num_epochs": 50,
    "lr": 0.001,
    "edge_weight": 1.2,
    "class_weights": [0.47, 0.53],
    "ignore_index": -100,
}

# Settings under the "data" key. The two file lists start empty and are
# populated by a later cell of this notebook.
# NOTE(review): base_path is presumably the same directory as
# CROPPED_VESSELS_COMBINED_DIR - confirm.
data_settings = {
    "base_path": "/DataMount/NEPTUNE/Cropped_Vessels_Combined/",
    "mask_suffix": "_mask.png",
    "resize_to": 256,
    "train_files": [],
    "val_files": [],
}

# Settings under the "inference" key. "inference_files" starts empty and is
# populated by a later cell of this notebook.
inference_settings = {
    "gpu_id": 1,
    "model_path": "models/hyalinosis_model_20240706-234141_epoch_50.pth",
    "save_dir": "/DataMount/NEPTUNE/Cropped_Vessels_Combined_Test_Hyalinosis/",
    "pred_suffix": "_pred_hya_20240706-234141_epoch_50.png",
    "batch_size": 1,
    "inference_files": [],
}

# Full configuration that is serialised to JSON at the end of the notebook.
configs = {
    "task": "hyalinosis",
    "net": net_settings,
    "training": training_settings,
    "data": data_settings,
    "inference": inference_settings,
}

In [4]:
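# NOTE: earlier biopsy-level train/validation split, kept commented out for reference only (nothing in this cell runs).
# combined_classifications = pd.read_csv(COMBINED_CLASSIFICATION_PATH)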
# # combined_classifications = combined_classifications[combined_classifications["Artery Type"] != "Others"]
# combined_classifications = combined_classifications[combined_classifications["Artery Type"] == "Arterioles"]

# # Generate 'Biopsy ID' by extracting relevant part from 'Image Name'
# combined_classifications["Biopsy ID"] = combined_classifications["Image Name"].apply(lambda x: "Biopsy_" + x.split("_")[1])

# # Ensure that there is no data leakage between training and validation sets by consistent split
# biopsy_ids = combined_classifications["Biopsy ID"].unique()
# # np.random.shuffle(biopsy_ids)  # Randomize the order to avoid biased splits

# # Assign the first 180 unique Biopsy IDs to training and the rest to validation
# train_biopsy_ids = biopsy_ids[:180]
# val_biopsy_ids = biopsy_ids[180:]

# # Select records for training and validation datasets based on 'Biopsy ID'
# train_files = combined_classifications[combined_classifications["Biopsy ID"].isin(train_biopsy_ids)]["Image Name"].values
# val_files = combined_classifications[combined_classifications["Biopsy ID"].isin(val_biopsy_ids)]["Image Name"].values

# # Update configurations with the lists of training and validation files
# configs["data"]["train_files"] = [os.path.join(CROPPED_VESSELS_COMBINED_DIR, x.replace(".png", "_ori.png")) for x in train_files]  # Convert NumPy array to list for JSON compatibility if needed
# configs["data"]["val_files"] = [os.path.join(CROPPED_VESSELS_COMBINED_DIR, x.replace(".png", "_ori.png")) for x in val_files]  # Convert NumPy array to list for JSON compatibility if needed


In [5]:
# Build the hyalinosis train / validation file lists from the classification
# table and the colour-coded mask images.
NEGATIVE_TRAIN_SAMPLE_SIZE = 100   # upper bound on negatives sampled into the training list
SPLIT_SEED = 42                    # fixed seed so the negative sample is reproducible
hyalinosis_color = (128, 0, 128)   # colour the masks are searched for


def _vessel_file(img_name, suffix):
    """Return the path in CROPPED_VESSELS_COMBINED_DIR of ``img_name`` with ``.png`` replaced by ``suffix``."""
    return os.path.join(CROPPED_VESSELS_COMBINED_DIR, img_name.replace(".png", suffix))


def _mask_has_color(img_name, color):
    """Return True if at least one pixel of the image's ``_mask.png`` equals ``color``.

    Args:
        img_name: value from the "Image Name" column (ends in ``.png``).
        color: 3-tuple compared against every pixel after BGR->RGB conversion.

    Raises:
        FileNotFoundError: if OpenCV cannot read the mask file.
    """
    mask_path = _vessel_file(img_name, "_mask.png")
    mask_bgr = cv2.imread(mask_path, cv2.IMREAD_COLOR)
    if mask_bgr is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # instead of raising; fail here with the offending path.
        raise FileNotFoundError(f"Could not read mask image: {mask_path}")
    mask_rgb = cv2.cvtColor(mask_bgr, cv2.COLOR_BGR2RGB)
    return bool(np.any(np.all(mask_rgb == color, axis=-1)))


def _partition_by_mask(img_names, color):
    """Split ``img_names`` into (names whose mask contains ``color``, names whose mask does not)."""
    with_color, without_color = [], []
    for img_name in img_names:
        bucket = with_color if _mask_has_color(img_name, color) else without_color
        bucket.append(img_name)
    return with_color, without_color


combined_classifications = pd.read_csv(COMBINED_CLASSIFICATION_PATH)
combined_classifications["Hyalinosis Severity"] = combined_classifications["Hyalinosis Severity"].map(CLASSIFICATION_SEVERITY_MAPPING)

# combined_classifications = combined_classifications[combined_classifications["Artery Type"] != "Others"]
combined_classifications = combined_classifications[combined_classifications["Artery Type"] == "Arterioles"]

severity = combined_classifications["Hyalinosis Severity"]
n_unmapped = int(severity.isna().sum())
if n_unmapped:
    # NaN severities satisfy neither `> 0` nor `== 0`, so these rows end up in no list.
    logging.warning("%d arteriole rows have no mapped hyalinosis severity and are left out of the split", n_unmapped)

# Labelled positive: the mask should contain the hyalinosis colour.
# "fake" positives are labelled positive but have no such pixel in the mask.
pos_img_names, fake_pos_img_names = _partition_by_mask(
    combined_classifications.loc[severity > 0, "Image Name"], hyalinosis_color
)

# Labelled negative: the mask should NOT contain the hyalinosis colour.
# "fake" negatives are labelled negative but the mask does contain it.
fake_neg_img_names, neg_img_names = _partition_by_mask(
    combined_classifications.loc[severity == 0, "Image Name"], hyalinosis_color
)

# Training list: every consistent positive plus a seeded random sample of the
# consistent negatives. The sample size is capped at the number available,
# because random.sample raises ValueError when asked for more than exist.
random.seed(SPLIT_SEED)
n_neg_train = min(NEGATIVE_TRAIN_SAMPLE_SIZE, len(neg_img_names))
if n_neg_train < NEGATIVE_TRAIN_SAMPLE_SIZE:
    logging.warning("Only %d negative images available; requested %d", n_neg_train, NEGATIVE_TRAIN_SAMPLE_SIZE)
train_files = pos_img_names + random.sample(neg_img_names, n_neg_train)

# Validation list: the images whose severity label and mask disagree.
val_files = fake_pos_img_names + fake_neg_img_names

# Store the "_ori.png" image paths for both lists in the configuration.
configs["data"]["train_files"] = [_vessel_file(name, "_ori.png") for name in train_files]
configs["data"]["val_files"] = [_vessel_file(name, "_ori.png") for name in val_files]


In [6]:
# Number of images in the validation list (the label/mask-mismatch images).
len(val_files)

198

In [11]:
# Copy every selected image and its mask into a sibling directory named
# "<CROPPED_VESSELS_COMBINED_DIR>_Test".
# normpath drops any trailing separator first, so "_Test" is appended to the
# directory name itself and never ends up as a filename prefix inside the
# source directory.
test_dir = os.path.normpath(CROPPED_VESSELS_COMBINED_DIR) + "_Test"
# shutil.copy does not create missing parent directories.
os.makedirs(test_dir, exist_ok=True)

for x in train_files + val_files:
    # Image first, then mask, for each selected name.
    for suffix in ("_ori.png", "_mask.png"):
        file_name = x.replace(".png", suffix)
        shutil.copy(
            os.path.join(CROPPED_VESSELS_COMBINED_DIR, file_name),
            os.path.join(test_dir, file_name),
        )


In [8]:
# Sanity check: val_files should be a plain Python list before the config is dumped to JSON.
type(val_files)

list

In [14]:
# Inference runs over every row left in combined_classifications: record the
# "_ori.png" path of each one in the configuration.
inference_image_names = combined_classifications["Image Name"].tolist()
configs["inference"]["inference_files"] = [
    os.path.join(CROPPED_VESSELS_COMBINED_DIR, image_name.replace(".png", "_ori.png"))
    for image_name in inference_image_names
]

In [15]:
# Persist the assembled configuration to CONFIG_PATH as 4-space-indented JSON.
with open(CONFIG_PATH, 'w') as config_handle:
    config_handle.write(json.dumps(configs, indent=4))