# Notebook to generate realistic masks on already lesioned images

In [39]:
import os

def count_and_list_middle_numbers(directory, dataset_name):
    """
    Count and list the distinct subject IDs encoded in a dataset's .png filenames.

    Each filename is split on '_' and the subject ID is built from its leading
    fields, depending on the dataset:

    * 'VH' / 'WMH2017': files named ``<dataset_name>_<id>_<...>.png``; the ID is
      the second field converted to ``int``.
    * 'SHIFTS': files starting with ``train_`` give the first two fields
      concatenated (e.g. ``'train11'``); files starting with ``dev_in``,
      ``eval_in`` or ``dev_out`` give the first three fields concatenated
      (e.g. ``'devin2'``). These IDs are strings and therefore sort
      lexicographically (``'devout8'`` comes after ``'devout25'``).

    Files that do not end in ``.png``, or that have too few '_'-separated
    fields, are ignored. Any other ``dataset_name`` matches no file, so the
    count is 0.

    The summary (count and comma-separated sorted IDs) is printed to stdout.

    Args:
        directory (str): Directory containing the .png files.
        dataset_name (str): One of ['VH', 'WMH2017', 'SHIFTS'].

    Returns:
        int: Number of distinct subject IDs.

    Raises:
        OSError: Propagated from ``os.listdir`` if ``directory`` cannot be listed.
        ValueError: If a matching 'VH'/'WMH2017' filename has a non-integer
            second field.

    Note:
        In a notebook, a bare call on the last line of a cell also displays the
        returned count; end the line with ``;`` to suppress it.
    """
    middle_numbers = set()

    for filename in os.listdir(directory):
        if not filename.endswith(".png"):
            continue

        if dataset_name in ['VH', 'WMH2017'] and filename.startswith(dataset_name + "_"):
            parts = filename.split('_')
            # Need at least <dataset>_<id>_<rest> for the ID field to be a "middle" one.
            if len(parts) >= 3:
                middle_numbers.add(int(parts[1]))

        elif dataset_name == 'SHIFTS':
            if filename.startswith('train_'):
                parts = filename.split('_')
                if len(parts) >= 3:
                    # e.g. 'train' + '11' -> 'train11'
                    middle_numbers.add(parts[0] + parts[1])
            elif filename.startswith(('dev_in', 'eval_in', 'dev_out')):
                parts = filename.split('_')
                # The split name itself contains an underscore, so the ID spans three fields.
                if len(parts) >= 4:
                    # e.g. 'dev' + 'in' + '2' -> 'devin2'
                    middle_numbers.add(parts[0] + parts[1] + parts[2])

    sorted_ids = sorted(middle_numbers)
    print(f"{dataset_name}: {len(sorted_ids)} distinct subject IDs")
    print("Subject IDs:", ', '.join(map(str, sorted_ids)))

    # Honour the documented contract: callers get the count back, not None.
    return len(sorted_ids)


In [40]:
# Folder inspected by this cell. The sibling ".../test/mask" folder can be
# checked the same way by pointing `directory` at it instead.
directory = "/home/benet/data/lesion2D_VH-SHIFTS-WMH2017/test/flair"

# Only SHIFTS is listed here; 'VH' and 'WMH2017' are the other dataset names
# the helper accepts.
count_and_list_middle_numbers(directory=directory, dataset_name='SHIFTS')



SHIFTS: 31 distinct subject IDs
Subject IDs: devin2, devin4, devin6, devout14, devout18, devout20, devout21, devout22, devout24, devout25, devout8, evalin10, evalin18, evalin19, evalin21, evalin25, evalin30, evalin33, evalin4, evalin5, evalin9, train11, train14, train19, train22, train23, train28, train3, train32, train4, train6


In [43]:
# List the SHIFTS subject IDs in the flair folder and then in the mask folder
# of the "empty_masks" dataset, so the two printed lists can be compared.
# 'VH' and 'WMH2017' are not listed in this cell.
for directory in (
    "/home/benet/data/lesion2D_VH-SHIFTS-WMH2017_empty_masks/test/flair",
    "/home/benet/data/lesion2D_VH-SHIFTS-WMH2017_empty_masks/test/mask",
):
    count_and_list_middle_numbers(directory, 'SHIFTS')


SHIFTS: 31 distinct subject IDs
Subject IDs: devin2, devin4, devin6, devout14, devout18, devout20, devout21, devout22, devout24, devout25, devout8, evalin10, evalin18, evalin19, evalin21, evalin25, evalin30, evalin33, evalin4, evalin5, evalin9, train11, train14, train19, train22, train23, train28, train3, train32, train4, train6
SHIFTS: 31 distinct subject IDs
Subject IDs: devin2, devin4, devin6, devout14, devout18, devout20, devout21, devout22, devout24, devout25, devout8, evalin10, evalin18, evalin19, evalin21, evalin25, evalin30, evalin33, evalin4, evalin5, evalin9, train11, train14, train19, train22, train23, train28, train3, train32, train4, train6


In [29]:
# Mask folder of the "empty_masks_v1" dataset.
directory = "/home/benet/data/lesion2D_VH-SHIFTS-WMH2017_empty_masks_v1/test/mask"

# Print the subject-ID summary for each dataset in turn.
for dataset_name in ('VH', 'WMH2017', 'SHIFTS'):
    count_and_list_middle_numbers(directory, dataset_name)

VH: 18 distinct subject IDs
Subject IDs: 648, 727, 728, 729, 738, 739, 741, 743, 744, 745, 746, 747, 749, 751, 752, 754, 755, 758
WMH2017: 18 distinct subject IDs
Subject IDs: 2, 11, 17, 21, 35, 37, 41, 53, 62, 65, 68, 100, 104, 112, 114, 126, 132, 137
SHIFTS: 22 distinct subject IDs
Subject IDs: 1, 3, 4, 5, 7, 8, 9, 11, 12, 13, 15, 16, 17, 18, 21, 22, 23, 24, 27, 28, 32, 33
