In [3]:
import pickle
import random
from pathlib import Path
import itertools

# -- 1. Define raters and paths --
IDENTITIES = [
    "Hyunsuk Shim", "Brent Weinberg", "Sulaiman Sheriff", 
    "Peter Barker", "Andrew Maudsley", "Eric Mellon", 
    "Brian Soher", "Hui-Kuo Shu", "Harish Poptani",
     "Karthik Ramesh"
]

# The "regular" raters we use to assign 3 each
REGULAR_RATERS = [
    "Hyunsuk Shim", "Brent Weinberg", "Sulaiman Sheriff", 
    "Peter Barker", "Andrew Maudsley", "Eric Mellon", 
    "Brian Soher", "Hui-Kuo Shu", "Harish Poptani"
]

# Paths
base_dir = Path.cwd().parent
processed_folder = base_dir / "data" / "processed" / "20250411_202214"
spectral_file_pattern = "unique_ids_group_25_*.pkl" 

# Use glob to retrieve a list of files that match the pattern
matching_files = list(processed_folder.glob(spectral_file_pattern))
print(matching_files)
if not matching_files:
    raise FileNotFoundError(f"No file found matching pattern {spectral_file_pattern}")

# If more than one file is found, you can choose the first one or apply further filtering.
spectral_file = matching_files[0]

# For the output file, add "_raters" to the stem of the original file name
spectral_file_assigned = spectral_file.parent / (spectral_file.stem + "_raters" + spectral_file.suffix)
print(spectral_file_assigned)


[WindowsPath('x:/ArtifactRemovalProject/data/processed/20250411_202214/unique_ids_group_25_20250411_202222.pkl')]
x:\ArtifactRemovalProject\data\processed\20250411_202214\unique_ids_group_25_20250411_202222_raters.pkl


In [4]:
with open(spectral_file, "rb") as file:
    spectral_data = pickle.load(file)

# Ensure each entry has an 'assigned_raters' field starting as an empty list
for entry in spectral_data:
    entry["assigned_raters"] = []

# -- 3. Balanced assignment of 3 raters from the pool for all spectra --
rater_counts = {rater: 0 for rater in REGULAR_RATERS}

# For every spectrum, choose a valid combination of 3 raters that satisfies:
#    - Sulaiman Sheriff and Andrew Maudsley are NOT assigned together
#    - Peter Barker and Brian Soher are NOT assigned together
for entry in spectral_data:
    valid_combinations = []
    # Evaluate all combinations of 3 out of the REGULAR_RATERS
    for combo in itertools.combinations(REGULAR_RATERS, 3):
        # Exclude if forbidden pairs are present:
        if ("Sulaiman Sheriff" in combo and "Andrew Maudsley" in combo):
            continue
        if ("Peter Barker" in combo and "Brian Soher" in combo):
            continue
        # Compute a score based on current assignment counts to favor a balanced distribution
        combo_score = sum(rater_counts[rater] for rater in combo)
        valid_combinations.append((combo_score, combo))
    
    # Choose the valid combination with the minimum total assignment count
    if valid_combinations:
        _, selected_raters = min(valid_combinations, key=lambda x: x[0])
    else:
        # Fallback to the default assignment if no valid combo is found (this branch should rarely be hit)
        selected_raters = sorted(REGULAR_RATERS, key=lambda r: rater_counts[r])[:3]
    
    # Assign the chosen raters to the entry
    entry["assigned_raters"] = list(selected_raters)
    
    # Update the assignment counts for the selected raters
    for rater in selected_raters:
        rater_counts[rater] += 1

# -- 4. Randomly select 5 spectra to also include "Karthik Ramesh" --
five_for_karthik = random.sample(spectral_data, 5)
for entry in five_for_karthik:
    if "Karthik Ramesh" not in entry["assigned_raters"]:
        entry["assigned_raters"].append("Karthik Ramesh")

# -- 5. Print final counts and verification --
print("Assignments per regular rater:")
for rater in REGULAR_RATERS:
    print(f"{rater}: {rater_counts[rater]}")

# Verify that exactly 5 spectra include Karthik Ramesh
karthik_test_count = sum(1 for entry in spectral_data if "Karthik Ramesh" in entry["assigned_raters"])
print(f"\nNumber of spectra with 'Karthik Ramesh' added: {karthik_test_count} (expected: 5)")

# -- 6. Save updated data --
with open(spectral_file_assigned, "wb") as file:
    pickle.dump(spectral_data, file)

print(f"\nUpdated spectral data saved to: {spectral_file_assigned}")


c:\Users\VHUANG9\.conda\envs\myenv\lib\site-packages\numpy\.libs\libopenblas.EL2C6PLE4ZYW3ECEVIV3OXXGRN2NRFM2.gfortran-win_amd64.dll
c:\Users\VHUANG9\.conda\envs\myenv\lib\site-packages\numpy\.libs\libopenblas.FB5AE2TYXYH2IJRDKGDGQ3XBKLKTF43H.gfortran-win_amd64.dll


Assignments per regular rater:
Hyunsuk Shim: 1822
Brent Weinberg: 1822
Sulaiman Sheriff: 1822
Peter Barker: 1821
Andrew Maudsley: 1821
Eric Mellon: 1821
Brian Soher: 1821
Hui-Kuo Shu: 1821
Harish Poptani: 1821

Number of spectra with 'Karthik Ramesh' added: 5 (expected: 5)

Updated spectral data saved to: x:\ArtifactRemovalProject\data\processed\20250411_202214\unique_ids_group_25_20250411_202222_raters.pkl


In [5]:
# -- 7. Display a random sample of five spectra with their unique IDs and assigned raters for verification --
print("\nRandom sample of 5 spectra with assigned raters:")
sample_to_display = random.sample(spectral_data, 5)
for entry in sample_to_display:
    unique_id = entry.get("unique_id", "Unknown ID")
    raters = entry["assigned_raters"]
    print(f"Unique ID: {unique_id}")
    print(f"Assigned Raters: {raters}\n{'-'*40}")


Random sample of 5 spectra with assigned raters:
Unique ID: DOSEESC_UM21_05.01.2019_21_28_40
Assigned Raters: ['Brian Soher', 'Hui-Kuo Shu', 'Harish Poptani']
----------------------------------------
Unique ID: DOSEESC_JH02_06.18.2018_13_36_17
Assigned Raters: ['Brian Soher', 'Hui-Kuo Shu', 'Harish Poptani']
----------------------------------------
Unique ID: DOSEESC_EM02_11.14.2017_13_35_18
Assigned Raters: ['Brian Soher', 'Hui-Kuo Shu', 'Harish Poptani']
----------------------------------------
Unique ID: wholeGBM_004_07_03_2023_07.03.2023_24_34_35
Assigned Raters: ['Brian Soher', 'Hui-Kuo Shu', 'Harish Poptani']
----------------------------------------
Unique ID: rGBM_006_08_01_2023_08.01.2023_19_43_25
Assigned Raters: ['Hyunsuk Shim', 'Brent Weinberg', 'Sulaiman Sheriff']
----------------------------------------
