In [1]:
# Import necessary libraries

import pandas as pd # For data manipulation and analysis
import numpy as np # For numerical operations
import pickle # For serializing and deserializing Python objects
from modelling_utils import train_knn_classifier, evaluate_model, plot_model_comparison

In [2]:
# Load the pre-extracted features for every model from a pickle file.
# The location can be overridden through the FEATURE_DICT_PATH environment variable so the
# notebook is not tied to one machine's directory layout; when the variable is unset the
# original path below is used unchanged.
import os

feature_dict_path = os.environ.get(
    "FEATURE_DICT_PATH",
    "/home/suraj/Repositories/FM-extractors-radiomics/evaluation/features/c4c_kits.pkl",
)

# NOTE(review): unpickling can execute arbitrary code -- only point this at feature files
# that you produced yourself or otherwise trust.
with open(feature_dict_path, "rb") as file:  # pickle requires binary mode
    data = pickle.load(file)  # indexed below as data[model_name][split] -> list of items

In [14]:
# Store test accuracies for each model (filled in by the training loop further down)
test_accuracies_dict = {}

# Cutoff, in days after surgery, used to binarise survival. The value is carried over
# unchanged from the original analysis.
# NOTE(review): record where 790.5 comes from (e.g. a cohort statistic) -- not visible here.
SURVIVAL_THRESHOLD_DAYS = 790.5


def survival_label(days, status, threshold=SURVIVAL_THRESHOLD_DAYS):
    """Binarise one follow-up record into a survival label.

    Args:
        days: value of the record's "vital_days_after_surgery" field.
        status: value of the record's "vital_status" field.
        threshold: cutoff in days; defaults to SURVIVAL_THRESHOLD_DAYS.

    Returns:
        1 if ``days >= threshold``;
        0 if ``days < threshold`` and ``status`` is not the string 'censored';
        ``np.nan`` otherwise (censored before the cutoff, so the outcome is unknown).
    """
    if days >= threshold:
        return 1
    # The explicit `days < threshold` test is kept on purpose: a NaN `days` fails both
    # comparisons and therefore falls through to the "unknown" label instead of 0.
    if status != 'censored' and days < threshold:
        return 0
    return np.nan


# Process survival data: attach a "survival" label to every item of every model/split.
for values in data.values():
    for dataset in ("train", "val", "test"):
        for item in values[dataset]:
            row = item["row"]
            row["survival"] = survival_label(
                row["vital_days_after_surgery"], row["vital_status"]
            )

In [31]:
# Train and evaluate a KNN classifier on each model's features.
# Imported once at the top of the cell instead of on every loop iteration.
from sklearn.model_selection import train_test_split

# Settings for this comparison, collected in one place.
SKIPPED_MODELS = {"MedImageInsightExtractor"}  # models excluded from the comparison
VAL_FRACTION = 0.2  # share of the pooled train+val data held out for validation
SPLIT_SEED = 42  # fixed seed so the stratified re-split is reproducible
N_TRIALS = 200  # passed to train_knn_classifier as n_trials


def labelled_arrays(items):
    """Return ``(features, labels)`` for the items that have a usable survival label.

    Items whose ``item["row"]["survival"]`` is NaN are dropped. ``features`` is the
    ``np.vstack`` of the remaining ``item["feature"]`` arrays and ``labels`` is the list of
    their survival labels, in the same order.

    Raises:
        ValueError: from ``np.vstack`` if no item has a non-NaN label.
    """
    kept = [item for item in items if not np.isnan(item["row"]["survival"])]
    features = np.vstack([item["feature"] for item in kept])
    labels = [item["row"]["survival"] for item in kept]
    return features, labels


for model_name, values in data.items():
    if model_name in SKIPPED_MODELS:
        continue

    print(f"Model: {model_name}")

    # Keep only the samples with a known survival label.
    train_items, train_labels = labelled_arrays(values["train"])
    val_items, val_labels = labelled_arrays(values["val"])
    test_items, test_labels = labelled_arrays(values["test"])

    # Pool the provided train and val splits, then draw a fresh stratified train/val split
    # so both parts keep the same class ratio. The test split is left untouched.
    combined_items = np.vstack([train_items, val_items])
    combined_labels = train_labels + val_labels
    train_items, val_items, train_labels, val_labels = train_test_split(
        combined_items,
        combined_labels,
        test_size=VAL_FRACTION,
        stratify=combined_labels,
        random_state=SPLIT_SEED,
    )

    # Print label counts. minlength=2 guarantees both the 0 and the 1 count are shown even
    # when class 1 is absent from a split.
    print("Train labels (0/1):", np.bincount(train_labels, minlength=2))
    print("Val labels (0/1):", np.bincount(val_labels, minlength=2))
    print("Test labels (0/1):", np.bincount(test_labels, minlength=2))

    print(f"Train items: {train_items.shape}",
          f"Train labels: {len(train_labels)}",
          f"Val items: {val_items.shape}",
          f"Val labels: {len(val_labels)}",
          f"Test items: {test_items.shape}",
          f"Test labels: {len(test_labels)}", sep="\n")

    # Train model with hyperparameter optimization
    best_model, study = train_knn_classifier(
        train_items, train_labels, val_items, val_labels, n_trials=N_TRIALS
    )

    # Evaluate on the held-out test set.
    # NOTE(review): the saved output shows strongly imbalanced labels (e.g. 6 vs 44 in test);
    # confirm which metric evaluate_model returns, since plain accuracy is already high for
    # a majority-class predictor on such data.
    test_accuracies_dict[model_name] = evaluate_model(best_model, test_items, test_labels)

[I 2025-01-22 16:57:43,431] A new study created in memory with name: no-name-bf415315-41bb-46a1-87c2-0d1f23332da7
[I 2025-01-22 16:57:43,449] Trial 0 finished with value: 0.75 and parameters: {'k': 29}. Best is trial 0 with value: 0.75.
[I 2025-01-22 16:57:43,456] Trial 1 finished with value: 0.6730769230769231 and parameters: {'k': 12}. Best is trial 0 with value: 0.75.
[I 2025-01-22 16:57:43,463] Trial 2 finished with value: 0.5192307692307692 and parameters: {'k': 11}. Best is trial 0 with value: 0.75.
[I 2025-01-22 16:57:43,478] Trial 3 finished with value: 0.7307692307692308 and parameters: {'k': 42}. Best is trial 0 with value: 0.75.
[I 2025-01-22 16:57:43,488] Trial 4 finished with value: 0.6346153846153846 and parameters: {'k': 3}. Best is trial 0 with value: 0.75.
[I 2025-01-22 16:57:43,498] Trial 5 finished with value: 0.7692307692307693 and parameters: {'k': 28}. Best is trial 5 with value: 0.7692307692307693.
[I 2025-01-22 16:57:43,507] Trial 6 finished with value: 0.75 and

Model: FMCIBExtractor
Train labels (0/1): [ 8 48]
Val labels (0/1): [ 2 13]
Test labels (0/1): [ 6 44]
Train items: (56, 4096)
Train labels: 56
Val items: (15, 4096)
Val labels: 15
Test items: (50, 4096)
Test labels: 50


[I 2025-01-22 16:57:43,631] Trial 19 finished with value: 0.75 and parameters: {'k': 30}. Best is trial 17 with value: 0.8076923076923077.
[I 2025-01-22 16:57:43,643] Trial 20 finished with value: 0.5961538461538463 and parameters: {'k': 16}. Best is trial 17 with value: 0.8076923076923077.
[I 2025-01-22 16:57:43,654] Trial 21 finished with value: 0.75 and parameters: {'k': 31}. Best is trial 17 with value: 0.8076923076923077.
[I 2025-01-22 16:57:43,665] Trial 22 finished with value: 0.7307692307692307 and parameters: {'k': 33}. Best is trial 17 with value: 0.8076923076923077.
[I 2025-01-22 16:57:43,677] Trial 23 finished with value: 0.6538461538461539 and parameters: {'k': 17}. Best is trial 17 with value: 0.8076923076923077.
[I 2025-01-22 16:57:43,688] Trial 24 finished with value: 0.75 and parameters: {'k': 43}. Best is trial 17 with value: 0.8076923076923077.
[I 2025-01-22 16:57:43,699] Trial 25 finished with value: 0.6538461538461539 and parameters: {'k': 21}. Best is trial 17 wit

Model: CTFMExtractor
Train labels (0/1): [ 8 48]
Val labels (0/1): [ 2 13]
Test labels (0/1): [ 6 44]
Train items: (56, 512)
Train labels: 56
Val items: (15, 512)
Val labels: 15
Test items: (50, 512)
Test labels: 50


[I 2025-01-22 16:57:44,235] Trial 23 finished with value: 0.7884615384615385 and parameters: {'k': 17}. Best is trial 23 with value: 0.7884615384615385.
[I 2025-01-22 16:57:44,244] Trial 24 finished with value: 0.4230769230769231 and parameters: {'k': 43}. Best is trial 23 with value: 0.7884615384615385.
[I 2025-01-22 16:57:44,253] Trial 25 finished with value: 0.7307692307692308 and parameters: {'k': 21}. Best is trial 23 with value: 0.7884615384615385.
[I 2025-01-22 16:57:44,262] Trial 26 finished with value: 0.5192307692307693 and parameters: {'k': 44}. Best is trial 23 with value: 0.7884615384615385.
[I 2025-01-22 16:57:44,271] Trial 27 finished with value: 0.576923076923077 and parameters: {'k': 9}. Best is trial 23 with value: 0.7884615384615385.
[I 2025-01-22 16:57:44,281] Trial 28 finished with value: 0.5 and parameters: {'k': 14}. Best is trial 23 with value: 0.7884615384615385.
[I 2025-01-22 16:57:44,290] Trial 29 finished with value: 0.5384615384615385 and parameters: {'k': 

Model: PyramidExtractor
Train labels (0/1): [ 8 48]
Val labels (0/1): [ 2 13]
Test labels (0/1): [ 6 44]
Train items: (56, 512)
Train labels: 56
Val items: (15, 512)
Val labels: 15
Test items: (50, 512)
Test labels: 50


[I 2025-01-22 16:57:44,731] Trial 29 finished with value: 0.48076923076923084 and parameters: {'k': 26}. Best is trial 13 with value: 0.5961538461538461.
[I 2025-01-22 16:57:44,741] Trial 30 finished with value: 0.4807692307692308 and parameters: {'k': 6}. Best is trial 13 with value: 0.5961538461538461.
[I 2025-01-22 16:57:44,753] Trial 31 finished with value: 0.3653846153846154 and parameters: {'k': 18}. Best is trial 13 with value: 0.5961538461538461.
[I 2025-01-22 16:57:44,765] Trial 32 finished with value: 0.40384615384615385 and parameters: {'k': 41}. Best is trial 13 with value: 0.5961538461538461.
[I 2025-01-22 16:57:44,777] Trial 33 finished with value: 0.5 and parameters: {'k': 50}. Best is trial 13 with value: 0.5961538461538461.
[I 2025-01-22 16:57:44,789] Trial 34 finished with value: 0.4230769230769231 and parameters: {'k': 2}. Best is trial 13 with value: 0.5961538461538461.
[I 2025-01-22 16:57:44,801] Trial 35 finished with value: 0.3461538461538462 and parameters: {'k'

Model: VISTA3DExtractor
Train labels (0/1): [ 8 48]
Val labels (0/1): [ 2 13]
Test labels (0/1): [ 6 44]
Train items: (56, 768)
Train labels: 56
Val items: (15, 768)
Val labels: 15
Test items: (50, 768)
Test labels: 50


[I 2025-01-22 16:57:45,224] Trial 25 finished with value: 0.5961538461538463 and parameters: {'k': 21}. Best is trial 9 with value: 0.7884615384615385.
[I 2025-01-22 16:57:45,235] Trial 26 finished with value: 0.46153846153846156 and parameters: {'k': 44}. Best is trial 9 with value: 0.7884615384615385.
[I 2025-01-22 16:57:45,244] Trial 27 finished with value: 0.7115384615384616 and parameters: {'k': 9}. Best is trial 9 with value: 0.7884615384615385.
[I 2025-01-22 16:57:45,253] Trial 28 finished with value: 0.5961538461538461 and parameters: {'k': 14}. Best is trial 9 with value: 0.7884615384615385.
[I 2025-01-22 16:57:45,262] Trial 29 finished with value: 0.4807692307692308 and parameters: {'k': 26}. Best is trial 9 with value: 0.7884615384615385.
[I 2025-01-22 16:57:45,271] Trial 30 finished with value: 0.7692307692307692 and parameters: {'k': 6}. Best is trial 9 with value: 0.7884615384615385.
[I 2025-01-22 16:57:45,281] Trial 31 finished with value: 0.6153846153846154 and paramete

Model: VocoExtractor
Train labels (0/1): [ 8 48]
Val labels (0/1): [ 2 13]
Test labels (0/1): [ 6 44]
Train items: (56, 3072)
Train labels: 56
Val items: (15, 3072)
Val labels: 15
Test items: (50, 3072)
Test labels: 50


[I 2025-01-22 16:57:45,757] Trial 19 finished with value: 0.38461538461538464 and parameters: {'k': 30}. Best is trial 3 with value: 0.8846153846153846.
[I 2025-01-22 16:57:45,769] Trial 20 finished with value: 0.6923076923076923 and parameters: {'k': 16}. Best is trial 3 with value: 0.8846153846153846.
[I 2025-01-22 16:57:45,781] Trial 21 finished with value: 0.38461538461538464 and parameters: {'k': 31}. Best is trial 3 with value: 0.8846153846153846.
[I 2025-01-22 16:57:45,792] Trial 22 finished with value: 0.46153846153846156 and parameters: {'k': 33}. Best is trial 3 with value: 0.8846153846153846.
[I 2025-01-22 16:57:45,804] Trial 23 finished with value: 0.6538461538461539 and parameters: {'k': 17}. Best is trial 3 with value: 0.8846153846153846.
[I 2025-01-22 16:57:45,817] Trial 24 finished with value: 0.8076923076923077 and parameters: {'k': 43}. Best is trial 3 with value: 0.8846153846153846.
[I 2025-01-22 16:57:45,830] Trial 25 finished with value: 0.42307692307692313 and par

Model: SUPREMExtractor
Train labels (0/1): [ 8 48]
Val labels (0/1): [ 2 13]
Test labels (0/1): [ 6 44]
Train items: (56, 512)
Train labels: 56
Val items: (15, 512)
Val labels: 15
Test items: (50, 512)
Test labels: 50


[I 2025-01-22 16:57:46,377] Trial 25 finished with value: 0.7884615384615384 and parameters: {'k': 21}. Best is trial 20 with value: 0.8461538461538463.
[I 2025-01-22 16:57:46,388] Trial 26 finished with value: 0.6923076923076923 and parameters: {'k': 44}. Best is trial 20 with value: 0.8461538461538463.
[I 2025-01-22 16:57:46,398] Trial 27 finished with value: 0.8076923076923077 and parameters: {'k': 9}. Best is trial 20 with value: 0.8461538461538463.
[I 2025-01-22 16:57:46,410] Trial 28 finished with value: 0.7692307692307693 and parameters: {'k': 14}. Best is trial 20 with value: 0.8461538461538463.
[I 2025-01-22 16:57:46,421] Trial 29 finished with value: 0.6153846153846154 and parameters: {'k': 26}. Best is trial 20 with value: 0.8461538461538463.
[I 2025-01-22 16:57:46,432] Trial 30 finished with value: 0.7307692307692308 and parameters: {'k': 6}. Best is trial 20 with value: 0.8461538461538463.
[I 2025-01-22 16:57:46,443] Trial 31 finished with value: 0.8076923076923077 and par

Model: MerlinExtractor
Train labels (0/1): [ 8 48]
Val labels (0/1): [ 2 13]
Test labels (0/1): [ 6 44]
Train items: (56, 512)
Train labels: 56
Val items: (15, 512)
Val labels: 15
Test items: (50, 512)
Test labels: 50


[I 2025-01-22 16:57:46,894] Trial 26 finished with value: 0.6153846153846154 and parameters: {'k': 44}. Best is trial 9 with value: 0.9230769230769231.
[I 2025-01-22 16:57:46,905] Trial 27 finished with value: 0.903846153846154 and parameters: {'k': 9}. Best is trial 9 with value: 0.9230769230769231.
[I 2025-01-22 16:57:46,917] Trial 28 finished with value: 0.9230769230769231 and parameters: {'k': 14}. Best is trial 9 with value: 0.9230769230769231.
[I 2025-01-22 16:57:46,931] Trial 29 finished with value: 0.7307692307692308 and parameters: {'k': 26}. Best is trial 9 with value: 0.9230769230769231.
[I 2025-01-22 16:57:46,943] Trial 30 finished with value: 0.9230769230769231 and parameters: {'k': 6}. Best is trial 9 with value: 0.9230769230769231.
[I 2025-01-22 16:57:46,954] Trial 31 finished with value: 0.8461538461538461 and parameters: {'k': 18}. Best is trial 9 with value: 0.9230769230769231.
[I 2025-01-22 16:57:46,967] Trial 32 finished with value: 0.7115384615384616 and parameters

Model: ModelsGenExtractor
Train labels (0/1): [ 8 48]
Val labels (0/1): [ 2 13]
Test labels (0/1): [ 6 44]
Train items: (56, 4096)
Train labels: 56
Val items: (15, 4096)
Val labels: 15
Test items: (50, 4096)
Test labels: 50


[I 2025-01-22 16:57:47,433] Trial 18 finished with value: 0.5769230769230769 and parameters: {'k': 49}. Best is trial 13 with value: 0.8076923076923077.
[I 2025-01-22 16:57:47,444] Trial 19 finished with value: 0.7307692307692308 and parameters: {'k': 30}. Best is trial 13 with value: 0.8076923076923077.
[I 2025-01-22 16:57:47,456] Trial 20 finished with value: 0.6346153846153846 and parameters: {'k': 16}. Best is trial 13 with value: 0.8076923076923077.
[I 2025-01-22 16:57:47,471] Trial 21 finished with value: 0.75 and parameters: {'k': 31}. Best is trial 13 with value: 0.8076923076923077.
[I 2025-01-22 16:57:47,487] Trial 22 finished with value: 0.7307692307692307 and parameters: {'k': 33}. Best is trial 13 with value: 0.8076923076923077.
[I 2025-01-22 16:57:47,503] Trial 23 finished with value: 0.6346153846153846 and parameters: {'k': 17}. Best is trial 13 with value: 0.8076923076923077.
[I 2025-01-22 16:57:47,515] Trial 24 finished with value: 0.6538461538461537 and parameters: {'k

In [32]:
# Plot test accuracies: compare the per-model results that the training loop stored in
# test_accuracies_dict (one entry per model name).
# NOTE(review): plot_model_comparison is imported from modelling_utils; the type of figure it
# returns is not visible here -- presumably a Plotly or Matplotlib figure, confirm.
fig = plot_model_comparison(test_accuracies_dict)
fig.show() # Render the comparison figure