In [1]:
# Import necessary libraries

import os # For reading the optional feature-path override from the environment
import pickle # For serializing and deserializing Python objects

import numpy as np # For numerical operations
import pandas as pd # For data manipulation and analysis

from modelling_utils import train_knn_classifier, evaluate_model, plot_model_comparison, extract_model_features, compute_knn_indices, compute_overlap_matrix, plot_overlap_matrix, split_shuffle_data

In [8]:
# Load the pre-extracted features from a pickle file.
#
# The file location can be overridden through the FEATURE_DICT_PATH environment
# variable so the notebook is not tied to one machine's home directory; when the
# variable is unset the original path is used, so existing runs behave the same.
feature_dict_path = os.environ.get(
    "FEATURE_DICT_PATH",
    "/home/suraj/Repositories/FM-extractors-radiomics/evaluation/features/c4c_kits.pkl",
)

# NOTE(security): pickle.load can execute arbitrary code while deserializing —
# only point this at feature files that come from a trusted source.
with open(feature_dict_path, 'rb') as file: # Binary mode is required for pickle
    # `data` is used below as a mapping: model name -> {"train": [items...]},
    # where each item exposes item["row"] (metadata) and item["feature"].
    data = pickle.load(file)

In [9]:
# Store test accuracies for each model (filled in by the evaluation cell)
test_accuracies_dict = {}

# Survival cutoff in days (730 days, i.e. two 365-day years)
SURVIVAL_THRESHOLD_DAYS = 730


def _survival_label(days, status, threshold=SURVIVAL_THRESHOLD_DAYS):
    """Turn one follow-up record into a binary survival label.

    Args:
        days: Value of ``vital_days_after_surgery`` for the sample.
        status: Value of ``vital_status`` for the sample.
        threshold: Cutoff in days; defaults to ``SURVIVAL_THRESHOLD_DAYS``.

    Returns:
        1 when ``days`` reaches the threshold;
        0 when ``days`` is below the threshold and ``status`` is not ``'censored'``;
        ``np.nan`` otherwise (status ``'censored'`` before the threshold, or a NaN
        ``days`` for which both comparisons are false).
    """
    if days >= threshold:
        return 1
    # The explicit `days < threshold` keeps NaN day counts out of the 0 class.
    if status != 'censored' and days < threshold:
        return 0
    return np.nan


# Attach a "survival" label to every item's metadata row.
# Only the "train" split is labelled here; extend the list to label other splits.
for model_name, values in data.items():
    for dataset in ["train"]:
        for item in values[dataset]:
            row = item["row"]
            row["survival"] = _survival_label(
                row["vital_days_after_surgery"], row["vital_status"]
            )

In [10]:
# Score every extractor: fit a KNN classifier on its features over several
# reshuffled splits and keep the mean held-out score per model.
for model_name, values in data.items():
    # MedImageInsightExtractor is excluded from this comparison
    if model_name == "MedImageInsightExtractor":
        continue

    print(f"Model: {model_name}")

    # Keep only the samples whose survival label is defined (not NaN),
    # then split them into parallel label / feature collections.
    labelled = [v for v in values["train"] if not np.isnan(v["row"]["survival"])]
    all_labels = [v["row"]["survival"] for v in labelled]
    all_items = np.vstack([v["feature"] for v in labelled])

    print(len(all_items))

    # Repeat the split/train/evaluate cycle with a different seed each time
    n_splits = 5
    split_scores = []

    for split in range(n_splits):
        # Seeds 10, 11, ... give a different shuffle per repetition.
        # train_ratio + val_ratio = 0.6; presumably the remainder becomes the
        # test partition — confirm in modelling_utils.split_shuffle_data.
        partitions = split_shuffle_data(
            all_items,
            all_labels,
            train_ratio=0.5,
            val_ratio=0.1,
            random_seed=10 + split,
            stratify=True,
        )
        train_items, train_labels, val_items, val_labels, test_items, test_labels = partitions

        # Fit on the train partition; the validation partition is handed to the
        # trainer (per the original note, for hyperparameter optimization).
        best_model, study = train_knn_classifier(train_items, train_labels, val_items, val_labels)

        # Score the selected model on the held-out test partition of this split
        split_scores.append(evaluate_model(best_model, test_items, test_labels))

    # Record the mean score across all splits for this model
    test_accuracies_dict[model_name] = np.mean(split_scores)

[I 2025-02-25 17:07:08,644] A new study created in memory with name: no-name-66422fe1-0c9a-4a3f-86e5-ffb40fae9a92
[I 2025-02-25 17:07:08,649] Trial 0 finished with value: 0.875 and parameters: {'k': 29}. Best is trial 0 with value: 0.875.
[I 2025-02-25 17:07:08,653] Trial 1 finished with value: 0.9583333333333333 and parameters: {'k': 12}. Best is trial 1 with value: 0.9583333333333333.
[I 2025-02-25 17:07:08,658] Trial 2 finished with value: 0.9583333333333333 and parameters: {'k': 11}. Best is trial 1 with value: 0.9583333333333333.
[I 2025-02-25 17:07:08,663] Trial 3 finished with value: 0.875 and parameters: {'k': 42}. Best is trial 1 with value: 0.9583333333333333.
[I 2025-02-25 17:07:08,667] Trial 4 finished with value: 0.8333333333333333 and parameters: {'k': 3}. Best is trial 1 with value: 0.9583333333333333.
[I 2025-02-25 17:07:08,672] Trial 5 finished with value: 0.8958333333333333 and parameters: {'k': 28}. Best is trial 1 with value: 0.9583333333333333.
[I 2025-02-25 17:07:

Model: FMCIBExtractor
134


[I 2025-02-25 17:07:08,848] Trial 32 finished with value: 0.875 and parameters: {'k': 41}. Best is trial 23 with value: 0.9583333333333334.
[I 2025-02-25 17:07:08,858] Trial 33 finished with value: 0.9166666666666666 and parameters: {'k': 50}. Best is trial 23 with value: 0.9583333333333334.
[I 2025-02-25 17:07:08,868] Trial 34 finished with value: 0.6666666666666667 and parameters: {'k': 2}. Best is trial 23 with value: 0.9583333333333334.
[I 2025-02-25 17:07:08,878] Trial 35 finished with value: 0.9166666666666667 and parameters: {'k': 13}. Best is trial 23 with value: 0.9583333333333334.
[I 2025-02-25 17:07:08,889] Trial 36 finished with value: 0.875 and parameters: {'k': 38}. Best is trial 23 with value: 0.9583333333333334.
[I 2025-02-25 17:07:08,898] Trial 37 finished with value: 0.9166666666666667 and parameters: {'k': 25}. Best is trial 23 with value: 0.9583333333333334.
[I 2025-02-25 17:07:08,907] Trial 38 finished with value: 0.875 and parameters: {'k': 7}. Best is trial 23 wi

Model: CTFMExtractor
134


[I 2025-02-25 17:07:10,745] Trial 35 finished with value: 0.7291666666666667 and parameters: {'k': 13}. Best is trial 30 with value: 0.8958333333333333.
[I 2025-02-25 17:07:10,753] Trial 36 finished with value: 0.4375 and parameters: {'k': 38}. Best is trial 30 with value: 0.8958333333333333.
[I 2025-02-25 17:07:10,761] Trial 37 finished with value: 0.5833333333333333 and parameters: {'k': 25}. Best is trial 30 with value: 0.8958333333333333.
[I 2025-02-25 17:07:10,770] Trial 38 finished with value: 0.8958333333333333 and parameters: {'k': 7}. Best is trial 30 with value: 0.8958333333333333.
[I 2025-02-25 17:07:10,778] Trial 39 finished with value: 0.5833333333333333 and parameters: {'k': 24}. Best is trial 30 with value: 0.8958333333333333.
[I 2025-02-25 17:07:10,787] Trial 40 finished with value: 0.47916666666666674 and parameters: {'k': 37}. Best is trial 30 with value: 0.8958333333333333.
[I 2025-02-25 17:07:10,796] Trial 41 finished with value: 0.4583333333333333 and parameters: {

Model: PyramidExtractorVar
134


[I 2025-02-25 17:07:12,414] Trial 35 finished with value: 0.8958333333333333 and parameters: {'k': 13}. Best is trial 27 with value: 1.0.
[I 2025-02-25 17:07:12,422] Trial 36 finished with value: 0.6666666666666666 and parameters: {'k': 38}. Best is trial 27 with value: 1.0.
[I 2025-02-25 17:07:12,431] Trial 37 finished with value: 0.9166666666666666 and parameters: {'k': 25}. Best is trial 27 with value: 1.0.
[I 2025-02-25 17:07:12,439] Trial 38 finished with value: 0.875 and parameters: {'k': 7}. Best is trial 27 with value: 1.0.
[I 2025-02-25 17:07:12,447] Trial 39 finished with value: 0.9583333333333333 and parameters: {'k': 24}. Best is trial 27 with value: 1.0.
[I 2025-02-25 17:07:12,456] Trial 40 finished with value: 0.7083333333333334 and parameters: {'k': 37}. Best is trial 27 with value: 1.0.
[I 2025-02-25 17:07:12,465] Trial 41 finished with value: 0.9583333333333334 and parameters: {'k': 22}. Best is trial 27 with value: 1.0.
[I 2025-02-25 17:07:12,474] Trial 42 finished wi

Model: PyramidExtractorNoVar
134


[I 2025-02-25 17:07:14,096] Trial 34 finished with value: 0.4166666666666667 and parameters: {'k': 2}. Best is trial 15 with value: 0.8958333333333333.
[I 2025-02-25 17:07:14,104] Trial 35 finished with value: 0.7916666666666666 and parameters: {'k': 13}. Best is trial 15 with value: 0.8958333333333333.
[I 2025-02-25 17:07:14,112] Trial 36 finished with value: 0.7291666666666667 and parameters: {'k': 38}. Best is trial 15 with value: 0.8958333333333333.
[I 2025-02-25 17:07:14,120] Trial 37 finished with value: 0.7291666666666667 and parameters: {'k': 25}. Best is trial 15 with value: 0.8958333333333333.
[I 2025-02-25 17:07:14,129] Trial 38 finished with value: 0.8958333333333333 and parameters: {'k': 7}. Best is trial 15 with value: 0.8958333333333333.
[I 2025-02-25 17:07:14,137] Trial 39 finished with value: 0.7291666666666667 and parameters: {'k': 24}. Best is trial 15 with value: 0.8958333333333333.
[I 2025-02-25 17:07:14,146] Trial 40 finished with value: 0.625 and parameters: {'k'

Model: VISTA3DExtractor
134


[I 2025-02-25 17:07:15,779] Trial 34 finished with value: 0.7083333333333333 and parameters: {'k': 2}. Best is trial 0 with value: 1.0.
[I 2025-02-25 17:07:15,787] Trial 35 finished with value: 0.9166666666666667 and parameters: {'k': 13}. Best is trial 0 with value: 1.0.
[I 2025-02-25 17:07:15,795] Trial 36 finished with value: 1.0 and parameters: {'k': 38}. Best is trial 0 with value: 1.0.
[I 2025-02-25 17:07:15,804] Trial 37 finished with value: 0.9583333333333333 and parameters: {'k': 25}. Best is trial 0 with value: 1.0.
[I 2025-02-25 17:07:15,812] Trial 38 finished with value: 0.8333333333333333 and parameters: {'k': 7}. Best is trial 0 with value: 1.0.
[I 2025-02-25 17:07:15,821] Trial 39 finished with value: 0.9166666666666667 and parameters: {'k': 24}. Best is trial 0 with value: 1.0.
[I 2025-02-25 17:07:15,829] Trial 40 finished with value: 1.0 and parameters: {'k': 37}. Best is trial 0 with value: 1.0.
[I 2025-02-25 17:07:15,838] Trial 41 finished with value: 0.9375 and para

Model: VocoExtractor
134


[I 2025-02-25 17:07:17,471] Trial 31 finished with value: 0.75 and parameters: {'k': 18}. Best is trial 25 with value: 0.8333333333333333.
[I 2025-02-25 17:07:17,479] Trial 32 finished with value: 0.5625 and parameters: {'k': 41}. Best is trial 25 with value: 0.8333333333333333.
[I 2025-02-25 17:07:17,487] Trial 33 finished with value: 0.7291666666666667 and parameters: {'k': 50}. Best is trial 25 with value: 0.8333333333333333.
[I 2025-02-25 17:07:17,499] Trial 34 finished with value: 0.375 and parameters: {'k': 2}. Best is trial 25 with value: 0.8333333333333333.
[I 2025-02-25 17:07:17,508] Trial 35 finished with value: 0.5833333333333334 and parameters: {'k': 13}. Best is trial 25 with value: 0.8333333333333333.
[I 2025-02-25 17:07:17,517] Trial 36 finished with value: 0.5833333333333333 and parameters: {'k': 38}. Best is trial 25 with value: 0.8333333333333333.
[I 2025-02-25 17:07:17,525] Trial 37 finished with value: 0.75 and parameters: {'k': 25}. Best is trial 25 with value: 0.8

Model: SUPREMExtractor
134


[I 2025-02-25 17:07:19,288] Trial 35 finished with value: 0.8333333333333333 and parameters: {'k': 13}. Best is trial 20 with value: 0.9375.
[I 2025-02-25 17:07:19,297] Trial 36 finished with value: 0.7916666666666666 and parameters: {'k': 38}. Best is trial 20 with value: 0.9375.
[I 2025-02-25 17:07:19,305] Trial 37 finished with value: 0.9166666666666667 and parameters: {'k': 25}. Best is trial 20 with value: 0.9375.
[I 2025-02-25 17:07:19,313] Trial 38 finished with value: 0.8541666666666667 and parameters: {'k': 7}. Best is trial 20 with value: 0.9375.
[I 2025-02-25 17:07:19,321] Trial 39 finished with value: 0.9166666666666667 and parameters: {'k': 24}. Best is trial 20 with value: 0.9375.
[I 2025-02-25 17:07:19,330] Trial 40 finished with value: 0.7916666666666666 and parameters: {'k': 37}. Best is trial 20 with value: 0.9375.
[I 2025-02-25 17:07:19,339] Trial 41 finished with value: 0.8958333333333333 and parameters: {'k': 22}. Best is trial 20 with value: 0.9375.
[I 2025-02-25 

Model: MerlinExtractor
134


[I 2025-02-25 17:07:20,941] Trial 35 finished with value: 0.7916666666666667 and parameters: {'k': 13}. Best is trial 20 with value: 0.9583333333333333.
[I 2025-02-25 17:07:20,949] Trial 36 finished with value: 0.6666666666666666 and parameters: {'k': 38}. Best is trial 20 with value: 0.9583333333333333.
[I 2025-02-25 17:07:20,957] Trial 37 finished with value: 0.5 and parameters: {'k': 25}. Best is trial 20 with value: 0.9583333333333333.
[I 2025-02-25 17:07:20,965] Trial 38 finished with value: 0.6666666666666666 and parameters: {'k': 7}. Best is trial 20 with value: 0.9583333333333333.
[I 2025-02-25 17:07:20,974] Trial 39 finished with value: 0.5833333333333334 and parameters: {'k': 24}. Best is trial 20 with value: 0.9583333333333333.
[I 2025-02-25 17:07:20,982] Trial 40 finished with value: 0.6666666666666666 and parameters: {'k': 37}. Best is trial 20 with value: 0.9583333333333333.
[I 2025-02-25 17:07:20,991] Trial 41 finished with value: 0.7083333333333333 and parameters: {'k':

Model: ModelsGenExtractor
134


[I 2025-02-25 17:07:22,580] Trial 32 finished with value: 0.7916666666666666 and parameters: {'k': 41}. Best is trial 23 with value: 1.0.
[I 2025-02-25 17:07:22,588] Trial 33 finished with value: 0.7916666666666667 and parameters: {'k': 50}. Best is trial 23 with value: 1.0.
[I 2025-02-25 17:07:22,596] Trial 34 finished with value: 0.7083333333333333 and parameters: {'k': 2}. Best is trial 23 with value: 1.0.
[I 2025-02-25 17:07:22,605] Trial 35 finished with value: 0.9166666666666667 and parameters: {'k': 13}. Best is trial 23 with value: 1.0.
[I 2025-02-25 17:07:22,614] Trial 36 finished with value: 0.8125 and parameters: {'k': 38}. Best is trial 23 with value: 1.0.
[I 2025-02-25 17:07:22,622] Trial 37 finished with value: 0.875 and parameters: {'k': 25}. Best is trial 23 with value: 1.0.
[I 2025-02-25 17:07:22,631] Trial 38 finished with value: 0.9166666666666667 and parameters: {'k': 7}. Best is trial 23 with value: 1.0.
[I 2025-02-25 17:07:22,641] Trial 39 finished with value: 0.8

In [11]:
# Plot the per-model scores collected in test_accuracies_dict
# (one value per extractor: the mean over the shuffle splits).
fig = plot_model_comparison(test_accuracies_dict)
fig.show() # Render the figure object returned by plot_model_comparison

In [12]:
# Cross-model comparison based on nearest neighbours in each feature space.
# NOTE(review): the helpers come from modelling_utils and are not visible here;
# the step descriptions below are inferred from names/arguments — confirm.
model_features = extract_model_features(data)  # derived from the loaded feature dict
# 10 neighbours per sample, using cosine as the distance metric
model_neighbors = compute_knn_indices(model_features, num_neighbors=10, metric="cosine")
# Returns the matrix together with the model ordering used for its axes (presumably)
overlap_matrix, model_list = compute_overlap_matrix(model_neighbors)
fig = plot_overlap_matrix(overlap_matrix, model_list)
fig.show()  # Render the figure object returned by plot_overlap_matrix
