In [3]:
# Import necessary libraries
import pandas as pd # For data manipulation and analysis
import numpy as np # For numerical operations
import pickle # For serializing and deserializing Python objects
from modelling_utils import train_knn_classifier, evaluate_model, plot_model_comparison, extract_model_features, compute_knn_indices, compute_overlap_matrix, plot_overlap_matrix, split_shuffle_data, apply_aggregation_filter

In [4]:
# Read the pre-extracted feature dictionary from disk.
# NOTE(review): this is a machine-specific absolute path; the cell only runs where it exists.
feature_dict_path = "/home/suraj/Repositories/FM-extractors-radiomics/evaluation/features/nsclc_radiomics.pkl"
# NOTE(review): pickle.load can execute arbitrary code -- only point this at trusted files.
with open(feature_dict_path, mode="rb") as pickle_handle:
    # Later cells iterate `data` as {model_name: {"train" / "val" / "test": [...]}}
    data = pickle.load(pickle_handle)

In [5]:
# Per-model evaluation summary, keyed by model name
test_accuracies_dict = {}

# Each model's features are evaluated independently of the others
for model_name, values in data.items():
    split_names = ("train", "val", "test")

    # Survival labels of every predefined split
    labels_by_split = {
        name: [sample["row"]["survival"] for sample in values[name]]
        for name in split_names
    }

    # One stacked feature array per predefined split (each sample's feature is
    # passed through apply_aggregation_filter before stacking)
    features_by_split = {
        name: np.vstack([apply_aggregation_filter(sample["feature"], model_name) for sample in values[name]])
        for name in split_names
    }

    # Pool the predefined splits; the train -> val -> test order is the same for
    # features and labels, so rows and labels stay aligned
    all_items = np.vstack([features_by_split[name] for name in split_names])
    all_labels = [label for name in split_names for label in labels_by_split[name]]

    # Re-split the pooled data several times and score each resampling
    n_splits = 10
    split_scores = []

    # Seeds 10 .. 10 + n_splits - 1 give a different (but repeatable) shuffle per round
    for seed in range(10, 10 + n_splits):
        # presumably the remaining 0.3 of the data becomes the test portion -- TODO confirm in split_shuffle_data
        train_items, train_labels, val_items, val_labels, test_items, test_labels = split_shuffle_data(
            all_items,
            all_labels,
            train_ratio=0.5,
            val_ratio=0.2,
            random_seed=seed,
            stratify=True,
        )

        # Fit on the train portion; the validation portion is handed to the trainer as well
        best_model, study = train_knn_classifier(train_items, train_labels, val_items, val_labels)

        # Score the fitted model on this round's held-out test portion
        split_scores.append(evaluate_model(best_model, test_items, test_labels))

    # Mean score over all resamplings
    avg_score = np.mean(split_scores)

    # Normal-approximation 95% interval around the mean (sample std, ddof=1).
    # NOTE(review): 1.96 is the z critical value; with only 10 splits the t value
    # (~2.26 for 9 degrees of freedom) would give a wider interval -- confirm intent.
    std_error = np.std(split_scores, ddof=1) / np.sqrt(n_splits)
    margin = 1.96 * std_error

    # Record the mean and the (lower, upper) interval bounds for this model
    test_accuracies_dict[model_name] = {
        "mean": avg_score,
        "ci95": (avg_score - margin, avg_score + margin),
    }

[I 2025-03-12 16:55:46,158] A new study created in memory with name: no-name-271fcf6b-be3a-4269-a886-5e2cf4b90268


[I 2025-03-12 16:55:46,168] Trial 0 finished with value: 0.5989042675893888 and parameters: {'k': 29}. Best is trial 0 with value: 0.5989042675893888.
[I 2025-03-12 16:55:46,175] Trial 1 finished with value: 0.552479815455594 and parameters: {'k': 12}. Best is trial 0 with value: 0.5989042675893888.
[I 2025-03-12 16:55:46,182] Trial 2 finished with value: 0.5671856978085352 and parameters: {'k': 11}. Best is trial 0 with value: 0.5989042675893888.
[I 2025-03-12 16:55:46,189] Trial 3 finished with value: 0.5908304498269896 and parameters: {'k': 42}. Best is trial 0 with value: 0.5989042675893888.
[I 2025-03-12 16:55:46,196] Trial 4 finished with value: 0.5594002306805075 and parameters: {'k': 3}. Best is trial 0 with value: 0.5989042675893888.
[I 2025-03-12 16:55:46,204] Trial 5 finished with value: 0.6061130334486736 and parameters: {'k': 28}. Best is trial 5 with value: 0.6061130334486736.
[I 2025-03-12 16:55:46,211] Trial 6 finished with value: 0.5925605536332179 and parameters: {'k'

In [7]:
# Draw the model comparison figure from the per-model results in test_accuracies_dict
fig = plot_model_comparison(
    test_accuracies_dict,
    font_size=30,
    height=1200,
    width=800,
    marker_color="#00A2FF",
)
fig.show()  # render the figure in the notebook output

In [8]:
# Pull the per-model features out of the loaded feature dictionary
model_features = extract_model_features(data)

# Neighbour indices per model: 10 neighbours, cosine metric
model_neighbors = compute_knn_indices(
    model_features,
    num_neighbors=10,
    metric="cosine",
)

# Overlap matrix plus the model ordering that goes with it
overlap_result = compute_overlap_matrix(model_neighbors)
overlap_matrix, model_list = overlap_result

# Plot the matrix with the plotting helper's default styling
fig = plot_overlap_matrix(overlap_matrix, model_list)
fig.show()


In [6]:
from pathlib import Path

# Store this dataset's mean scores as one column of the shared results table.
results_csv = Path("overall_results.csv")

if results_csv.exists():
    # to_csv (below) writes the row index as the first, unnamed column. Read it
    # back as the index; otherwise every re-run of this cell turns the old index
    # into an extra "Unnamed: 0", "Unnamed: 0.1", ... data column.
    df = pd.read_csv(results_csv, index_col=0)

    # Drop artefact columns left behind by earlier runs that re-read the index as data
    stale_columns = [col for col in df.columns if str(col).startswith("Unnamed:")]
    df = df.drop(columns=stale_columns)
else:
    df = pd.DataFrame()

# Values are assigned positionally, in test_accuracies_dict iteration order.
# pandas raises ValueError if an existing table has a different number of rows.
df["NSCLC_Radiomics"] = [v["mean"] for k, v in test_accuracies_dict.items()]
df.to_csv(results_csv)

In [7]:
# Pull the per-model features out of the loaded feature dictionary
model_features = extract_model_features(data)

# Neighbour indices per model: 10 neighbours, cosine metric
model_neighbors = compute_knn_indices(
    model_features,
    num_neighbors=10,
    metric="cosine",
)

# Overlap matrix plus the model ordering that goes with it
overlap_result = compute_overlap_matrix(model_neighbors)
overlap_matrix, model_list = overlap_result

# Plot the matrix with a larger font and tick labels rotated by 45 degrees
fig = plot_overlap_matrix(
    overlap_matrix,
    model_list,
    font_size=30,
    tickangle=45,
)
fig.show()