In [43]:
# Import necessary libraries

import os # For reading environment variables
import pickle # For serializing and deserializing Python objects

import numpy as np # For numerical operations
import pandas as pd # For data manipulation and analysis

from modelling_utils import train_knn_classifier, evaluate_model, plot_model_comparison

In [44]:
# Load the pre-extracted features from a pickle file.
# The path defaults to the original author's local checkout; set the
# NSCLC_RADIOGENOMICS_FEATURES environment variable to use a different file
# without editing the notebook (an unset or empty variable falls back to the default).
feature_dict_path = os.environ.get("NSCLC_RADIOGENOMICS_FEATURES") or (
    "/home/suraj/Repositories/FM-extractors-radiomics/evaluation/features/nsclc_radiogenomics.pkl"
)
# SECURITY: pickle.load can execute arbitrary code during deserialization;
# only point this at feature files that come from a trusted source.
with open(feature_dict_path, 'rb') as file: # pickle requires binary mode
    # The evaluation cell iterates `data.items()` as extractor name -> splits
    # ("train" / "val" / "test"), each a sequence of entries with "row" and "feature".
    data = pickle.load(file)

In [45]:
# Held-out test result for each feature extractor, keyed by extractor name
test_accuracies_dict = {}

# Each entry of `data` maps an extractor name to its train/val/test samples
for model_name, values in data.items():
    # MedImageInsightExtractor is intentionally left out of this comparison
    if model_name != "MedImageInsightExtractor":
        # Gather the survival label of every sample, one list per split
        train_labels, val_labels, test_labels = (
            [sample["row"]["survival"] for sample in values[split]]
            for split in ("train", "val", "test")
        )

        # Stack the per-sample feature vectors into a single array per split
        train_items, val_items, test_items = (
            np.vstack([sample["feature"] for sample in values[split]])
            for split in ("train", "val", "test")
        )

        # Hand the train and validation splits to the KNN trainer, which runs
        # 30 hyperparameter-search trials and returns the chosen model and the study
        best_model, study = train_knn_classifier(
            train_items, train_labels, val_items, val_labels, n_trials=30
        )

        # Record whatever evaluate_model reports for this extractor on the test split
        test_accuracies_dict[model_name] = evaluate_model(best_model, test_items, test_labels)

[I 2025-01-22 16:27:37,706] A new study created in memory with name: no-name-d62270f9-838d-49fc-b4bb-0fdca8932ebd
[I 2025-01-22 16:27:37,715] Trial 0 finished with value: 0.4791666666666667 and parameters: {'k': 3}. Best is trial 0 with value: 0.4791666666666667.
[I 2025-01-22 16:27:37,723] Trial 1 finished with value: 0.6875 and parameters: {'k': 29}. Best is trial 1 with value: 0.6875.
[I 2025-01-22 16:27:37,731] Trial 2 finished with value: 0.6458333333333334 and parameters: {'k': 14}. Best is trial 1 with value: 0.6875.
[I 2025-01-22 16:27:37,739] Trial 3 finished with value: 0.6041666666666667 and parameters: {'k': 11}. Best is trial 1 with value: 0.6875.
[I 2025-01-22 16:27:37,749] Trial 4 finished with value: 0.7395833333333334 and parameters: {'k': 27}. Best is trial 4 with value: 0.7395833333333334.
[I 2025-01-22 16:27:37,757] Trial 5 finished with value: 0.8333333333333334 and parameters: {'k': 25}. Best is trial 5 with value: 0.8333333333333334.
[I 2025-01-22 16:27:37,765] T

In [46]:
# Compare all evaluated extractors in a single figure, built by the project
# helper from the per-model test results collected in `test_accuracies_dict`.
fig = plot_model_comparison(test_accuracies_dict)
fig.show() # Display the figure object returned by plot_model_comparison