# **A Novel Approach for Three-Way Classification of Lumbar Spine Degeneration Using Pseudo-Modality Learning to Handle Missing MRI Data**

## Libs

In [1]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.losses import BinaryCrossentropy
from sklearn.model_selection import KFold
from sklearn.utils.class_weight import compute_class_weight
import tensorflow as tf
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC

from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.model_selection import KFold
import joblib
import lightgbm as lgb
from tqdm import tqdm

## Training Pipeline

In [2]:
attention_embeddings_paths = [
    '/kaggle/input/attention-embeddings-for-rsna/AT2_attention_embeddings_gsl.csv',
    '/kaggle/input/attention-embeddings-for-rsna/AT2_attention_embeddings_hist.csv',
    '/kaggle/input/attention-embeddings-for-rsna/ST1_attention_embeddings_gsl.csv',
    '/kaggle/input/attention-embeddings-for-rsna/ST1_attention_embeddings_hist.csv',
    '/kaggle/input/attention-embeddings-for-rsna/ST2_attention_embeddings_gsl.csv',
    '/kaggle/input/attention-embeddings-for-rsna/ST2_attention_embeddings_hist.csv'    
]

average_embeddings_paths = [
    '/kaggle/input/embeddings-for-rsna/at2-greyscl/final_embeddings.csv',
    '/kaggle/input/embeddings-for-rsna/at2-hist/final_embeddings.csv',
    '/kaggle/input/embeddings-for-rsna/st1-greyscl/final_embeddings.csv',
    '/kaggle/input/embeddings-for-rsna/st1-hist/final_embeddings.csv',
    '/kaggle/input/embeddings-for-rsna/st2-greyscl/final_embeddings.csv',
    '/kaggle/input/embeddings-for-rsna/st2-hist/final_embeddings.csv'
]

medicalnet_embeddings_paths = [
    '/kaggle/input/medicalnet-attention-layers-for-rsna/AT2_attention_embeddings_gsl.csv',
    '/kaggle/input/medicalnet-attention-layers-for-rsna/AT2_attention_embeddings_hist.csv',
    '/kaggle/input/medicalnet-attention-layers-for-rsna/ST1_attention_embeddings_gsl.csv',
    '/kaggle/input/medicalnet-attention-layers-for-rsna/ST1_attention_embeddings_hist.csv',
    '/kaggle/input/medicalnet-attention-layers-for-rsna/ST2_attention_embeddings_gsl.csv',
    '/kaggle/input/medicalnet-attention-layers-for-rsna/ST2_attention_embeddings_hist.csv',
]

labels_paths = [
    '/kaggle/input/preprocessed-dataset/train_data_AT2.csv',
    '/kaggle/input/preprocessed-dataset/train_data_ST1.csv',
    '/kaggle/input/preprocessed-dataset/train_data_ST2.csv'
]

list_of_combination = [
    'AT2 - GSL - Attention Network',
    'AT2 - HIST - Attention Network',
    'ST1 - GSL - Attention Network',
    'ST1 - HIST - Attention Network',
    'ST2 - GSL - Attention Network',
    'ST2 - HIST - Attention Network',
    
    'AT2 - GSL - Average ResNet50',
    'AT2 - HIST - Average ResNet50',
    'ST1 - GSL - Average ResNet50',
    'ST1 - HIST - Average ResNet50',
    'ST2 - GSL - Average ResNet50',
    'ST2 - HIST - Average ResNet50',
    
    'AT2 - GSL - MedicalNet Network',
    'AT2 - HIST - MedicalNet Network',
    'ST1 - GSL - MedicalNet Network',
    'ST1 - HIST - MedicalNet Network',
    'ST2 - GSL - MedicalNet Network',
    'ST2 - HIST - MedicalNet Network'
]

results_df = pd.DataFrame(columns=['Combination', 'Avg_Test_Accuracy', 'Avg_Val_Accuracy'])

all_embedding_paths = [
    *attention_embeddings_paths,
    *average_embeddings_paths,
    *medicalnet_embeddings_paths
]

corresponding_labels_paths = [
    labels_paths[0],  # AT2 - GSL - Attention Network
    labels_paths[0],  # AT2 - HIST - Attention Network
    labels_paths[1],  # ST1 - GSL - Attention Network
    labels_paths[1],  # ST1 - HIST - Attention Network
    labels_paths[2],  # ST2 - GSL - Attention Network
    labels_paths[2],  # ST2 - HIST - Attention Network
    
    labels_paths[0],  # AT2 - GSL - Average ResNet50
    labels_paths[0],  # AT2 - HIST - Average ResNet50
    labels_paths[1],  # ST1 - GSL - Average ResNet50
    labels_paths[1],  # ST1 - HIST - Average ResNet50
    labels_paths[2],  # ST2 - GSL - Average ResNet50
    labels_paths[2],  # ST2 - HIST - Average ResNet50
    
    labels_paths[0],  # AT2 - GSL - MedicalNet Network
    labels_paths[0],  # AT2 - HIST - MedicalNet Network
    labels_paths[1],  # ST1 - GSL - MedicalNet Network
    labels_paths[1],  # ST1 - HIST - MedicalNet Network
    labels_paths[2],  # ST2 - GSL - MedicalNet Network
    labels_paths[2],  # ST2 - HIST - MedicalNet Network
]

In [3]:
def train(embedding_path, labels_path, model_save_path_prefix='lgb_model_col_'):
    embeddings = pd.read_csv(embedding_path)
    labels = pd.read_csv(labels_path)

    id_cols = labels[['study_id', 'series_id']]
    cols_to_impute = labels.drop(columns=['study_id', 'series_id'])
    imputed_cols = cols_to_impute.apply(lambda x: x.fillna(x.mode()[0]))
    labels = pd.concat([id_cols, imputed_cols], axis=1)

    id_cols = labels[['study_id', 'series_id']]
    cols_to_encode = labels.drop(columns=['study_id', 'series_id'])
    encoded_df = cols_to_encode.apply(LabelEncoder().fit_transform)
    final_df = pd.concat([id_cols, encoded_df], axis=1)

    df = pd.merge(embeddings, final_df, on='study_id', how='inner')

    X = df.iloc[:, :512].values
    Y = df.iloc[:, 515:].values

    avg_accuracies = []
    avg_f1_scores = []
    avg_roc_auc_scores = []

    for col in tqdm(range(Y.shape[1]), desc="Training Columns"):
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y[:, col], test_size=0.2, random_state=42)

        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        try:
            minority_class_size = min(np.bincount(Y_train))
            n_neighbors = min(5, minority_class_size - 1)
            smote = SMOTE(random_state=42, k_neighbors=n_neighbors)
            X_train, Y_train = smote.fit_resample(X_train, Y_train)
        except ValueError as e:
            print(f"Skipping SMOTE for column {col} due to error: {e}")

        lgb_train = lgb.Dataset(X_train, Y_train)

        params = {
            'objective': 'multiclass',
            'num_class': len(np.unique(Y_train)),
            'metric': 'multi_logloss',
            'boosting_type': 'gbdt',
            'learning_rate': 0.1,
            'num_leaves': 31,
            'verbose': -1
        }

        model = lgb.train(params, lgb_train)

        joblib.dump(model, f'{model_save_path_prefix}{col}.joblib')

        Y_pred = model.predict(X_test)
        Y_pred_classes = np.argmax(Y_pred, axis=1)

        accuracy = accuracy_score(Y_test, Y_pred_classes)
        f1 = f1_score(Y_test, Y_pred_classes, average='macro')

        if len(np.unique(Y_test)) > 2:
            roc_auc = roc_auc_score(Y_test, Y_pred, multi_class='ovr')
        else:
            roc_auc = roc_auc_score(Y_test, Y_pred_classes)

        avg_accuracies.append(accuracy)
        avg_f1_scores.append(f1)
        avg_roc_auc_scores.append(roc_auc)

        print(f'Column {col}: Accuracy = {accuracy:.4f}, F1 Score = {f1:.4f}, ROC AUC = {roc_auc:.4f}')

    return np.mean(avg_accuracies), np.mean(avg_f1_scores), np.mean(avg_roc_auc_scores)

In [4]:
results_df = pd.DataFrame(columns=['Combination', 'Avg_Test_Accuracy', 'Avg_F1_Score', 'Avg_ROC_AUC'])

for embedding_path, label_path, name in zip(all_embedding_paths, corresponding_labels_paths, list_of_combination):
    print(f"\nTraining for: {name} - {embedding_path}")
    
    model_save_path_prefix = f'{name}_svm_model_col_'
    
    avg_accuracy, avg_f1_score, avg_roc_auc = train(embedding_path, label_path)
    
    result_row = pd.DataFrame({
        'Combination': [name],
        'Avg_Test_Accuracy': [avg_accuracy],
        'Avg_F1_Score': [avg_f1_score], 
        'Avg_ROC_AUC': [avg_roc_auc],
    })
    
    results_df = pd.concat([results_df, result_row], ignore_index=True)

results_df.to_csv('results_summary.csv', index=False)

print("Training complete. Results saved to 'results_summary.csv'.")


Training for: AT2 - GSL - Attention Network - /kaggle/input/attention-embeddings-for-rsna/AT2_attention_embeddings_gsl.csv


Training Columns:  10%|█         | 1/10 [00:17<02:33, 17.01s/it]

Column 0: Accuracy = 0.9799, F1 Score = 0.7641, ROC AUC = 0.6842


Training Columns:  20%|██        | 2/10 [00:32<02:09, 16.19s/it]

Column 1: Accuracy = 0.9447, F1 Score = 0.5292, ROC AUC = 0.8402


Training Columns:  30%|███       | 3/10 [00:49<01:56, 16.59s/it]

Column 2: Accuracy = 0.8459, F1 Score = 0.7027, ROC AUC = 0.8790


Training Columns:  40%|████      | 4/10 [01:04<01:34, 15.83s/it]

Column 3: Accuracy = 0.7638, F1 Score = 0.6702, ROC AUC = 0.8549


Training Columns:  50%|█████     | 5/10 [01:21<01:20, 16.13s/it]

Column 4: Accuracy = 0.7722, F1 Score = 0.6438, ROC AUC = 0.8558


Training Columns:  60%|██████    | 6/10 [01:36<01:03, 15.85s/it]

Column 5: Accuracy = 0.9749, F1 Score = 0.6990, ROC AUC = 0.8689


Training Columns:  70%|███████   | 7/10 [01:53<00:49, 16.40s/it]

Column 6: Accuracy = 0.9497, F1 Score = 0.7214, ROC AUC = 0.8904


Training Columns:  80%|████████  | 8/10 [02:09<00:32, 16.04s/it]

Column 7: Accuracy = 0.8878, F1 Score = 0.7081, ROC AUC = 0.8762


Training Columns:  90%|█████████ | 9/10 [02:25<00:16, 16.08s/it]

Column 8: Accuracy = 0.7772, F1 Score = 0.6627, ROC AUC = 0.8657


Training Columns: 100%|██████████| 10/10 [02:40<00:00, 16.03s/it]

Column 9: Accuracy = 0.7739, F1 Score = 0.6836, ROC AUC = 0.8548

Training for: AT2 - HIST - Attention Network - /kaggle/input/attention-embeddings-for-rsna/AT2_attention_embeddings_hist.csv



  results_df = pd.concat([results_df, result_row], ignore_index=True)
Training Columns:  10%|█         | 1/10 [00:16<02:29, 16.67s/it]

Column 0: Accuracy = 0.9765, F1 Score = 0.5088, ROC AUC = 0.6853


Training Columns:  20%|██        | 2/10 [00:32<02:08, 16.03s/it]

Column 1: Accuracy = 0.9380, F1 Score = 0.5115, ROC AUC = 0.8670


Training Columns:  30%|███       | 3/10 [00:49<01:56, 16.58s/it]

Column 2: Accuracy = 0.8425, F1 Score = 0.7028, ROC AUC = 0.8640


Training Columns:  40%|████      | 4/10 [01:04<01:34, 15.82s/it]

Column 3: Accuracy = 0.7554, F1 Score = 0.6615, ROC AUC = 0.8595


Training Columns:  50%|█████     | 5/10 [01:20<01:20, 16.10s/it]

Column 4: Accuracy = 0.7672, F1 Score = 0.6421, ROC AUC = 0.8596


Training Columns:  60%|██████    | 6/10 [01:36<01:03, 15.90s/it]

Column 5: Accuracy = 0.9765, F1 Score = 0.7065, ROC AUC = 0.9070


Training Columns:  70%|███████   | 7/10 [01:53<00:49, 16.41s/it]

Column 6: Accuracy = 0.9481, F1 Score = 0.7107, ROC AUC = 0.8589


Training Columns:  80%|████████  | 8/10 [02:09<00:32, 16.05s/it]

Column 7: Accuracy = 0.8844, F1 Score = 0.6879, ROC AUC = 0.8994


Training Columns:  90%|█████████ | 9/10 [02:25<00:16, 16.06s/it]

Column 8: Accuracy = 0.7655, F1 Score = 0.6580, ROC AUC = 0.8464


Training Columns: 100%|██████████| 10/10 [02:39<00:00, 16.00s/it]

Column 9: Accuracy = 0.7621, F1 Score = 0.6808, ROC AUC = 0.8552

Training for: ST1 - GSL - Attention Network - /kaggle/input/attention-embeddings-for-rsna/ST1_attention_embeddings_gsl.csv



Training Columns:  10%|█         | 1/10 [00:15<02:17, 15.28s/it]

Column 0: Accuracy = 0.9393, F1 Score = 0.3808, ROC AUC = 0.6308


Training Columns:  20%|██        | 2/10 [00:29<01:55, 14.41s/it]

Column 1: Accuracy = 0.8285, F1 Score = 0.3152, ROC AUC = 0.6604


Training Columns:  30%|███       | 3/10 [00:43<01:41, 14.54s/it]

Column 2: Accuracy = 0.6280, F1 Score = 0.3178, ROC AUC = 0.6256


Training Columns:  40%|████      | 4/10 [00:55<01:20, 13.35s/it]

Column 3: Accuracy = 0.4697, F1 Score = 0.4053, ROC AUC = 0.6066


Training Columns:  50%|█████     | 5/10 [01:08<01:06, 13.34s/it]

Column 4: Accuracy = 0.6755, F1 Score = 0.3040, ROC AUC = 0.5893


Training Columns:  60%|██████    | 6/10 [01:23<00:55, 13.98s/it]

Column 5: Accuracy = 0.9367, F1 Score = 0.3224, ROC AUC = 0.5961


Training Columns:  70%|███████   | 7/10 [01:37<00:41, 13.89s/it]

Column 6: Accuracy = 0.8364, F1 Score = 0.3509, ROC AUC = 0.7013


Training Columns:  80%|████████  | 8/10 [01:52<00:28, 14.17s/it]

Column 7: Accuracy = 0.6306, F1 Score = 0.3519, ROC AUC = 0.6299


Training Columns:  90%|█████████ | 9/10 [02:03<00:13, 13.35s/it]

Column 8: Accuracy = 0.3879, F1 Score = 0.3446, ROC AUC = 0.5793


Training Columns: 100%|██████████| 10/10 [02:18<00:00, 13.87s/it]

Column 9: Accuracy = 0.6860, F1 Score = 0.3367, ROC AUC = 0.6076

Training for: ST1 - HIST - Attention Network - /kaggle/input/attention-embeddings-for-rsna/ST1_attention_embeddings_hist.csv



Training Columns:  10%|█         | 1/10 [00:13<02:02, 13.59s/it]

Column 0: Accuracy = 0.9340, F1 Score = 0.3988, ROC AUC = 0.7728


Training Columns:  20%|██        | 2/10 [00:27<01:48, 13.62s/it]

Column 1: Accuracy = 0.8364, F1 Score = 0.3514, ROC AUC = 0.6703


Training Columns:  30%|███       | 3/10 [00:42<01:39, 14.22s/it]

Column 2: Accuracy = 0.6517, F1 Score = 0.3228, ROC AUC = 0.6061


Training Columns:  40%|████      | 4/10 [00:53<01:18, 13.15s/it]

Column 3: Accuracy = 0.4512, F1 Score = 0.3916, ROC AUC = 0.5895


Training Columns:  50%|█████     | 5/10 [01:08<01:09, 13.84s/it]

Column 4: Accuracy = 0.6781, F1 Score = 0.2827, ROC AUC = 0.5255


Training Columns:  60%|██████    | 6/10 [01:22<00:55, 13.77s/it]

Column 5: Accuracy = 0.9393, F1 Score = 0.3229, ROC AUC = 0.6506


Training Columns:  70%|███████   | 7/10 [01:37<00:42, 14.32s/it]

Column 6: Accuracy = 0.8575, F1 Score = 0.3828, ROC AUC = 0.6945


Training Columns:  80%|████████  | 8/10 [01:50<00:27, 13.93s/it]

Column 7: Accuracy = 0.6042, F1 Score = 0.3123, ROC AUC = 0.6003


Training Columns:  90%|█████████ | 9/10 [02:02<00:13, 13.17s/it]

Column 8: Accuracy = 0.4090, F1 Score = 0.3598, ROC AUC = 0.5738


Training Columns: 100%|██████████| 10/10 [02:17<00:00, 13.72s/it]

Column 9: Accuracy = 0.7071, F1 Score = 0.3392, ROC AUC = 0.5838

Training for: ST2 - GSL - Attention Network - /kaggle/input/attention-embeddings-for-rsna/ST2_attention_embeddings_gsl.csv



Training Columns:  20%|██        | 1/5 [00:13<00:52, 13.15s/it]

Column 0: Accuracy = 0.9654, F1 Score = 0.3275, ROC AUC = 0.6629


Training Columns:  40%|████      | 2/5 [00:28<00:43, 14.53s/it]

Column 1: Accuracy = 0.9016, F1 Score = 0.3161, ROC AUC = 0.6978


Training Columns:  60%|██████    | 3/5 [00:42<00:28, 14.22s/it]

Column 2: Accuracy = 0.8138, F1 Score = 0.3837, ROC AUC = 0.6196


Training Columns:  80%|████████  | 4/5 [00:57<00:14, 14.60s/it]

Column 3: Accuracy = 0.7181, F1 Score = 0.3593, ROC AUC = 0.6630


Training Columns: 100%|██████████| 5/5 [01:10<00:00, 14.18s/it]

Column 4: Accuracy = 0.9681, F1 Score = 0.3279, ROC AUC = 0.4474

Training for: ST2 - HIST - Attention Network - /kaggle/input/attention-embeddings-for-rsna/ST2_attention_embeddings_hist.csv



Training Columns:  20%|██        | 1/5 [00:13<00:53, 13.26s/it]

Column 0: Accuracy = 0.9601, F1 Score = 0.3265, ROC AUC = 0.7347


Training Columns:  40%|████      | 2/5 [00:28<00:43, 14.55s/it]

Column 1: Accuracy = 0.9016, F1 Score = 0.3556, ROC AUC = 0.7389


Training Columns:  60%|██████    | 3/5 [00:42<00:28, 14.18s/it]

Column 2: Accuracy = 0.8085, F1 Score = 0.3109, ROC AUC = 0.6653


Training Columns:  80%|████████  | 4/5 [00:57<00:14, 14.59s/it]

Column 3: Accuracy = 0.7154, F1 Score = 0.3478, ROC AUC = 0.6321


Training Columns: 100%|██████████| 5/5 [01:10<00:00, 14.20s/it]

Column 4: Accuracy = 0.9681, F1 Score = 0.3279, ROC AUC = 0.5580

Training for: AT2 - GSL - Average ResNet50 - /kaggle/input/embeddings-for-rsna/at2-greyscl/final_embeddings.csv



Training Columns:  10%|█         | 1/10 [00:16<02:26, 16.33s/it]

Column 0: Accuracy = 0.9765, F1 Score = 0.7440, ROC AUC = 0.6825


Training Columns:  20%|██        | 2/10 [00:30<02:01, 15.15s/it]

Column 1: Accuracy = 0.9179, F1 Score = 0.4935, ROC AUC = 0.6928


Training Columns:  30%|███       | 3/10 [00:45<01:46, 15.21s/it]

Column 2: Accuracy = 0.8191, F1 Score = 0.6394, ROC AUC = 0.8248


Training Columns:  40%|████      | 4/10 [00:58<01:25, 14.23s/it]

Column 3: Accuracy = 0.7052, F1 Score = 0.5991, ROC AUC = 0.7875


Training Columns:  50%|█████     | 5/10 [01:11<01:08, 13.66s/it]

Column 4: Accuracy = 0.7236, F1 Score = 0.6161, ROC AUC = 0.7934


Training Columns:  60%|██████    | 6/10 [01:28<00:58, 14.69s/it]

Column 5: Accuracy = 0.9715, F1 Score = 0.6856, ROC AUC = 0.8091


Training Columns:  70%|███████   | 7/10 [01:42<00:43, 14.56s/it]

Column 6: Accuracy = 0.9313, F1 Score = 0.6148, ROC AUC = 0.7960


Training Columns:  80%|████████  | 8/10 [01:57<00:29, 14.82s/it]

Column 7: Accuracy = 0.8476, F1 Score = 0.7502, ROC AUC = 0.8163


Training Columns:  90%|█████████ | 9/10 [02:10<00:14, 14.09s/it]

Column 8: Accuracy = 0.7487, F1 Score = 0.6473, ROC AUC = 0.8217


Training Columns: 100%|██████████| 10/10 [02:24<00:00, 14.44s/it]

Column 9: Accuracy = 0.7521, F1 Score = 0.6724, ROC AUC = 0.8269

Training for: AT2 - HIST - Average ResNet50 - /kaggle/input/embeddings-for-rsna/at2-hist/final_embeddings.csv



Training Columns:  10%|█         | 1/10 [00:14<02:14, 14.95s/it]

Column 0: Accuracy = 0.9765, F1 Score = 0.7440, ROC AUC = 0.6825


Training Columns:  20%|██        | 2/10 [00:31<02:07, 15.93s/it]

Column 1: Accuracy = 0.9363, F1 Score = 0.5188, ROC AUC = 0.8533


Training Columns:  30%|███       | 3/10 [00:45<01:46, 15.15s/it]

Column 2: Accuracy = 0.8157, F1 Score = 0.6331, ROC AUC = 0.8244


Training Columns:  40%|████      | 4/10 [01:00<01:30, 15.06s/it]

Column 3: Accuracy = 0.7219, F1 Score = 0.6232, ROC AUC = 0.7970


Training Columns:  50%|█████     | 5/10 [01:13<01:11, 14.39s/it]

Column 4: Accuracy = 0.7370, F1 Score = 0.6168, ROC AUC = 0.8303


Training Columns:  60%|██████    | 6/10 [01:30<01:00, 15.07s/it]

Column 5: Accuracy = 0.9698, F1 Score = 0.6795, ROC AUC = 0.8537


Training Columns:  70%|███████   | 7/10 [01:45<00:45, 15.00s/it]

Column 6: Accuracy = 0.9430, F1 Score = 0.7092, ROC AUC = 0.8279


Training Columns:  80%|████████  | 8/10 [01:59<00:29, 14.70s/it]

Column 7: Accuracy = 0.8442, F1 Score = 0.7105, ROC AUC = 0.8559


Training Columns:  90%|█████████ | 9/10 [02:13<00:14, 14.73s/it]

Column 8: Accuracy = 0.7337, F1 Score = 0.6459, ROC AUC = 0.8181


Training Columns: 100%|██████████| 10/10 [02:27<00:00, 14.72s/it]

Column 9: Accuracy = 0.7387, F1 Score = 0.6531, ROC AUC = 0.8251

Training for: ST1 - GSL - Average ResNet50 - /kaggle/input/embeddings-for-rsna/st1-greyscl/final_embeddings.csv



Training Columns:  10%|█         | 1/10 [00:14<02:11, 14.62s/it]

Column 0: Accuracy = 0.9156, F1 Score = 0.3650, ROC AUC = 0.6393


Training Columns:  20%|██        | 2/10 [00:26<01:45, 13.23s/it]

Column 1: Accuracy = 0.7784, F1 Score = 0.3513, ROC AUC = 0.5552


Training Columns:  30%|███       | 3/10 [00:38<01:26, 12.38s/it]

Column 2: Accuracy = 0.6042, F1 Score = 0.3756, ROC AUC = 0.5553


Training Columns:  40%|████      | 4/10 [00:50<01:12, 12.15s/it]

Column 3: Accuracy = 0.4195, F1 Score = 0.3734, ROC AUC = 0.5669


Training Columns:  50%|█████     | 5/10 [01:01<00:59, 11.90s/it]

Column 4: Accuracy = 0.6332, F1 Score = 0.3237, ROC AUC = 0.5732


Training Columns:  60%|██████    | 6/10 [01:16<00:51, 12.79s/it]

Column 5: Accuracy = 0.9024, F1 Score = 0.3370, ROC AUC = 0.6473


Training Columns:  70%|███████   | 7/10 [01:28<00:37, 12.63s/it]

Column 6: Accuracy = 0.7889, F1 Score = 0.3594, ROC AUC = 0.6080


Training Columns:  80%|████████  | 8/10 [01:39<00:24, 12.23s/it]

Column 7: Accuracy = 0.5646, F1 Score = 0.3629, ROC AUC = 0.5373


Training Columns:  90%|█████████ | 9/10 [01:51<00:12, 12.09s/it]

Column 8: Accuracy = 0.3799, F1 Score = 0.3520, ROC AUC = 0.5178


Training Columns: 100%|██████████| 10/10 [02:02<00:00, 12.29s/it]

Column 9: Accuracy = 0.6517, F1 Score = 0.4221, ROC AUC = 0.5902

Training for: ST1 - HIST - Average ResNet50 - /kaggle/input/embeddings-for-rsna/st1-hist/final_embeddings.csv



Training Columns:  10%|█         | 1/10 [00:14<02:14, 14.91s/it]

Column 0: Accuracy = 0.9156, F1 Score = 0.3607, ROC AUC = 0.7045


Training Columns:  20%|██        | 2/10 [00:27<01:49, 13.64s/it]

Column 1: Accuracy = 0.7704, F1 Score = 0.3817, ROC AUC = 0.5624


Training Columns:  30%|███       | 3/10 [00:39<01:29, 12.73s/it]

Column 2: Accuracy = 0.6069, F1 Score = 0.4036, ROC AUC = 0.5950


Training Columns:  40%|████      | 4/10 [00:51<01:14, 12.49s/it]

Column 3: Accuracy = 0.4037, F1 Score = 0.3762, ROC AUC = 0.5795


Training Columns:  50%|█████     | 5/10 [01:03<01:01, 12.33s/it]

Column 4: Accuracy = 0.6227, F1 Score = 0.3450, ROC AUC = 0.5105


Training Columns:  60%|██████    | 6/10 [01:16<00:50, 12.60s/it]

Column 5: Accuracy = 0.9024, F1 Score = 0.3542, ROC AUC = 0.5870


Training Columns:  70%|███████   | 7/10 [01:30<00:39, 13.17s/it]

Column 6: Accuracy = 0.7810, F1 Score = 0.3907, ROC AUC = 0.6354


Training Columns:  80%|████████  | 8/10 [01:42<00:25, 12.78s/it]

Column 7: Accuracy = 0.5567, F1 Score = 0.3472, ROC AUC = 0.5551


Training Columns:  90%|█████████ | 9/10 [01:54<00:12, 12.54s/it]

Column 8: Accuracy = 0.4063, F1 Score = 0.3808, ROC AUC = 0.5507


Training Columns: 100%|██████████| 10/10 [02:06<00:00, 12.67s/it]

Column 9: Accuracy = 0.6069, F1 Score = 0.3506, ROC AUC = 0.5577

Training for: ST2 - GSL - Average ResNet50 - /kaggle/input/embeddings-for-rsna/st2-greyscl/final_embeddings.csv



Training Columns:  20%|██        | 1/5 [00:12<00:50, 12.57s/it]

Column 0: Accuracy = 0.9441, F1 Score = 0.3238, ROC AUC = 0.5421


Training Columns:  40%|████      | 2/5 [00:26<00:40, 13.54s/it]

Column 1: Accuracy = 0.8245, F1 Score = 0.3355, ROC AUC = 0.7029


Training Columns:  60%|██████    | 3/5 [00:38<00:25, 12.90s/it]

Column 2: Accuracy = 0.7394, F1 Score = 0.4077, ROC AUC = 0.6469


Training Columns:  80%|████████  | 4/5 [00:52<00:13, 13.09s/it]

Column 3: Accuracy = 0.6649, F1 Score = 0.3918, ROC AUC = 0.6544


Training Columns: 100%|██████████| 5/5 [01:05<00:00, 13.03s/it]

Column 4: Accuracy = 0.9495, F1 Score = 0.3663, ROC AUC = 0.5085

Training for: ST2 - HIST - Average ResNet50 - /kaggle/input/embeddings-for-rsna/st2-hist/final_embeddings.csv



Training Columns:  20%|██        | 1/5 [00:13<00:52, 13.06s/it]

Column 0: Accuracy = 0.9521, F1 Score = 0.3252, ROC AUC = 0.6620


Training Columns:  40%|████      | 2/5 [00:27<00:42, 14.08s/it]

Column 1: Accuracy = 0.8670, F1 Score = 0.3519, ROC AUC = 0.6571


Training Columns:  60%|██████    | 3/5 [00:40<00:27, 13.60s/it]

Column 2: Accuracy = 0.7713, F1 Score = 0.3894, ROC AUC = 0.6362


Training Columns:  80%|████████  | 4/5 [00:55<00:13, 13.84s/it]

Column 3: Accuracy = 0.6622, F1 Score = 0.3342, ROC AUC = 0.6390


Training Columns: 100%|██████████| 5/5 [01:08<00:00, 13.63s/it]

Column 4: Accuracy = 0.9574, F1 Score = 0.3261, ROC AUC = 0.5003

Training for: AT2 - GSL - MedicalNet Network - /kaggle/input/medicalnet-attention-layers-for-rsna/AT2_attention_embeddings_gsl.csv



Training Columns:  10%|█         | 1/10 [00:16<02:24, 16.01s/it]

Column 0: Accuracy = 0.9799, F1 Score = 0.7641, ROC AUC = 0.6842


Training Columns:  20%|██        | 2/10 [00:29<01:56, 14.58s/it]

Column 1: Accuracy = 0.9363, F1 Score = 0.5020, ROC AUC = 0.6889


Training Columns:  30%|███       | 3/10 [00:44<01:41, 14.52s/it]

Column 2: Accuracy = 0.8325, F1 Score = 0.6490, ROC AUC = 0.8143


Training Columns:  40%|████      | 4/10 [00:55<01:20, 13.39s/it]

Column 3: Accuracy = 0.7203, F1 Score = 0.6142, ROC AUC = 0.7735


Training Columns:  50%|█████     | 5/10 [01:07<01:03, 12.78s/it]

Column 4: Accuracy = 0.7370, F1 Score = 0.6469, ROC AUC = 0.7835


Training Columns:  60%|██████    | 6/10 [01:23<00:55, 13.97s/it]

Column 5: Accuracy = 0.9715, F1 Score = 0.6856, ROC AUC = 0.7089


Training Columns:  70%|███████   | 7/10 [01:37<00:41, 13.86s/it]

Column 6: Accuracy = 0.9397, F1 Score = 0.6702, ROC AUC = 0.7041


Training Columns:  80%|████████  | 8/10 [01:52<00:28, 14.14s/it]

Column 7: Accuracy = 0.8375, F1 Score = 0.6315, ROC AUC = 0.8278


Training Columns:  90%|█████████ | 9/10 [02:03<00:13, 13.38s/it]

Column 8: Accuracy = 0.7370, F1 Score = 0.6370, ROC AUC = 0.8052


Training Columns: 100%|██████████| 10/10 [02:15<00:00, 13.56s/it]

Column 9: Accuracy = 0.7454, F1 Score = 0.6656, ROC AUC = 0.8210

Training for: AT2 - HIST - MedicalNet Network - /kaggle/input/medicalnet-attention-layers-for-rsna/AT2_attention_embeddings_hist.csv



Training Columns:  10%|█         | 1/10 [00:15<02:21, 15.71s/it]

Column 0: Accuracy = 0.9799, F1 Score = 0.7641, ROC AUC = 0.6842


Training Columns:  20%|██        | 2/10 [00:29<01:56, 14.60s/it]

Column 1: Accuracy = 0.9296, F1 Score = 0.4953, ROC AUC = 0.6817


Training Columns:  30%|███       | 3/10 [00:43<01:41, 14.52s/it]

Column 2: Accuracy = 0.8224, F1 Score = 0.6460, ROC AUC = 0.8332


Training Columns:  40%|████      | 4/10 [00:56<01:21, 13.55s/it]

Column 3: Accuracy = 0.7270, F1 Score = 0.6258, ROC AUC = 0.8066


Training Columns:  50%|█████     | 5/10 [01:09<01:07, 13.58s/it]

Column 4: Accuracy = 0.7370, F1 Score = 0.6317, ROC AUC = 0.7784


Training Columns:  60%|██████    | 6/10 [01:24<00:55, 13.88s/it]

Column 5: Accuracy = 0.9749, F1 Score = 0.6990, ROC AUC = 0.7932


Training Columns:  70%|███████   | 7/10 [01:38<00:41, 13.89s/it]

Column 6: Accuracy = 0.9380, F1 Score = 0.6392, ROC AUC = 0.8273


Training Columns:  80%|████████  | 8/10 [01:52<00:28, 14.15s/it]

Column 7: Accuracy = 0.8459, F1 Score = 0.6226, ROC AUC = 0.8184


Training Columns:  90%|█████████ | 9/10 [02:04<00:13, 13.45s/it]

Column 8: Accuracy = 0.7538, F1 Score = 0.6604, ROC AUC = 0.7877


Training Columns: 100%|██████████| 10/10 [02:18<00:00, 13.83s/it]

Column 9: Accuracy = 0.7705, F1 Score = 0.6849, ROC AUC = 0.8022

Training for: ST1 - GSL - MedicalNet Network - /kaggle/input/medicalnet-attention-layers-for-rsna/ST1_attention_embeddings_gsl.csv



Training Columns:  10%|█         | 1/10 [00:12<01:53, 12.58s/it]

Column 0: Accuracy = 0.9235, F1 Score = 0.3660, ROC AUC = 0.7177


Training Columns:  20%|██        | 2/10 [00:24<01:38, 12.31s/it]

Column 1: Accuracy = 0.8232, F1 Score = 0.3357, ROC AUC = 0.6160


Training Columns:  30%|███       | 3/10 [00:37<01:28, 12.63s/it]

Column 2: Accuracy = 0.6280, F1 Score = 0.3520, ROC AUC = 0.5936


Training Columns:  40%|████      | 4/10 [00:47<01:09, 11.63s/it]

Column 3: Accuracy = 0.3879, F1 Score = 0.3656, ROC AUC = 0.5415


Training Columns:  50%|█████     | 5/10 [01:01<01:01, 12.22s/it]

Column 4: Accuracy = 0.6834, F1 Score = 0.3131, ROC AUC = 0.5441


Training Columns:  60%|██████    | 6/10 [01:13<00:49, 12.37s/it]

Column 5: Accuracy = 0.9314, F1 Score = 0.3215, ROC AUC = 0.6346


Training Columns:  70%|███████   | 7/10 [01:25<00:36, 12.29s/it]

Column 6: Accuracy = 0.8311, F1 Score = 0.4175, ROC AUC = 0.6182


Training Columns:  80%|████████  | 8/10 [01:38<00:25, 12.52s/it]

Column 7: Accuracy = 0.6095, F1 Score = 0.3951, ROC AUC = 0.6179


Training Columns:  90%|█████████ | 9/10 [01:48<00:11, 11.75s/it]

Column 8: Accuracy = 0.4037, F1 Score = 0.3613, ROC AUC = 0.5784


Training Columns: 100%|██████████| 10/10 [02:00<00:00, 12.03s/it]

Column 9: Accuracy = 0.6728, F1 Score = 0.3349, ROC AUC = 0.5740

Training for: ST1 - HIST - MedicalNet Network - /kaggle/input/medicalnet-attention-layers-for-rsna/ST1_attention_embeddings_hist.csv



Training Columns:  10%|█         | 1/10 [00:14<02:07, 14.16s/it]

Column 0: Accuracy = 0.9288, F1 Score = 0.3722, ROC AUC = 0.5946


Training Columns:  20%|██        | 2/10 [00:26<01:43, 12.97s/it]

Column 1: Accuracy = 0.8232, F1 Score = 0.3139, ROC AUC = 0.5816


Training Columns:  30%|███       | 3/10 [00:39<01:30, 12.91s/it]

Column 2: Accuracy = 0.6227, F1 Score = 0.3195, ROC AUC = 0.5691


Training Columns:  40%|████      | 4/10 [00:52<01:18, 13.10s/it]

Column 3: Accuracy = 0.4142, F1 Score = 0.3782, ROC AUC = 0.5522


Training Columns:  50%|█████     | 5/10 [01:03<01:02, 12.50s/it]

Column 4: Accuracy = 0.6781, F1 Score = 0.3093, ROC AUC = 0.5718


Training Columns:  60%|██████    | 6/10 [01:18<00:52, 13.12s/it]

Column 5: Accuracy = 0.9393, F1 Score = 0.3229, ROC AUC = 0.5986


Training Columns:  70%|███████   | 7/10 [01:30<00:38, 12.83s/it]

Column 6: Accuracy = 0.8364, F1 Score = 0.3497, ROC AUC = 0.5985


Training Columns:  80%|████████  | 8/10 [01:43<00:25, 12.87s/it]

Column 7: Accuracy = 0.6069, F1 Score = 0.3474, ROC AUC = 0.5249


Training Columns:  90%|█████████ | 9/10 [01:53<00:11, 11.99s/it]

Column 8: Accuracy = 0.3694, F1 Score = 0.3283, ROC AUC = 0.4997


Training Columns: 100%|██████████| 10/10 [02:05<00:00, 12.51s/it]

Column 9: Accuracy = 0.6702, F1 Score = 0.3195, ROC AUC = 0.5143

Training for: ST2 - GSL - MedicalNet Network - /kaggle/input/medicalnet-attention-layers-for-rsna/ST2_attention_embeddings_gsl.csv



Training Columns:  20%|██        | 1/5 [00:14<00:58, 14.55s/it]

Column 0: Accuracy = 0.9654, F1 Score = 0.3275, ROC AUC = 0.5974


Training Columns:  40%|████      | 2/5 [00:27<00:40, 13.41s/it]

Column 1: Accuracy = 0.8750, F1 Score = 0.3269, ROC AUC = 0.6391


Training Columns:  60%|██████    | 3/5 [00:41<00:27, 13.74s/it]

Column 2: Accuracy = 0.7979, F1 Score = 0.3179, ROC AUC = 0.5950


Training Columns:  80%|████████  | 4/5 [00:53<00:12, 12.96s/it]

Column 3: Accuracy = 0.7048, F1 Score = 0.3456, ROC AUC = 0.6026


Training Columns: 100%|██████████| 5/5 [01:05<00:00, 13.17s/it]

Column 4: Accuracy = 0.9628, F1 Score = 0.3270, ROC AUC = 0.6178

Training for: ST2 - HIST - MedicalNet Network - /kaggle/input/medicalnet-attention-layers-for-rsna/ST2_attention_embeddings_hist.csv



Training Columns:  20%|██        | 1/5 [00:14<00:57, 14.37s/it]

Column 0: Accuracy = 0.9681, F1 Score = 0.3279, ROC AUC = 0.5871


Training Columns:  40%|████      | 2/5 [00:26<00:40, 13.34s/it]

Column 1: Accuracy = 0.8910, F1 Score = 0.3141, ROC AUC = 0.5666


Training Columns:  60%|██████    | 3/5 [00:40<00:27, 13.60s/it]

Column 2: Accuracy = 0.7793, F1 Score = 0.2924, ROC AUC = 0.5744


Training Columns:  80%|████████  | 4/5 [00:52<00:12, 12.95s/it]

Column 3: Accuracy = 0.6862, F1 Score = 0.2835, ROC AUC = 0.5559


Training Columns: 100%|██████████| 5/5 [01:05<00:00, 13.13s/it]

Column 4: Accuracy = 0.9681, F1 Score = 0.3279, ROC AUC = 0.6266
Training complete. Results saved to 'results_summary.csv'.





In [5]:
results_df

Unnamed: 0,Combination,Avg_Test_Accuracy,Avg_F1_Score,Avg_ROC_AUC
0,AT2 - GSL - Attention Network,0.867002,0.678479,0.847017
1,AT2 - HIST - Attention Network,0.861642,0.647068,0.850232
2,ST1 - GSL - Attention Network,0.701847,0.342972,0.622694
3,ST1 - HIST - Attention Network,0.70686,0.346425,0.626725
4,ST2 - GSL - Attention Network,0.873404,0.342883,0.618152
5,ST2 - HIST - Attention Network,0.870745,0.33378,0.665818
6,AT2 - GSL - Average ResNet50,0.839363,0.646234,0.7851
7,AT2 - HIST - Average ResNet50,0.841709,0.65342,0.816812
8,ST1 - GSL - Average ResNet50,0.663852,0.362236,0.579038
9,ST1 - HIST - Average ResNet50,0.657256,0.369063,0.583785


In [6]:
def random_predictions(path):
    labels = pd.read_csv(path)

    id_cols = labels[['study_id', 'series_id']]
    cols_to_impute = labels.drop(columns=['study_id', 'series_id'])
    imputed_cols = cols_to_impute.apply(lambda x: x.fillna(x.mode()[0]))
    final_df = pd.concat([id_cols, imputed_cols], axis=1)

    id_cols = final_df[['study_id', 'series_id']]
    cols_to_encode = final_df.drop(columns=['study_id', 'series_id'])
    encoder = OneHotEncoder(sparse_output=False)
    encoded_cols = encoder.fit_transform(cols_to_encode)
    encoded_df = pd.DataFrame(encoded_cols, columns=encoder.get_feature_names_out(cols_to_encode.columns))
    final_df = pd.concat([id_cols, encoded_df], axis=1)

    Y_true = final_df.drop(columns=['study_id', 'series_id']).values

    np.random.seed(42)  # For reproducibility
    random_predictions = np.random.rand(*Y_true.shape)  # Random floats in [0.0, 1.0]

    threshold = np.mean(random_predictions)

    binary_predictions = (random_predictions > threshold).astype(float)

    accuracy = np.mean(np.all(binary_predictions == Y_true, axis=1))

    print(f"Random Predictions Accuracy: {accuracy:.4f}")
    print(f"Self-adjusting Threshold: {threshold:.4f}")

## Random Accuracy

In [7]:
random_predictions('/kaggle/input/preprocessed-dataset/train_data_AT2.csv')
random_predictions('/kaggle/input/preprocessed-dataset/train_data_ST1.csv')
random_predictions('/kaggle/input/preprocessed-dataset/train_data_ST2.csv')

Random Predictions Accuracy: 0.0000
Self-adjusting Threshold: 0.4996
Random Predictions Accuracy: 0.0000
Self-adjusting Threshold: 0.4995
Random Predictions Accuracy: 0.0000
Self-adjusting Threshold: 0.5004
