# **A Novel Approach for Three-Way Classification of Lumbar Spine Degeneration Using Pseudo-Modality Learning to Handle Missing MRI Data**

## Libraries

In [1]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.losses import BinaryCrossentropy
from sklearn.model_selection import KFold
from sklearn.utils.class_weight import compute_class_weight
import tensorflow as tf
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.model_selection import KFold
import joblib
import lightgbm as lgb
import xgboost as xgb
from tqdm import tqdm
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier

## Training Pipeline

In [2]:
# ---------------------------------------------------------------------------
# Experiment grid
#
# Every experiment is one (feature extractor, series, preprocessing variant)
# triple. The path lists, the human-readable names and the label-file lookup
# are all derived from the same grid so they cannot drift out of alignment
# (they are consumed positionally by `zip` in the training loop).
# ---------------------------------------------------------------------------
KAGGLE_INPUT_DIR = '/kaggle/input'

# NOTE(review): presumably axial T2 / sagittal T1 / sagittal T2 series - confirm.
SERIES_NAMES = ('AT2', 'ST1', 'ST2')
# NOTE(review): 'GSL' maps to the 'greyscl' directories below, 'HIST' to 'hist';
# the exact preprocessing behind each tag is not visible here - confirm.
PREPROCESSING_VARIANTS = ('GSL', 'HIST')
EXTRACTOR_NAMES = ('Attention Network', 'Average ResNet50', 'MedicalNet Network')

# Directory suffix the averaged-embedding dataset uses for each variant.
AVERAGE_VARIANT_DIRS = {'GSL': 'greyscl', 'HIST': 'hist'}

# Series-major, variant-minor ordering shared by every per-extractor list.
SERIES_VARIANT_GRID = [
    (series, variant)
    for series in SERIES_NAMES
    for variant in PREPROCESSING_VARIANTS
]

attention_embeddings_paths = [
    f'{KAGGLE_INPUT_DIR}/attention-embeddings-for-rsna/'
    f'{series}_attention_embeddings_{variant.lower()}.csv'
    for series, variant in SERIES_VARIANT_GRID
]

average_embeddings_paths = [
    f'{KAGGLE_INPUT_DIR}/embeddings-for-rsna/'
    f'{series.lower()}-{AVERAGE_VARIANT_DIRS[variant]}/final_embeddings.csv'
    for series, variant in SERIES_VARIANT_GRID
]

medicalnet_embeddings_paths = [
    f'{KAGGLE_INPUT_DIR}/medicalnet-attention-layers-for-rsna/'
    f'{series}_attention_embeddings_{variant.lower()}.csv'
    for series, variant in SERIES_VARIANT_GRID
]

# One label file per series, in SERIES_NAMES order.
labels_paths = [
    f'{KAGGLE_INPUT_DIR}/preprocessed-dataset/train_data_{series}.csv'
    for series in SERIES_NAMES
]

# Display names, extractor-major so they line up with `all_embedding_paths`.
list_of_combination = [
    f'{series} - {variant} - {extractor}'
    for extractor in EXTRACTOR_NAMES
    for series, variant in SERIES_VARIANT_GRID
]

# Same schema as the summary table filled in by the training loop.
results_df = pd.DataFrame(
    columns=['Combination', 'Avg_Test_Accuracy', 'Avg_F1_Score', 'Avg_ROC_AUC']
)

# Concatenation order must match EXTRACTOR_NAMES.
all_embedding_paths = [
    *attention_embeddings_paths,
    *average_embeddings_paths,
    *medicalnet_embeddings_paths
]

# Label file for each entry of `all_embedding_paths`, chosen by series.
labels_path_by_series = dict(zip(SERIES_NAMES, labels_paths))
corresponding_labels_paths = [
    labels_path_by_series[series]
    for _extractor in EXTRACTOR_NAMES
    for series, _variant in SERIES_VARIANT_GRID
]

# The three lists are zipped together later; a length mismatch would silently
# drop experiments, so fail loudly here instead.
assert (
    len(all_embedding_paths)
    == len(corresponding_labels_paths)
    == len(list_of_combination)
), "embedding paths, label paths and combination names are out of sync"

In [4]:
def _load_features_and_labels(embedding_path, labels_path):
    """Read one embedding CSV and its label CSV and return ``(X, Y)`` arrays.

    Missing label values are filled with the column mode, every label column
    is integer-encoded with ``LabelEncoder``, and the encoded labels are
    inner-joined onto the embeddings by ``study_id``.
    """
    embeddings = pd.read_csv(embedding_path)
    labels = pd.read_csv(labels_path)

    id_columns = ['study_id', 'series_id']
    targets = labels.drop(columns=id_columns)
    targets = targets.apply(lambda column: column.fillna(column.mode()[0]))
    targets = targets.apply(LabelEncoder().fit_transform)
    encoded_labels = pd.concat([labels[id_columns], targets], axis=1)

    merged = pd.merge(embeddings, encoded_labels, on='study_id', how='inner')

    # Columns are selected by position, not by name.
    # NOTE(review): this assumes the first 512 columns are the embedding
    # features and that label columns start at position 515 after the merge -
    # confirm against the embedding CSV schema.
    X = merged.iloc[:, :512].values
    Y = merged.iloc[:, 515:].values
    return X, Y


def _roc_auc_from_probabilities(y_true, y_proba, classes):
    """Compute ROC AUC from class probabilities (one-vs-rest when multi-class).

    Parameters
    ----------
    y_true : array of true labels for the evaluated samples.
    y_proba : 2-D array of predicted probabilities, one column per entry of
        ``classes`` and in the same order.
    classes : sorted array of the labels the model was fitted on.

    Returns
    -------
    float
        The ROC AUC, or ``nan`` when it is undefined (fewer than two classes
        in ``y_true``, or ``y_true`` contains a label the model never saw).
    """
    present = np.unique(y_true)
    if len(present) < 2 or not np.isin(present, classes).all():
        return float('nan')

    # Drop probability columns for classes absent from y_true and renormalise
    # the rest so each row still sums to 1, as roc_auc_score expects.
    keep = np.isin(classes, present)
    if not keep.all():
        y_proba = y_proba[:, keep]
        y_proba = y_proba / y_proba.sum(axis=1, keepdims=True)

    if len(present) == 2:
        # Binary AUC must be ranked by the positive-class score; hard labels
        # would collapse the ROC curve to a single operating point.
        return roc_auc_score(y_true == present[1], y_proba[:, 1])
    return roc_auc_score(y_true, y_proba, multi_class='ovr', labels=present)


def train(embedding_path, labels_path, model_save_path_prefix='gpu_ensemble_model_col_'):
    """Train one bagged-XGBoost classifier per label column and report metrics.

    For every label column the data is split 80/20 (``random_state=42``),
    standardised, oversampled with SMOTE when possible, fitted with a
    ``BaggingClassifier`` of 10 XGBoost estimators and evaluated on the
    held-out part. Each fitted model is written to
    ``f'{model_save_path_prefix}{col}.joblib'``.

    Parameters
    ----------
    embedding_path : path of the embedding CSV.
    labels_path : path of the label CSV; must contain ``study_id`` and
        ``series_id`` columns.
    model_save_path_prefix : prefix of the per-column model file names.

    Returns
    -------
    tuple of float
        ``(mean accuracy, mean macro-F1, mean ROC AUC)`` over the label
        columns. Columns whose ROC AUC is undefined are skipped in its mean.
    """
    X, Y = _load_features_and_labels(embedding_path, labels_path)

    accuracies = []
    f1_scores = []
    roc_auc_scores = []

    for col in tqdm(range(Y.shape[1]), desc="Training Columns"):
        X_train, X_test, Y_train, Y_test = train_test_split(
            X, Y[:, col], test_size=0.2, random_state=42
        )

        # Fit the scaler on the training part only to avoid test-set leakage.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        try:
            # Count only classes that actually occur in the training split;
            # np.bincount would report 0 for an absent intermediate label and
            # needlessly disable oversampling.
            _, class_counts = np.unique(Y_train, return_counts=True)
            n_neighbors = min(5, int(class_counts.min()) - 1)
            smote = SMOTE(random_state=42, k_neighbors=n_neighbors)
            X_train, Y_train = smote.fit_resample(X_train, Y_train)
        except ValueError as e:
            # Best effort: train on the un-resampled data instead.
            print(f"Skipping SMOTE for column {col} due to error: {e}")

        xgb_model = xgb.XGBClassifier(
            n_estimators=100,
            max_depth=1,
            learning_rate=0.1,
            objective='multi:softprob',
            num_class=len(np.unique(Y_train)),
            tree_method='hist',
            device='cuda',
            random_state=42
        )

        # NOTE(review): the captured notebook output shows os.fork() and XGBoost
        # device-mismatch warnings for this cell; n_jobs=-1 together with
        # device='cuda' looks like the likely cause - confirm.
        bagging_model = BaggingClassifier(
            estimator=xgb_model,
            n_estimators=10,
            max_samples=0.8,
            max_features=0.8,
            random_state=42,
            n_jobs=-1
        )

        bagging_model.fit(X_train, Y_train)

        joblib.dump(bagging_model, f'{model_save_path_prefix}{col}.joblib')

        Y_pred = bagging_model.predict(X_test)
        Y_pred_prob = bagging_model.predict_proba(X_test)

        accuracy = accuracy_score(Y_test, Y_pred)
        f1 = f1_score(Y_test, Y_pred, average='macro')
        roc_auc = _roc_auc_from_probabilities(
            Y_test, Y_pred_prob, bagging_model.classes_
        )

        accuracies.append(accuracy)
        f1_scores.append(f1)
        roc_auc_scores.append(roc_auc)

        print(f'Column {col}: Accuracy = {accuracy:.4f}, F1 Score = {f1:.4f}, ROC AUC = {roc_auc:.4f}')

    # nanmean equals mean when every AUC is defined.
    return np.mean(accuracies), np.mean(f1_scores), np.nanmean(roc_auc_scores)

In [5]:
# Train every (embedding, labels) combination and collect the averaged metrics.

# zip() stops at the shortest input, so a mismatch would silently drop
# experiments; fail loudly instead.
if not (len(all_embedding_paths) == len(corresponding_labels_paths) == len(list_of_combination)):
    raise ValueError(
        "all_embedding_paths, corresponding_labels_paths and list_of_combination "
        "must have the same length"
    )

summary_rows = []

for embedding_path, label_path, name in zip(all_embedding_paths, corresponding_labels_paths, list_of_combination):
    print(f"\nTraining for: {name} - {embedding_path}")

    # Per-combination prefix so the saved models of one combination do not
    # overwrite those of another.
    # NOTE(review): the 'svm' in the file name is misleading - train() fits
    # bagged XGBoost models. Kept as written to avoid renaming artefacts.
    model_save_path_prefix = f'{name}_svm_model_col_'

    avg_accuracy, avg_f1_score, avg_roc_auc = train(
        embedding_path,
        label_path,
        model_save_path_prefix=model_save_path_prefix,
    )

    summary_rows.append({
        'Combination': name,
        'Avg_Test_Accuracy': avg_accuracy,
        'Avg_F1_Score': avg_f1_score,
        'Avg_ROC_AUC': avg_roc_auc,
    })

# Build the frame once: avoids quadratic concat-in-a-loop and the pandas
# warning raised when concatenating onto an empty frame.
results_df = pd.DataFrame(
    summary_rows,
    columns=['Combination', 'Avg_Test_Accuracy', 'Avg_F1_Score', 'Avg_ROC_AUC'],
)

results_df.to_csv('results_summary.csv', index=False)

print("Training complete. Results saved to 'results_summary.csv'.")


Training for: AT2 - GSL - Attention Network - /kaggle/input/attention-embeddings-for-rsna/AT2_attention_embeddings_gsl.csv


  pid = os.fork()
Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.


Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.


Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.


Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.


Training Columns:  10%|█         | 1/10 [00:12<01:48, 12.03s/it]

Column 0: Accuracy = 0.9363, F1 Score = 0.6557, ROC AUC = 0.7381


Training Columns:  20%|██        | 2/10 [00:21<01:22, 10.31s/it]

Column 1: Accuracy = 0.7956, F1 Score = 0.3869, ROC AUC = 0.6190


Training Columns:  30%|███       | 3/10 [00:30<01:08,  9.75s/it]

Column 2: Accuracy = 0.6968, F1 Score = 0.4913, ROC AUC = 0.7344


Training Columns:  40%|████      | 4/10 [00:38<00:55,  9.30s/it]

Column 3: Accuracy = 0.5260, F1 Score = 0.4037, ROC AUC = 0.6400


Training Columns:  50%|█████     | 5/10 [00:47<00:45,  9.09s/it]

Column 4: Accuracy = 0.5209, F1 Score = 0.4029, ROC AUC = 0.6099


Training Columns:  60%|██████    | 6/10 [00:56<00:36,  9.05s/it]

Column 5: Accuracy = 0.8811, F1 Score = 0.4710, ROC AUC = 0.7678


Training Columns:  70%|███████   | 7/10 [01:04<00:26,  8.76s/it]

Column 6: Accuracy = 0.8157, F1 Score = 0.4947, ROC AUC = 0.7874


Training Columns:  80%|████████  | 8/10 [01:13<00:17,  8.69s/it]

Column 7: Accuracy = 0.6616, F1 Score = 0.4154, ROC AUC = 0.7247


Training Columns:  90%|█████████ | 9/10 [01:21<00:08,  8.40s/it]

Column 8: Accuracy = 0.5729, F1 Score = 0.4591, ROC AUC = 0.6813


Training Columns: 100%|██████████| 10/10 [01:29<00:00,  8.90s/it]

Column 9: Accuracy = 0.5561, F1 Score = 0.4627, ROC AUC = 0.6570

Training for: AT2 - HIST - Attention Network - /kaggle/input/attention-embeddings-for-rsna/AT2_attention_embeddings_hist.csv



  results_df = pd.concat([results_df, result_row], ignore_index=True)
Training Columns:  10%|█         | 1/10 [00:09<01:28,  9.78s/it]

Column 0: Accuracy = 0.9162, F1 Score = 0.6206, ROC AUC = 0.7277


Training Columns:  20%|██        | 2/10 [00:18<01:13,  9.24s/it]

Column 1: Accuracy = 0.8157, F1 Score = 0.4214, ROC AUC = 0.6021


Training Columns:  30%|███       | 3/10 [00:26<01:00,  8.59s/it]

Column 2: Accuracy = 0.6365, F1 Score = 0.4331, ROC AUC = 0.7228


Training Columns:  40%|████      | 4/10 [00:33<00:48,  8.09s/it]

Column 3: Accuracy = 0.5595, F1 Score = 0.4320, ROC AUC = 0.6666


Training Columns:  50%|█████     | 5/10 [00:41<00:40,  8.08s/it]

Column 4: Accuracy = 0.5578, F1 Score = 0.4349, ROC AUC = 0.6345


Training Columns:  60%|██████    | 6/10 [00:51<00:34,  8.56s/it]

Column 5: Accuracy = 0.8827, F1 Score = 0.5296, ROC AUC = 0.8407


Training Columns:  70%|███████   | 7/10 [00:59<00:25,  8.50s/it]

Column 6: Accuracy = 0.7940, F1 Score = 0.5107, ROC AUC = 0.7978


Training Columns:  80%|████████  | 8/10 [01:08<00:17,  8.60s/it]

Column 7: Accuracy = 0.6650, F1 Score = 0.4361, ROC AUC = 0.6998


Training Columns:  90%|█████████ | 9/10 [01:17<00:08,  8.61s/it]

Column 8: Accuracy = 0.5745, F1 Score = 0.4783, ROC AUC = 0.6806


Training Columns: 100%|██████████| 10/10 [01:25<00:00,  8.53s/it]

Column 9: Accuracy = 0.5243, F1 Score = 0.4248, ROC AUC = 0.6341

Training for: ST1 - GSL - Attention Network - /kaggle/input/attention-embeddings-for-rsna/ST1_attention_embeddings_gsl.csv



Training Columns:  10%|█         | 1/10 [00:07<01:06,  7.43s/it]

Column 0: Accuracy = 0.7678, F1 Score = 0.3766, ROC AUC = 0.6133


Training Columns:  20%|██        | 2/10 [00:14<00:57,  7.16s/it]

Column 1: Accuracy = 0.6148, F1 Score = 0.3547, ROC AUC = 0.6566


Training Columns:  30%|███       | 3/10 [00:22<00:52,  7.48s/it]

Column 2: Accuracy = 0.5172, F1 Score = 0.3821, ROC AUC = 0.6048


Training Columns:  40%|████      | 4/10 [00:29<00:44,  7.39s/it]

Column 3: Accuracy = 0.4063, F1 Score = 0.3908, ROC AUC = 0.5580


Training Columns:  50%|█████     | 5/10 [00:36<00:36,  7.30s/it]

Column 4: Accuracy = 0.4828, F1 Score = 0.3527, ROC AUC = 0.5584


Training Columns:  60%|██████    | 6/10 [00:43<00:29,  7.26s/it]

Column 5: Accuracy = 0.7414, F1 Score = 0.3093, ROC AUC = 0.5488


Training Columns:  70%|███████   | 7/10 [00:50<00:21,  7.20s/it]

Column 6: Accuracy = 0.6675, F1 Score = 0.3857, ROC AUC = 0.6249


Training Columns:  80%|████████  | 8/10 [00:58<00:14,  7.34s/it]

Column 7: Accuracy = 0.5040, F1 Score = 0.3904, ROC AUC = 0.5880


Training Columns:  90%|█████████ | 9/10 [01:05<00:07,  7.36s/it]

Column 8: Accuracy = 0.3747, F1 Score = 0.3614, ROC AUC = 0.5760


Training Columns: 100%|██████████| 10/10 [01:12<00:00,  7.28s/it]

Column 9: Accuracy = 0.4776, F1 Score = 0.3512, ROC AUC = 0.5694

Training for: ST1 - HIST - Attention Network - /kaggle/input/attention-embeddings-for-rsna/ST1_attention_embeddings_hist.csv



Training Columns:  10%|█         | 1/10 [00:07<01:07,  7.53s/it]

Column 0: Accuracy = 0.8179, F1 Score = 0.3682, ROC AUC = 0.6221


Training Columns:  20%|██        | 2/10 [00:14<00:57,  7.19s/it]

Column 1: Accuracy = 0.6438, F1 Score = 0.3401, ROC AUC = 0.6050


Training Columns:  30%|███       | 3/10 [00:22<00:51,  7.38s/it]

Column 2: Accuracy = 0.5040, F1 Score = 0.3610, ROC AUC = 0.5532


Training Columns:  40%|████      | 4/10 [00:28<00:42,  7.10s/it]

Column 3: Accuracy = 0.3799, F1 Score = 0.3684, ROC AUC = 0.5765


Training Columns:  50%|█████     | 5/10 [00:36<00:36,  7.24s/it]

Column 4: Accuracy = 0.4723, F1 Score = 0.3194, ROC AUC = 0.5376


Training Columns:  60%|██████    | 6/10 [00:43<00:28,  7.22s/it]

Column 5: Accuracy = 0.8153, F1 Score = 0.3807, ROC AUC = 0.6172


  pid = os.fork()
Training Columns:  70%|███████   | 7/10 [00:50<00:21,  7.21s/it]

Column 6: Accuracy = 0.6385, F1 Score = 0.3793, ROC AUC = 0.6297


Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.


Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.


Training Columns:  80%|████████  | 8/10 [00:58<00:14,  7.43s/it]

Column 7: Accuracy = 0.4723, F1 Score = 0.3611, ROC AUC = 0.6088


Training Columns:  90%|█████████ | 9/10 [01:05<00:07,  7.37s/it]

Column 8: Accuracy = 0.3958, F1 Score = 0.3759, ROC AUC = 0.5719


Training Columns: 100%|██████████| 10/10 [01:12<00:00,  7.25s/it]

Column 9: Accuracy = 0.4855, F1 Score = 0.3439, ROC AUC = 0.5569

Training for: ST2 - GSL - Attention Network - /kaggle/input/attention-embeddings-for-rsna/ST2_attention_embeddings_gsl.csv



Training Columns:  20%|██        | 1/5 [00:06<00:27,  6.90s/it]

Column 0: Accuracy = 0.8378, F1 Score = 0.3827, ROC AUC = 0.8045


Training Columns:  40%|████      | 2/5 [00:14<00:21,  7.05s/it]

Column 1: Accuracy = 0.7394, F1 Score = 0.4101, ROC AUC = 0.7317


Training Columns:  60%|██████    | 3/5 [00:21<00:14,  7.42s/it]

Column 2: Accuracy = 0.5824, F1 Score = 0.3335, ROC AUC = 0.5977


Training Columns:  80%|████████  | 4/5 [00:29<00:07,  7.59s/it]

Column 3: Accuracy = 0.5160, F1 Score = 0.4087, ROC AUC = 0.6109


Training Columns: 100%|██████████| 5/5 [00:36<00:00,  7.39s/it]

Column 4: Accuracy = 0.8457, F1 Score = 0.3055, ROC AUC = 0.4708

Training for: ST2 - HIST - Attention Network - /kaggle/input/attention-embeddings-for-rsna/ST2_attention_embeddings_hist.csv



Training Columns:  20%|██        | 1/5 [00:07<00:30,  7.71s/it]

Column 0: Accuracy = 0.8218, F1 Score = 0.3232, ROC AUC = 0.6517


Training Columns:  40%|████      | 2/5 [00:15<00:22,  7.50s/it]

Column 1: Accuracy = 0.6543, F1 Score = 0.3186, ROC AUC = 0.6833


Training Columns:  60%|██████    | 3/5 [00:22<00:15,  7.55s/it]

Column 2: Accuracy = 0.6277, F1 Score = 0.3697, ROC AUC = 0.5921


Training Columns:  80%|████████  | 4/5 [00:29<00:07,  7.30s/it]

Column 3: Accuracy = 0.5346, F1 Score = 0.3947, ROC AUC = 0.6199


Training Columns: 100%|██████████| 5/5 [00:36<00:00,  7.39s/it]

Column 4: Accuracy = 0.8298, F1 Score = 0.3181, ROC AUC = 0.4743

Training for: AT2 - GSL - Average ResNet50 - /kaggle/input/embeddings-for-rsna/at2-greyscl/final_embeddings.csv



Training Columns:  10%|█         | 1/10 [00:08<01:19,  8.84s/it]

Column 0: Accuracy = 0.8224, F1 Score = 0.3407, ROC AUC = 0.6085


Training Columns:  20%|██        | 2/10 [00:16<01:07,  8.38s/it]

Column 1: Accuracy = 0.6633, F1 Score = 0.3489, ROC AUC = 0.5399


Training Columns:  30%|███       | 3/10 [00:24<00:56,  8.09s/it]

Column 2: Accuracy = 0.5829, F1 Score = 0.3933, ROC AUC = 0.6579


Training Columns:  40%|████      | 4/10 [00:31<00:46,  7.78s/it]

Column 3: Accuracy = 0.4640, F1 Score = 0.3545, ROC AUC = 0.5860


Training Columns:  50%|█████     | 5/10 [00:39<00:38,  7.72s/it]

Column 4: Accuracy = 0.4171, F1 Score = 0.3614, ROC AUC = 0.5822


Training Columns:  60%|██████    | 6/10 [00:47<00:31,  7.92s/it]

Column 5: Accuracy = 0.7069, F1 Score = 0.3459, ROC AUC = 0.7606


Training Columns:  70%|███████   | 7/10 [00:55<00:23,  7.98s/it]

Column 6: Accuracy = 0.6365, F1 Score = 0.3458, ROC AUC = 0.6753


Training Columns:  80%|████████  | 8/10 [01:04<00:16,  8.04s/it]

Column 7: Accuracy = 0.5611, F1 Score = 0.3793, ROC AUC = 0.6500


Training Columns:  90%|█████████ | 9/10 [01:11<00:07,  7.71s/it]

Column 8: Accuracy = 0.5193, F1 Score = 0.4107, ROC AUC = 0.6170


Training Columns: 100%|██████████| 10/10 [01:18<00:00,  7.88s/it]

Column 9: Accuracy = 0.4824, F1 Score = 0.4074, ROC AUC = 0.6181

Training for: AT2 - HIST - Average ResNet50 - /kaggle/input/embeddings-for-rsna/at2-hist/final_embeddings.csv



Training Columns:  10%|█         | 1/10 [00:08<01:18,  8.70s/it]

Column 0: Accuracy = 0.8459, F1 Score = 0.3606, ROC AUC = 0.6685


Training Columns:  20%|██        | 2/10 [00:17<01:09,  8.75s/it]

Column 1: Accuracy = 0.7136, F1 Score = 0.3761, ROC AUC = 0.6260


Training Columns:  30%|███       | 3/10 [00:25<00:58,  8.30s/it]

Column 2: Accuracy = 0.6147, F1 Score = 0.3942, ROC AUC = 0.6697


Training Columns:  40%|████      | 4/10 [00:32<00:47,  7.88s/it]

Column 3: Accuracy = 0.4456, F1 Score = 0.3630, ROC AUC = 0.6035


Training Columns:  50%|█████     | 5/10 [00:40<00:40,  8.06s/it]

Column 4: Accuracy = 0.4573, F1 Score = 0.3942, ROC AUC = 0.5963


Training Columns:  60%|██████    | 6/10 [00:50<00:34,  8.57s/it]

Column 5: Accuracy = 0.7387, F1 Score = 0.3590, ROC AUC = 0.7874


Training Columns:  70%|███████   | 7/10 [00:59<00:26,  8.71s/it]

Column 6: Accuracy = 0.6868, F1 Score = 0.3931, ROC AUC = 0.6893


Training Columns:  80%|████████  | 8/10 [01:07<00:17,  8.65s/it]

Column 7: Accuracy = 0.5946, F1 Score = 0.3970, ROC AUC = 0.6791


Training Columns:  90%|█████████ | 9/10 [01:15<00:08,  8.18s/it]

Column 8: Accuracy = 0.4841, F1 Score = 0.3974, ROC AUC = 0.6251


Training Columns: 100%|██████████| 10/10 [01:23<00:00,  8.30s/it]

Column 9: Accuracy = 0.5008, F1 Score = 0.4237, ROC AUC = 0.6372

Training for: ST1 - GSL - Average ResNet50 - /kaggle/input/embeddings-for-rsna/st1-greyscl/final_embeddings.csv



Training Columns:  10%|█         | 1/10 [00:07<01:11,  7.95s/it]

Column 0: Accuracy = 0.6385, F1 Score = 0.3157, ROC AUC = 0.5942


Training Columns:  20%|██        | 2/10 [00:15<01:03,  7.95s/it]

Column 1: Accuracy = 0.4934, F1 Score = 0.3011, ROC AUC = 0.5590


Training Columns:  30%|███       | 3/10 [00:22<00:51,  7.31s/it]

Column 2: Accuracy = 0.4433, F1 Score = 0.3611, ROC AUC = 0.5984


Training Columns:  40%|████      | 4/10 [00:29<00:43,  7.31s/it]

Column 3: Accuracy = 0.3509, F1 Score = 0.3433, ROC AUC = 0.5479


Training Columns:  50%|█████     | 5/10 [00:36<00:35,  7.13s/it]

Column 4: Accuracy = 0.4011, F1 Score = 0.3134, ROC AUC = 0.5125


Training Columns:  60%|██████    | 6/10 [00:43<00:28,  7.12s/it]

Column 5: Accuracy = 0.6095, F1 Score = 0.3193, ROC AUC = 0.5734


Training Columns:  70%|███████   | 7/10 [00:51<00:22,  7.40s/it]

Column 6: Accuracy = 0.4565, F1 Score = 0.3307, ROC AUC = 0.6515


Training Columns:  80%|████████  | 8/10 [00:58<00:14,  7.16s/it]

Column 7: Accuracy = 0.4274, F1 Score = 0.3400, ROC AUC = 0.5248


Training Columns:  90%|█████████ | 9/10 [01:04<00:06,  6.87s/it]

Column 8: Accuracy = 0.3641, F1 Score = 0.3474, ROC AUC = 0.5207


Training Columns: 100%|██████████| 10/10 [01:11<00:00,  7.19s/it]

Column 9: Accuracy = 0.3905, F1 Score = 0.3202, ROC AUC = 0.5541

Training for: ST1 - HIST - Average ResNet50 - /kaggle/input/embeddings-for-rsna/st1-hist/final_embeddings.csv



Training Columns:  10%|█         | 1/10 [00:07<01:06,  7.38s/it]

Column 0: Accuracy = 0.6438, F1 Score = 0.3024, ROC AUC = 0.5723


Training Columns:  20%|██        | 2/10 [00:14<00:56,  7.07s/it]

Column 1: Accuracy = 0.4644, F1 Score = 0.2928, ROC AUC = 0.5439


Training Columns:  30%|███       | 3/10 [00:21<00:49,  7.02s/it]

Column 2: Accuracy = 0.4142, F1 Score = 0.3369, ROC AUC = 0.5444


Training Columns:  40%|████      | 4/10 [00:28<00:42,  7.12s/it]

Column 3: Accuracy = 0.3694, F1 Score = 0.3582, ROC AUC = 0.5346


Training Columns:  50%|█████     | 5/10 [00:35<00:35,  7.16s/it]

Column 4: Accuracy = 0.3536, F1 Score = 0.2649, ROC AUC = 0.4566


Training Columns:  60%|██████    | 6/10 [00:42<00:28,  7.15s/it]

Column 5: Accuracy = 0.5198, F1 Score = 0.2522, ROC AUC = 0.4863


  pid = os.fork()
Training Columns:  70%|███████   | 7/10 [00:49<00:20,  6.88s/it]

Column 6: Accuracy = 0.4248, F1 Score = 0.2961, ROC AUC = 0.5808


Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.


Training Columns:  80%|████████  | 8/10 [00:55<00:13,  6.70s/it]

Column 7: Accuracy = 0.4512, F1 Score = 0.3646, ROC AUC = 0.5524


Training Columns:  90%|█████████ | 9/10 [01:02<00:06,  6.73s/it]

Column 8: Accuracy = 0.3615, F1 Score = 0.3574, ROC AUC = 0.5476


Training Columns: 100%|██████████| 10/10 [01:08<00:00,  6.87s/it]

Column 9: Accuracy = 0.3958, F1 Score = 0.3180, ROC AUC = 0.5621

Training for: ST2 - GSL - Average ResNet50 - /kaggle/input/embeddings-for-rsna/st2-greyscl/final_embeddings.csv



Training Columns:  20%|██        | 1/5 [00:07<00:28,  7.19s/it]

Column 0: Accuracy = 0.6516, F1 Score = 0.2845, ROC AUC = 0.5313


Training Columns:  40%|████      | 2/5 [00:14<00:21,  7.12s/it]

Column 1: Accuracy = 0.5532, F1 Score = 0.3426, ROC AUC = 0.7322


Training Columns:  60%|██████    | 3/5 [00:21<00:14,  7.11s/it]

Column 2: Accuracy = 0.4867, F1 Score = 0.3405, ROC AUC = 0.6614


Training Columns:  80%|████████  | 4/5 [00:28<00:07,  7.04s/it]

Column 3: Accuracy = 0.4654, F1 Score = 0.4104, ROC AUC = 0.6961


Training Columns: 100%|██████████| 5/5 [00:35<00:00,  7.08s/it]

Column 4: Accuracy = 0.5559, F1 Score = 0.2563, ROC AUC = 0.5582

Training for: ST2 - HIST - Average ResNet50 - /kaggle/input/embeddings-for-rsna/st2-hist/final_embeddings.csv



Training Columns:  20%|██        | 1/5 [00:06<00:27,  6.82s/it]

Column 0: Accuracy = 0.6676, F1 Score = 0.2911, ROC AUC = 0.5646


Training Columns:  40%|████      | 2/5 [00:14<00:22,  7.45s/it]

Column 1: Accuracy = 0.5027, F1 Score = 0.3009, ROC AUC = 0.6357


Training Columns:  60%|██████    | 3/5 [00:21<00:14,  7.25s/it]

Column 2: Accuracy = 0.5080, F1 Score = 0.3682, ROC AUC = 0.6673


Training Columns:  80%|████████  | 4/5 [00:28<00:07,  7.09s/it]

Column 3: Accuracy = 0.5160, F1 Score = 0.4452, ROC AUC = 0.6864


Training Columns: 100%|██████████| 5/5 [00:35<00:00,  7.13s/it]

Column 4: Accuracy = 0.6569, F1 Score = 0.2738, ROC AUC = 0.5077

Training for: AT2 - GSL - MedicalNet Network - /kaggle/input/medicalnet-attention-layers-for-rsna/AT2_attention_embeddings_gsl.csv



Training Columns:  10%|█         | 1/10 [00:08<01:15,  8.38s/it]

Column 0: Accuracy = 0.7772, F1 Score = 0.4901, ROC AUC = 0.6050


Training Columns:  20%|██        | 2/10 [00:17<01:10,  8.76s/it]

Column 1: Accuracy = 0.6114, F1 Score = 0.3163, ROC AUC = 0.5022


Training Columns:  30%|███       | 3/10 [00:24<00:56,  8.08s/it]

Column 2: Accuracy = 0.4824, F1 Score = 0.3432, ROC AUC = 0.6355


Training Columns:  40%|████      | 4/10 [00:32<00:48,  8.02s/it]

Column 3: Accuracy = 0.4121, F1 Score = 0.3540, ROC AUC = 0.5916


Training Columns:  50%|█████     | 5/10 [00:39<00:38,  7.68s/it]

Column 4: Accuracy = 0.3802, F1 Score = 0.3457, ROC AUC = 0.5644


  pid = os.fork()
Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.


Training Columns:  60%|██████    | 6/10 [00:50<00:34,  8.68s/it]

Column 5: Accuracy = 0.5745, F1 Score = 0.2837, ROC AUC = 0.5968


Training Columns:  70%|███████   | 7/10 [00:59<00:26,  8.86s/it]

Column 6: Accuracy = 0.5561, F1 Score = 0.3048, ROC AUC = 0.5847


Training Columns:  80%|████████  | 8/10 [01:06<00:16,  8.42s/it]

Column 7: Accuracy = 0.4992, F1 Score = 0.3313, ROC AUC = 0.5656


Training Columns:  90%|█████████ | 9/10 [01:14<00:08,  8.01s/it]

Column 8: Accuracy = 0.4389, F1 Score = 0.3707, ROC AUC = 0.5835


Training Columns: 100%|██████████| 10/10 [01:21<00:00,  8.15s/it]

Column 9: Accuracy = 0.3668, F1 Score = 0.3279, ROC AUC = 0.5674

Training for: AT2 - HIST - MedicalNet Network - /kaggle/input/medicalnet-attention-layers-for-rsna/AT2_attention_embeddings_hist.csv



Training Columns:  10%|█         | 1/10 [00:09<01:24,  9.34s/it]

Column 0: Accuracy = 0.8275, F1 Score = 0.3470, ROC AUC = 0.6337


Training Columns:  20%|██        | 2/10 [00:18<01:12,  9.06s/it]

Column 1: Accuracy = 0.6214, F1 Score = 0.3315, ROC AUC = 0.4959


Training Columns:  30%|███       | 3/10 [00:25<00:58,  8.42s/it]

Column 2: Accuracy = 0.4975, F1 Score = 0.3606, ROC AUC = 0.6597


Training Columns:  40%|████      | 4/10 [00:34<00:49,  8.32s/it]

Column 3: Accuracy = 0.3970, F1 Score = 0.3365, ROC AUC = 0.5833


Training Columns:  50%|█████     | 5/10 [00:41<00:39,  7.88s/it]

Column 4: Accuracy = 0.3869, F1 Score = 0.3442, ROC AUC = 0.5437


Training Columns:  60%|██████    | 6/10 [00:49<00:31,  7.94s/it]

Column 5: Accuracy = 0.6382, F1 Score = 0.3182, ROC AUC = 0.6741


Training Columns:  70%|███████   | 7/10 [00:58<00:25,  8.37s/it]

Column 6: Accuracy = 0.5394, F1 Score = 0.2980, ROC AUC = 0.5818


Training Columns:  80%|████████  | 8/10 [01:05<00:16,  8.05s/it]

Column 7: Accuracy = 0.5109, F1 Score = 0.3288, ROC AUC = 0.6426


Training Columns:  90%|█████████ | 9/10 [01:13<00:07,  7.96s/it]

Column 8: Accuracy = 0.4523, F1 Score = 0.3557, ROC AUC = 0.5893


Training Columns: 100%|██████████| 10/10 [01:21<00:00,  8.18s/it]

Column 9: Accuracy = 0.3869, F1 Score = 0.3310, ROC AUC = 0.5510

Training for: ST1 - GSL - MedicalNet Network - /kaggle/input/medicalnet-attention-layers-for-rsna/ST1_attention_embeddings_gsl.csv



Training Columns:  10%|█         | 1/10 [00:06<01:02,  6.98s/it]

Column 0: Accuracy = 0.5831, F1 Score = 0.3123, ROC AUC = 0.6771


Training Columns:  20%|██        | 2/10 [00:14<01:00,  7.51s/it]

Column 1: Accuracy = 0.4459, F1 Score = 0.2981, ROC AUC = 0.5652


Training Columns:  30%|███       | 3/10 [00:22<00:52,  7.44s/it]

Column 2: Accuracy = 0.4063, F1 Score = 0.3346, ROC AUC = 0.5694


Training Columns:  40%|████      | 4/10 [00:29<00:44,  7.38s/it]

Column 3: Accuracy = 0.3641, F1 Score = 0.3599, ROC AUC = 0.5529


Training Columns:  50%|█████     | 5/10 [00:37<00:37,  7.52s/it]

Column 4: Accuracy = 0.3799, F1 Score = 0.2993, ROC AUC = 0.5668


Training Columns:  60%|██████    | 6/10 [00:44<00:29,  7.34s/it]

Column 5: Accuracy = 0.5778, F1 Score = 0.3014, ROC AUC = 0.5670


  pid = os.fork()
Training Columns:  70%|███████   | 7/10 [00:50<00:21,  7.13s/it]

Column 6: Accuracy = 0.4644, F1 Score = 0.3044, ROC AUC = 0.5602


Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.


Training Columns:  80%|████████  | 8/10 [00:57<00:13,  6.87s/it]

Column 7: Accuracy = 0.3852, F1 Score = 0.3353, ROC AUC = 0.5514


Training Columns:  90%|█████████ | 9/10 [01:03<00:06,  6.67s/it]

Column 8: Accuracy = 0.3668, F1 Score = 0.3598, ROC AUC = 0.5427


Training Columns: 100%|██████████| 10/10 [01:11<00:00,  7.14s/it]

Column 9: Accuracy = 0.4354, F1 Score = 0.3517, ROC AUC = 0.5768

Training for: ST1 - HIST - MedicalNet Network - /kaggle/input/medicalnet-attention-layers-for-rsna/ST1_attention_embeddings_hist.csv



Training Columns:  10%|█         | 1/10 [00:07<01:04,  7.17s/it]

Column 0: Accuracy = 0.6174, F1 Score = 0.3152, ROC AUC = 0.5922


Training Columns:  20%|██        | 2/10 [00:14<00:58,  7.29s/it]

Column 1: Accuracy = 0.4354, F1 Score = 0.2902, ROC AUC = 0.5790


Training Columns:  30%|███       | 3/10 [00:22<00:51,  7.38s/it]

Column 2: Accuracy = 0.3377, F1 Score = 0.2849, ROC AUC = 0.5358


Training Columns:  40%|████      | 4/10 [00:28<00:42,  7.07s/it]

Column 3: Accuracy = 0.3958, F1 Score = 0.3909, ROC AUC = 0.5701


Training Columns:  50%|█████     | 5/10 [00:36<00:36,  7.27s/it]

Column 4: Accuracy = 0.3905, F1 Score = 0.3214, ROC AUC = 0.5692


Training Columns:  60%|██████    | 6/10 [00:44<00:30,  7.54s/it]

Column 5: Accuracy = 0.6095, F1 Score = 0.2921, ROC AUC = 0.5870


Training Columns:  70%|███████   | 7/10 [00:51<00:22,  7.38s/it]

Column 6: Accuracy = 0.4512, F1 Score = 0.2949, ROC AUC = 0.5687


Training Columns:  80%|████████  | 8/10 [00:57<00:14,  7.14s/it]

Column 7: Accuracy = 0.4011, F1 Score = 0.3485, ROC AUC = 0.5596


Training Columns:  90%|█████████ | 9/10 [01:05<00:07,  7.18s/it]

Column 8: Accuracy = 0.3931, F1 Score = 0.3771, ROC AUC = 0.5455


Training Columns: 100%|██████████| 10/10 [01:12<00:00,  7.20s/it]

Column 9: Accuracy = 0.3931, F1 Score = 0.3052, ROC AUC = 0.5592

Training for: ST2 - GSL - MedicalNet Network - /kaggle/input/medicalnet-attention-layers-for-rsna/ST2_attention_embeddings_gsl.csv



Training Columns:  20%|██        | 1/5 [00:07<00:29,  7.47s/it]

Column 0: Accuracy = 0.6277, F1 Score = 0.2888, ROC AUC = 0.6499


Training Columns:  40%|████      | 2/5 [00:15<00:23,  7.79s/it]

Column 1: Accuracy = 0.5319, F1 Score = 0.3062, ROC AUC = 0.6522


Training Columns:  60%|██████    | 3/5 [00:23<00:15,  7.67s/it]

Column 2: Accuracy = 0.4309, F1 Score = 0.3086, ROC AUC = 0.6085


Training Columns:  80%|████████  | 4/5 [00:29<00:07,  7.27s/it]

Column 3: Accuracy = 0.4309, F1 Score = 0.3361, ROC AUC = 0.6032


Training Columns: 100%|██████████| 5/5 [00:36<00:00,  7.34s/it]

Column 4: Accuracy = 0.6968, F1 Score = 0.2835, ROC AUC = 0.5953

Training for: ST2 - HIST - MedicalNet Network - /kaggle/input/medicalnet-attention-layers-for-rsna/ST2_attention_embeddings_hist.csv



Training Columns:  20%|██        | 1/5 [00:07<00:28,  7.21s/it]

Column 0: Accuracy = 0.6463, F1 Score = 0.2767, ROC AUC = 0.5162


Training Columns:  40%|████      | 2/5 [00:15<00:22,  7.58s/it]

Column 1: Accuracy = 0.5665, F1 Score = 0.3159, ROC AUC = 0.5973


Training Columns:  60%|██████    | 3/5 [00:22<00:14,  7.34s/it]

Column 2: Accuracy = 0.4415, F1 Score = 0.3121, ROC AUC = 0.5915


Training Columns:  80%|████████  | 4/5 [00:29<00:07,  7.17s/it]

Column 3: Accuracy = 0.4096, F1 Score = 0.3129, ROC AUC = 0.5319


Training Columns: 100%|██████████| 5/5 [00:36<00:00,  7.21s/it]

Column 4: Accuracy = 0.6755, F1 Score = 0.3134, ROC AUC = 0.6674
Training complete. Results saved to 'results_summary.csv'.





In [6]:
# Display the per-combination summary table assembled by the training loop.
results_df

Unnamed: 0,Combination,Avg_Test_Accuracy,Avg_F1_Score,Avg_ROC_AUC
0,AT2 - GSL - Attention Network,0.696315,0.464348,0.69595
1,AT2 - HIST - Attention Network,0.69263,0.472156,0.700666
2,ST1 - GSL - Attention Network,0.555409,0.365492,0.589818
3,ST1 - HIST - Attention Network,0.562533,0.35979,0.587887
4,ST2 - GSL - Attention Network,0.704255,0.368094,0.643129
5,ST2 - HIST - Attention Network,0.693617,0.34486,0.604289
6,AT2 - GSL - Average ResNet50,0.585595,0.368792,0.62955
7,AT2 - HIST - Average ResNet50,0.608208,0.385825,0.658222
8,ST1 - GSL - Average ResNet50,0.45752,0.32922,0.563641
9,ST1 - HIST - Average ResNet50,0.439842,0.314328,0.538105


In [7]:
def random_predictions(path):
    """Print the exact-match accuracy of thresholded random scores.

    The label CSV at ``path`` is mode-imputed and one-hot encoded. A matrix of
    uniform random scores of the same shape is binarised at its own mean, and
    a row counts as correct only if every one-hot entry matches.

    Parameters
    ----------
    path : path of a label CSV containing ``study_id`` and ``series_id``
        columns plus the label columns.

    Returns
    -------
    None
        The accuracy and the threshold are printed, not returned.
    """
    labels = pd.read_csv(path)

    targets = labels.drop(columns=['study_id', 'series_id'])
    targets = targets.apply(lambda column: column.fillna(column.mode()[0]))

    encoder = OneHotEncoder(sparse_output=False)
    Y_true = encoder.fit_transform(targets)

    # A private generator gives the same stream as np.random.seed(42) followed
    # by np.random.rand, without mutating the global NumPy random state.
    rng = np.random.RandomState(42)
    random_scores = rng.rand(*Y_true.shape)  # Random floats in [0.0, 1.0)

    threshold = np.mean(random_scores)

    binary_predictions = (random_scores > threshold).astype(float)

    # NOTE(review): this is an all-columns exact match over independently
    # drawn bits, so it is ~0 for any realistic number of one-hot columns and
    # is not comparable to the per-column accuracies reported by train().
    # Consider a per-label-column baseline instead.
    accuracy = np.mean(np.all(binary_predictions == Y_true, axis=1))

    print(f"Random Predictions Accuracy: {accuracy:.4f}")
    print(f"Self-adjusting Threshold: {threshold:.4f}")

## Random-Prediction Baseline Accuracy

In [8]:
# Report the random baseline for each label file in turn.
for labels_csv in (
    '/kaggle/input/preprocessed-dataset/train_data_AT2.csv',
    '/kaggle/input/preprocessed-dataset/train_data_ST1.csv',
    '/kaggle/input/preprocessed-dataset/train_data_ST2.csv',
):
    random_predictions(labels_csv)

Random Predictions Accuracy: 0.0000
Self-adjusting Threshold: 0.4996
Random Predictions Accuracy: 0.0000
Self-adjusting Threshold: 0.4995
Random Predictions Accuracy: 0.0000
Self-adjusting Threshold: 0.5004
