# script - Feature set - d (7 feature model) 

### The following section is run for model construction (LASSO, ridge, elastic net and random survival forest) using three different feature sets, i.e.:
#### All clinical features only (5)
#### All promoter features only (2)
#### clinical + promoter features (7)

In [4]:
# Record the interpreter version used for this run.
import sys
print(sys.version)

3.12.2 | packaged by conda-forge | (main, Feb 16 2024, 20:50:58) [GCC 12.3.0]


In [3]:
import sys

# Seed used when no usable integer is supplied on the command line.
DEFAULT_RANDOM_STATE = 1


def parse_random_state(argv, default=DEFAULT_RANDOM_STATE):
    """Return the integer seed found in ``argv[1]``, or ``default``.

    Parameters
    ----------
    argv : sequence of str
        Command-line vector, normally ``sys.argv``.
    default : int
        Value returned when ``argv[1]`` is missing or is not an integer
        (inside a Jupyter kernel ``argv[1]`` is typically the ``-f`` flag).

    Returns
    -------
    int
        The seed to use for the data split, CV folds and random forests.
    """
    try:
        return int(argv[1])
    except (IndexError, ValueError):
        # Report the fallback so a batch run with a forgotten seed is visible.
        print(
            f"No integer seed in argv[1]; using default random_state={default}",
            file=sys.stderr,
        )
        return default


random_state = parse_random_state(sys.argv)

In [11]:
#importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sksurv.util import Surv
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import warnings
from sklearn.exceptions import FitFailedWarning
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
warnings.simplefilter("ignore", UserWarning)
warnings.simplefilter("ignore", FitFailedWarning)
from sklearn.model_selection import GridSearchCV
from sksurv.linear_model import CoxnetSurvivalAnalysis
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sksurv.util import Surv
from sksurv.ensemble import RandomSurvivalForest

In [6]:
# Load the complete dataset from "Input.csv" (path is relative to the working directory).
input_all = pd.read_csv("Input.csv")

In [9]:
# Hold out 30% of the samples for validation; the split is stratified on
# 'mRNA_Subtype' and seeded with random_state.
print(f"random_state={random_state}")
train_df, val_df = train_test_split(
    input_all,
    test_size=0.30,
    stratify=input_all['mRNA_Subtype'],
    random_state=random_state,
)

# Report the size of each partition to verify the split.
for size_label, frame in (
    ("Training set size:", train_df),
    ("Validation set size:", val_df),
):
    print(size_label, frame.shape)

# Report the subtype proportions in each partition.
for heading, frame in (
    ("\nDistribution of mRNA_Subtype in training set:", train_df),
    ("\nDistribution of mRNA_Subtype in validation set:", val_df),
):
    print(heading)
    print(frame['mRNA_Subtype'].value_counts(normalize=True))

random_state=1
Training set size: (242, 24)
Validation set size: (105, 24)

Distribution of mRNA_Subtype in training set:
mRNA_Subtype
BLIS    0.396694
IM      0.243802
LAR     0.214876
MES     0.144628
Name: proportion, dtype: float64

Distribution of mRNA_Subtype in validation set:
mRNA_Subtype
BLIS    0.400000
IM      0.238095
LAR     0.209524
MES     0.152381
Name: proportion, dtype: float64


In [10]:
# Feature sets evaluated by this notebook. Change FEATURE_SET (instead of
# commenting lines in and out) to rerun the whole analysis on another set.
# The validation cell and the output file name must correspond to the same set.
CLINICAL_FEATURES = ['Ki67', 'Size_cm', 'CNA_Subtype_Chr8p21_del', 'CNA_Subtype_Chr13q34_amp', 'CNA_Subtype_Chr20q13_amp']
PROMOTER_FEATURES = ['pr3004_huwe1', 'pr9001_cald1']
FEATURE_SETS = {
    'clinical': CLINICAL_FEATURES,                               # clinical features only (5)
    'promoter': PROMOTER_FEATURES,                               # promoter features only (2)
    'clinical+promoter': CLINICAL_FEATURES + PROMOTER_FEATURES,  # clinical + promoter (7)
}
FEATURE_SET = 'clinical'

feature_columns = FEATURE_SETS[FEATURE_SET]

# Design matrix and (event status, time) target taken from the training split.
X = train_df[feature_columns]
y = train_df[['RFS_Status', 'RFS_time_Months']]

# Check the shape of X and y; the active set is printed so that recorded
# output can always be matched to the feature set that produced it.
print(f"Feature set: {FEATURE_SET} ({len(feature_columns)} features)")
print("X shape:", X.shape)
print("y shape:", y.shape)


X shape: (242, 2)
y shape: (242, 2)


In [12]:
# Convert the two-column target frame y into a structured array via
# Surv.from_dataframe, using 'RFS_Status' as the event column and
# 'RFS_time_Months' as the time column.
y_surv = Surv.from_dataframe('RFS_Status', 'RFS_time_Months', y) # event coding noted by the author: false = 0, true = 1

In [13]:
# Penalised Cox pipelines used to obtain candidate alpha paths.
# NOTE(review): StandardScaler is imported at the top of the file but is not
# part of these pipelines, so features are passed unscaled - confirm intended.

# Settings shared by all three Coxnet estimators.
_COXNET_SHARED = {"alpha_min_ratio": 0.01, "max_iter": 100}

# 1) LASSO: pure L1 penalty; hyperparameter to tune is alpha.
coxnet_pipe_lasso = make_pipeline(
    CoxnetSurvivalAnalysis(l1_ratio=1.0, **_COXNET_SHARED)
)

# 2) RIDGE: l1_ratio must lie in (0.0, 1.0] (0 itself is excluded), hence 1e-5;
#    hyperparameter to tune is alpha.
coxnet_pipe_ridge = make_pipeline(
    CoxnetSurvivalAnalysis(l1_ratio=1e-5, **_COXNET_SHARED)
)

# 3) ELASTIC NET: l1_ratio is left at the estimator default here;
#    hyperparameters to tune are l1_ratio and alpha.
coxnet_pipe_elastic = make_pipeline(
    CoxnetSurvivalAnalysis(**_COXNET_SHARED)
)


In [14]:
# Random survival forest: grid search over n_estimators, min_samples_split
# and min_samples_leaf.
param_grid_rf = {
    'n_estimators': [100, 500, 1000],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [5, 10, 15],
}

# Folds are stratified on the event indicator.
event_occurrences = y_surv['RFS_Status']
# NOTE(review): the fold seed is fixed at 0 here, whereas the other
# cross-validation cells seed their folds with random_state - confirm intended.
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=0)

rsf_base = RandomSurvivalForest(random_state=random_state, n_jobs=1)
grid_search_rf = GridSearchCV(
    estimator=rsf_base,
    param_grid=param_grid_rf,
    cv=cv.split(X, event_occurrences),
    n_jobs=1,
    error_score=0.5,  # score assigned to a candidate whose fit fails
    verbose=1,
)
grid_search_rf.fit(X, y_surv)

best_rf_params = grid_search_rf.best_params_
print("Best Hyperparameters:", best_rf_params)
best_model = grid_search_rf.best_estimator_

# Keep the selected hyperparameters for the later model fits.
min_samples_leaf = best_rf_params['min_samples_leaf']
min_samples_split = best_rf_params['min_samples_split']
n_estimators = best_rf_params['n_estimators']
for chosen_value in (min_samples_leaf, min_samples_split, n_estimators):
    print(chosen_value)

Fitting 3 folds for each of 27 candidates, totalling 81 fits
Best Hyperparameters: {'min_samples_leaf': 5, 'min_samples_split': 2, 'n_estimators': 100}
5
2
100


In [15]:
# Elastic net: choose l1_ratio by grid search with 3-fold cross-validation.
param_grid_l1_elastic = {
    'coxnetsurvivalanalysis__l1_ratio': [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
}

# Folds are stratified on the event indicator and seeded with random_state.
event_occurrences = y_surv['RFS_Status']
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=random_state)
stratified_folds = cv.split(X, event_occurrences)

GSV_elastic_L1 = GridSearchCV(
    estimator=coxnet_pipe_elastic,
    param_grid=param_grid_l1_elastic,
    cv=stratified_folds,
    error_score=0.5,  # score assigned to a candidate whose fit fails
    n_jobs=1,
)
GSV_elastic_L1.fit(X, y_surv)

# Selected mixing ratio, reused when the elastic-net models are built.
best_l1_ratio = GSV_elastic_L1.best_params_['coxnetsurvivalanalysis__l1_ratio']
print(f"Best L1 Ratio: {best_l1_ratio}")


Best L1 Ratio: 0.2


In [16]:
# Rebuild the elastic-net pipeline with the l1_ratio selected by the grid search
# (best_l1_ratio); it is fitted in the next cell to obtain its alpha path.
coxnet_pipe_elastic = make_pipeline( CoxnetSurvivalAnalysis(l1_ratio=best_l1_ratio, alpha_min_ratio = 0.01, max_iter=100))  ### range (0.0, 1.0] notice the round bracket

In [17]:
# Fit the three Coxnet pipelines on the training data; the fitted estimators
# expose the alphas_ attribute read in the next cell.
coxnet_pipe_lasso.fit(X, y_surv)
coxnet_pipe_ridge.fit(X, y_surv)
coxnet_pipe_elastic.fit(X, y_surv)

In [18]:
# Collect the alphas_ attribute of each fitted Coxnet step; these values are
# the candidates for the best-alpha search of each model.
estimated_alphas_lasso, estimated_alphas_ridge, estimated_alphas_elastic = (
    fitted_pipe.named_steps['coxnetsurvivalanalysis'].alphas_
    for fitted_pipe in (coxnet_pipe_lasso, coxnet_pipe_ridge, coxnet_pipe_elastic)
)

In [19]:
# The training data are divided into 3 folds stratified on the event indicator,
# and a grid search over the candidate alphas is run for each Coxnet pipeline.
event_occurrences = y_surv['RFS_Status']

from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sksurv.util import Surv
# (an unstratified 5-fold KFold was considered earlier and is not used)
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=random_state)


def _tune_alpha(pipeline, candidate_alphas):
    """Grid-search one alpha at a time for ``pipeline`` and return the fitted search.

    Each candidate is wrapped in a single-element list because the estimator's
    ``alphas`` parameter is given as a list.
    """
    search = GridSearchCV(
        pipeline,
        param_grid={"coxnetsurvivalanalysis__alphas": [[alpha] for alpha in candidate_alphas]},
        cv=cv.split(X, event_occurrences),
        error_score=0.5,  # score assigned to a candidate whose fit fails
        n_jobs=1,
    )
    return search.fit(X, y_surv)


GSV_alpha_lasso = _tune_alpha(coxnet_pipe_lasso, estimated_alphas_lasso)
GSV_alpha_ridge = _tune_alpha(coxnet_pipe_ridge, estimated_alphas_ridge)
GSV_alpha_elastic = _tune_alpha(coxnet_pipe_elastic, estimated_alphas_elastic)

In [20]:
# Best alpha per model as selected by the grid searches. Each value is a
# single-element list, matching the form of the candidates in the parameter grid.
alpha_lasso = GSV_alpha_lasso.best_params_["coxnetsurvivalanalysis__alphas"]
alpha_ridge = GSV_alpha_ridge.best_params_["coxnetsurvivalanalysis__alphas"]
alpha_elastic = GSV_alpha_elastic.best_params_["coxnetsurvivalanalysis__alphas"]


# Report the selected alphas.
print("alpha_lasso = ",alpha_lasso)
print("alpha_ridge = ",alpha_ridge)
print("alpha_elastic = ",alpha_elastic)

alpha_lasso =  [0.01310199226384447]
alpha_ridge =  [3170.8501802847313]
alpha_elastic =  [0.06550996131922238]


In [21]:
# Using the selected hyperparameters, estimate the performance of each model by
# 3-fold cross-validation inside the training split. For every fold and model
# this records Uno's C, Harrell's C, the integrated Brier score (IBS) and the
# mean time-dependent AUC.
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sksurv.util import Surv
from sksurv.metrics import (
    concordance_index_censored,
    concordance_index_ipcw,
    cumulative_dynamic_auc,
    integrated_brier_score,
)

kf = StratifiedKFold(n_splits=3, shuffle=True, random_state=random_state)

# Harrell's concordance index per fold
c_indices_lasso = []
c_indices_ridge = []
c_indices_elastic_net = []
c_indices_rf = []

# Integrated Brier score per fold
ibs_scores_lasso = []
ibs_scores_ridge = []
ibs_scores_elastic_net = []
ibs_scores_rf = []

fold_numbers = []

# Mean time-dependent AUC per fold
time_dependent_aucs_lasso = []
time_dependent_aucs_ridge = []
time_dependent_aucs_elastic_net = []
time_dependent_aucs_rf = []

# Uno's concordance index (IPCW) per fold
c_uno_lasso = []
c_uno_ridge = []
c_uno_elastic = []
c_uno_rf = []

# Result lists grouped per model so that every model goes through the same
# metric code. (The earlier copy-pasted version computed the ridge IBS from
# the LASSO survival functions.)
per_model_results = {
    'lasso': (c_uno_lasso, c_indices_lasso, ibs_scores_lasso, time_dependent_aucs_lasso),
    'ridge': (c_uno_ridge, c_indices_ridge, ibs_scores_ridge, time_dependent_aucs_ridge),
    'elastic': (c_uno_elastic, c_indices_elastic_net, ibs_scores_elastic_net, time_dependent_aucs_elastic_net),
    'rf': (c_uno_rf, c_indices_rf, ibs_scores_rf, time_dependent_aucs_rf),
}

for fold, (train_index, test_index) in enumerate(kf.split(X, event_occurrences), 1):
    x_train, x_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y_surv[train_index], y_surv[test_index]

    # Fit the four models on this fold's training part.
    model_lasso = CoxnetSurvivalAnalysis(alphas=alpha_lasso, fit_baseline_model=True, l1_ratio=1.0).fit(x_train, y_train)
    model_ridge = CoxnetSurvivalAnalysis(alphas=alpha_ridge, fit_baseline_model=True, l1_ratio=1e-5).fit(x_train, y_train)
    model_elastic = CoxnetSurvivalAnalysis(alphas=alpha_elastic, fit_baseline_model=True, l1_ratio=best_l1_ratio).fit(x_train, y_train)
    model_rf = RandomSurvivalForest(n_estimators=n_estimators, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, n_jobs=1, random_state=random_state).fit(x_train, y_train)
    fold_models = {
        'lasso': model_lasso,
        'ridge': model_ridge,
        'elastic': model_elastic,
        'rf': model_rf,
    }

    # Evaluation grid: 20 time points between the 5th and 95th percentile of
    # this fold's test times (shared by the IBS and the time-dependent AUC).
    times = np.percentile(y_test['RFS_time_Months'], np.linspace(5, 95, 20))

    for model_name, fold_model in fold_models.items():
        c_uno_list, c_index_list, ibs_list, auc_list = per_model_results[model_name]
        risk_scores = fold_model.predict(x_test)

        # Uno's C (censoring distribution estimated from y_train)
        c_uno_list.append(concordance_index_ipcw(y_train, y_test, risk_scores)[0])

        # Harrell's C
        c_index_list.append(
            concordance_index_censored(y_test['RFS_Status'], y_test['RFS_time_Months'], risk_scores)[0]
        )

        # IBS from this model's own predicted survival functions
        surv_funcs = fold_model.predict_survival_function(x_test)
        surv_probs = np.asarray([[fn(t) for t in times] for fn in surv_funcs])
        ibs_list.append(integrated_brier_score(y_train, y_test, surv_probs, times))

        # Mean time-dependent ROC AUC over the evaluation grid
        auc_per_time, mean_auc = cumulative_dynamic_auc(y_train, y_test, risk_scores, times)
        auc_list.append(mean_auc)

    #adding fold number info
    fold_numbers.append(fold)

In [22]:
# Summarise the per-fold cross-validation metrics of each model.

# Mean over folds:
# Harrell's concordance index
mean_c_index_lasso = np.mean(c_indices_lasso)
mean_c_index_ridge = np.mean(c_indices_ridge)
mean_c_index_elastic = np.mean(c_indices_elastic_net)
mean_c_index_rf = np.mean(c_indices_rf)

# integrated Brier score
mean_ibs_lasso = np.mean(ibs_scores_lasso)
mean_ibs_ridge = np.mean(ibs_scores_ridge)
mean_ibs_elastic = np.mean(ibs_scores_elastic_net)
mean_ibs_rf = np.mean(ibs_scores_rf)

# mean time-dependent AUC
mean_auc_lasso = np.mean(time_dependent_aucs_lasso)
mean_auc_ridge = np.mean(time_dependent_aucs_ridge)
mean_auc_elastic = np.mean(time_dependent_aucs_elastic_net)
mean_auc_rf = np.mean(time_dependent_aucs_rf)

# Uno's concordance index
mean_c_uno_lasso = np.mean(c_uno_lasso)
mean_c_uno_ridge = np.mean(c_uno_ridge)
mean_c_uno_elastic = np.mean(c_uno_elastic)
mean_c_uno_rf = np.mean(c_uno_rf)

# Standard deviation over folds (np.std is called with its default arguments):
# Harrell's concordance index
std_c_index_lasso = np.std(c_indices_lasso)
std_c_index_ridge = np.std(c_indices_ridge)
std_c_index_elastic = np.std(c_indices_elastic_net)
std_c_index_rf = np.std(c_indices_rf)

# integrated Brier score
std_ibs_lasso = np.std(ibs_scores_lasso)
std_ibs_ridge = np.std(ibs_scores_ridge)
std_ibs_elastic = np.std(ibs_scores_elastic_net)
std_ibs_rf = np.std(ibs_scores_rf)

# mean time-dependent AUC
std_auc_lasso = np.std(time_dependent_aucs_lasso)
std_auc_ridge = np.std(time_dependent_aucs_ridge)
std_auc_elastic = np.std(time_dependent_aucs_elastic_net)
std_auc_rf = np.std(time_dependent_aucs_rf)

# Uno's concordance index
std_c_uno_lasso = np.std(c_uno_lasso)
std_c_uno_ridge = np.std(c_uno_ridge)
std_c_uno_elastic = np.std(c_uno_elastic)
std_c_uno_rf = np.std(c_uno_rf)


In [23]:
# Refit each model on all training samples (X, y_surv) with the selected
# hyperparameters; these fitted models are the ones scored on the validation set.
model_lasso = CoxnetSurvivalAnalysis(alphas=alpha_lasso, fit_baseline_model=True, l1_ratio=1.0).fit(X, y_surv)
model_ridge = CoxnetSurvivalAnalysis(alphas=alpha_ridge, fit_baseline_model=True, l1_ratio=1e-5).fit(X, y_surv)
model_elastic = CoxnetSurvivalAnalysis(alphas=alpha_elastic, fit_baseline_model=True, l1_ratio=best_l1_ratio).fit(X, y_surv)
model_rf = RandomSurvivalForest(n_estimators=n_estimators, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, n_jobs=1, random_state=random_state).fit(X, y_surv)


In [24]:
# Build the validation inputs for checking performance on the held-out data.
# The feature columns are taken from the training matrix X, so the validation
# matrix always has exactly the columns (and column order) the models were
# fitted on, whichever feature set is active; no second column list has to be
# kept in sync by hand.
X_val = val_df[list(X.columns)]
y_val = val_df[['RFS_Status', 'RFS_time_Months']]


In [25]:
# Structured-array form of the validation target, built the same way as y_surv.
y_val_surv = Surv.from_dataframe('RFS_Status', 'RFS_time_Months', y_val) # event coding noted by the author: false = 0, true = 1

In [26]:
# Metrics on the validation dataset for each model fitted on the full training
# split: Harrell's C, Uno's C, integrated Brier score and mean time-dependent AUC.

val_event = y_val_surv['RFS_Status']
val_time = y_val_surv['RFS_time_Months']

# Risk scores are predicted once per model and reused by every metric.
risk_lasso_val = model_lasso.predict(X_val)
risk_ridge_val = model_ridge.predict(X_val)
risk_elastic_val = model_elastic.predict(X_val)
risk_rf_val = model_rf.predict(X_val)

# Harrell's concordance index
c_index_lasso_test = concordance_index_censored(val_event, val_time, risk_lasso_val)[0]
c_index_ridge_test = concordance_index_censored(val_event, val_time, risk_ridge_val)[0]
c_index_elastic_test = concordance_index_censored(val_event, val_time, risk_elastic_val)[0]
c_index_rf_test = concordance_index_censored(val_event, val_time, risk_rf_val)[0]

# Uno's concordance index (censoring distribution estimated from y_surv).
# A leftover call that scored the full-data LASSO model on the last
# cross-validation fold's variables, and discarded the result, was removed.
c_uno_lasso_test = concordance_index_ipcw(y_surv, y_val_surv, risk_lasso_val)[0]
c_uno_ridge_test = concordance_index_ipcw(y_surv, y_val_surv, risk_ridge_val)[0]
c_uno_elastic_test = concordance_index_ipcw(y_surv, y_val_surv, risk_elastic_val)[0]
c_uno_rf_test = concordance_index_ipcw(y_surv, y_val_surv, risk_rf_val)[0]

# Evaluation grid shared by the IBS and the time-dependent AUC: 20 points
# between the 5th and 95th percentile of the TRAINING times.
# NOTE(review): the grid comes from training times, not validation times;
# confirm it lies within the validation follow-up range for every seed.
times = np.percentile(y_surv['RFS_time_Months'], np.linspace(5, 95, 20))

# Integrated Brier score from each model's predicted survival functions
surv_funcs_lasso = model_lasso.predict_survival_function(X_val)
surv_probs_lasso = np.asarray([[fn(t) for t in times] for fn in surv_funcs_lasso])
ibs_test_lasso = integrated_brier_score(y_surv, y_val_surv, surv_probs_lasso, times)

surv_funcs_ridge = model_ridge.predict_survival_function(X_val)
surv_probs_ridge = np.asarray([[fn(t) for t in times] for fn in surv_funcs_ridge])
ibs_test_ridge = integrated_brier_score(y_surv, y_val_surv, surv_probs_ridge, times)

surv_funcs_elastic = model_elastic.predict_survival_function(X_val)
surv_probs_elastic = np.asarray([[fn(t) for t in times] for fn in surv_funcs_elastic])
ibs_test_elastic = integrated_brier_score(y_surv, y_val_surv, surv_probs_elastic, times)

surv_funcs_rf = model_rf.predict_survival_function(X_val)
surv_probs_rf = np.asarray([[fn(t) for t in times] for fn in surv_funcs_rf])
ibs_test_rf = integrated_brier_score(y_surv, y_val_surv, surv_probs_rf, times)

# Time-dependent ROC AUC (the mean over the grid is what gets reported)
auc_lasso, mean_auc_lasso_test = cumulative_dynamic_auc(y_surv, y_val_surv, risk_lasso_val, times)
auc_test_lasso = mean_auc_lasso_test

auc_ridge, mean_auc_ridge_test = cumulative_dynamic_auc(y_surv, y_val_surv, risk_ridge_val, times)
auc_test_ridge = mean_auc_ridge_test

auc_elastic, mean_auc_elastic_test = cumulative_dynamic_auc(y_surv, y_val_surv, risk_elastic_val, times)
auc_test_elastic = mean_auc_elastic_test

auc_rf, mean_auc_rf_test = cumulative_dynamic_auc(y_surv, y_val_surv, risk_rf_val, times)
auc_test_rf = mean_auc_rf_test


In [None]:
import pandas as pd
import os

# Append this run's hyperparameters and metrics as one row of a results CSV,
# so that results accumulate across runs with different random_state values.

# Define the file path to save the results
output_file = './7feature_model/clin_only.csv' #<---------can be changed based on the file name you want

# Step 1: Organize the current run's data into a dictionary (new row).
# Per-fold lists are wrapped in an outer list, as in earlier result files.
data = {
    'random_state': random_state,
    'best_l1_ratio': best_l1_ratio,
    'alpha_lasso': alpha_lasso,
    'alpha_ridge': alpha_ridge,
    'alpha_elastic': alpha_elastic,
    'min_samples_leaf': min_samples_leaf,
    'min_samples_split': min_samples_split,
    'n_estimators': n_estimators,

    # Harrell's concordance index
    'c_indices_lasso': [c_indices_lasso],
    'mean_c_index_lasso': mean_c_index_lasso,
    'std_c_index_lasso': std_c_index_lasso,
    'c_index_lasso_test': c_index_lasso_test,

    'c_indices_ridge': [c_indices_ridge],
    'mean_c_index_ridge': mean_c_index_ridge,
    'std_c_index_ridge': std_c_index_ridge,
    'c_index_ridge_test': c_index_ridge_test,

    'c_indices_elastic_net': [c_indices_elastic_net],
    'mean_c_index_elastic': mean_c_index_elastic,
    'std_c_index_elastic': std_c_index_elastic,
    'c_index_elastic_test': c_index_elastic_test,

    'c_indices_rf': [c_indices_rf],
    'mean_c_index_rf': mean_c_index_rf,
    'std_c_index_rf': std_c_index_rf,
    'c_index_rf_test': c_index_rf_test,

    # Integrated Brier score
    'ibs_scores_lasso': [ibs_scores_lasso],
    'mean_ibs_lasso': mean_ibs_lasso,
    'std_ibs_lasso': std_ibs_lasso,
    'ibs_test_lasso': ibs_test_lasso,

    'ibs_scores_ridge': [ibs_scores_ridge],
    'mean_ibs_ridge': mean_ibs_ridge,
    'std_ibs_ridge': std_ibs_ridge,
    'ibs_test_ridge': ibs_test_ridge,

    'ibs_scores_elastic_net': [ibs_scores_elastic_net],
    'mean_ibs_elastic': mean_ibs_elastic,
    'std_ibs_elastic': std_ibs_elastic,
    'ibs_test_elastic': ibs_test_elastic,

    'ibs_scores_rf': [ibs_scores_rf],
    'mean_ibs_rf': mean_ibs_rf,
    'std_ibs_rf': std_ibs_rf,
    'ibs_test_rf': ibs_test_rf,

    # Mean time-dependent AUC
    'time_dependent_aucs_lasso': [time_dependent_aucs_lasso],
    'mean_auc_lasso': mean_auc_lasso,
    'std_auc_lasso': std_auc_lasso,
    'auc_test_lasso': auc_test_lasso,

    'time_dependent_aucs_ridge': [time_dependent_aucs_ridge],
    'mean_auc_ridge': mean_auc_ridge,
    'std_auc_ridge': std_auc_ridge,
    'auc_test_ridge': auc_test_ridge,

    'time_dependent_aucs_elastic_net': [time_dependent_aucs_elastic_net],
    'mean_auc_elastic': mean_auc_elastic,
    'std_auc_elastic': std_auc_elastic,
    'auc_test_elastic': auc_test_elastic,

    'time_dependent_aucs_rf': [time_dependent_aucs_rf],
    'mean_auc_rf': mean_auc_rf,
    'std_auc_rf': std_auc_rf,
    'auc_test_rf': auc_test_rf,

    # Uno's concordance index
    'c_uno_lasso': [c_uno_lasso],
    'mean_c_uno_lasso': mean_c_uno_lasso,
    'std_c_uno_lasso': std_c_uno_lasso,
    'c_uno_lasso_test': c_uno_lasso_test,

    'c_uno_ridge': [c_uno_ridge],
    'mean_c_uno_ridge': mean_c_uno_ridge,
    'std_c_uno_ridge': std_c_uno_ridge,
    'c_uno_ridge_test': c_uno_ridge_test,

    'c_uno_elastic_net': [c_uno_elastic],
    'mean_c_uno_elastic': mean_c_uno_elastic,
    'std_c_uno_elastic': std_c_uno_elastic,
    'c_uno_elastic_test': c_uno_elastic_test,

    # This key used to be a second 'c_indices_rf', which silently replaced the
    # Harrell c-index fold list above with the Uno values and never wrote a
    # 'c_uno_rf' column. Rows already stored in older result files carry the
    # Uno values under 'c_indices_rf'.
    'c_uno_rf': [c_uno_rf],
    'mean_c_uno_rf': mean_c_uno_rf,
    'std_c_uno_rf': std_c_uno_rf,
    'c_uno_rf_test': c_uno_rf_test,
}

# Convert the dictionary into a one-row DataFrame
new_row = pd.DataFrame([data])

# Step 2: Make sure the target directory exists so the first run can write to it
output_dir = os.path.dirname(output_file)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Step 3: Append to the existing results if there are any, otherwise start a new table
if os.path.exists(output_file):
    df = pd.concat([pd.read_csv(output_file), new_row], ignore_index=True)
else:
    df = new_row

# Step 4: Save the updated DataFrame to CSV (or you can use a different format like pickle)
df.to_csv(output_file, index=False)

# Now the DataFrame is updated and saved, preserving results between runs
