In [12]:
import pandas as pd

In [13]:
# Step 1: Read the three hyper-parameter tuning result tables.
# All paths are relative to the notebook's working directory.
rf_xgb_results_path = "tox21_model_tuning_results.csv"          # Random Forest & XGBoost
lr_results_path = "logistic_regression_tuning_results.csv"     # Logistic Regression
svm_results_path = "svm_tuning_results.csv"                     # SVM

df_rf_xgb = pd.read_csv(rf_xgb_results_path)
df_lr = pd.read_csv(lr_results_path)
df_svm = pd.read_csv(svm_results_path)

In [14]:
# Step 2: Reduce every results table to the same three columns
# (Target, Model, ROC_AUC) so they can be stacked later.

# The RF/XGBoost table already names its score column 'ROC_AUC'.
standard_columns = ['Target', 'Model', 'ROC_AUC']
df_rf_xgb_clean = df_rf_xgb[standard_columns]

# The Logistic Regression and SVM tables name it 'Best ROC-AUC' instead,
# so select first and then rename to the shared column name.
best_auc_columns = ['Target', 'Model', 'Best ROC-AUC']
best_auc_to_standard = {'Best ROC-AUC': 'ROC_AUC'}

df_lr_clean = df_lr[best_auc_columns].rename(columns=best_auc_to_standard)
df_svm_clean = df_svm[best_auc_columns].rename(columns=best_auc_to_standard)

In [15]:
# Step 3: Stack the three standardized tables into one long table.
# ignore_index=True discards each table's own row labels and renumbers the
# rows of the result from 0.
standardized_frames = [df_rf_xgb_clean, df_lr_clean, df_svm_clean]
combined_auc_df = pd.concat(standardized_frames, ignore_index=True)

In [16]:
# Round the scores to four decimals for readability.
rounded_auc = combined_auc_df['ROC_AUC'].round(decimals=4)
combined_auc_df['ROC_AUC'] = rounded_auc

# Step 4: Persist the long-format table.
# index=False keeps the positional row labels out of the file.
combined_auc_path = "combined_model_roc_auc_scores.csv"
combined_auc_df.to_csv(combined_auc_path, index=False)

In [17]:
# Quick sanity check: print the first five rows of the combined table.
preview_rows = combined_auc_df.head(n=5)
print(preview_rows)

      Target          Model  ROC_AUC
0      NR-AR  Random Forest   0.8038
1      NR-AR        XGBoost   0.8044
2  NR-AR-LBD  Random Forest   0.8655
3  NR-AR-LBD        XGBoost   0.8558
4     NR-AhR  Random Forest   0.8964


In [18]:
# Pivot the long table into a Model x Target matrix of ROC-AUC scores
# (models as rows, targets as columns).
#
# Notes:
# - pivot_table aggregates with its default aggfunc (mean), so if a
#   (Model, Target) pair appears more than once its scores are averaged.
# - Combinations that never occur come out as NaN and are replaced with the
#   "–" placeholder. Any column that receives the placeholder then holds a
#   mix of floats and strings, and the placeholder is carried into anything
#   saved from pivot_df afterwards.
# - fillna returns a new frame here instead of mutating with inplace=True, so
#   the whole matrix is built by one expression and re-running the cell
#   always starts again from combined_auc_df.
pivot_df = (
    combined_auc_df
    .pivot_table(index="Model", columns="Target", values="ROC_AUC")
    .round(4)
    .fillna("–")
)

# Display
print(pivot_df)

Target               NR-AR  NR-AR-LBD  NR-AhR  NR-Aromatase   NR-ER  \
Model                                                                 
LogisticRegression  0.7972     0.8517  0.8803        0.8199  0.7186   
Random Forest       0.8038     0.8655  0.8964        0.8171  0.7229   
SVM                 0.8023     0.8743  0.8922        0.8246  0.7220   
XGBoost             0.8044     0.8558  0.8905        0.8069  0.7172   

Target              NR-ER-LBD  NR-PPAR-gamma  SR-ARE  SR-ATAD5  SR-HSE  \
Model                                                                    
LogisticRegression     0.7914         0.7759  0.7653    0.8214  0.7451   
Random Forest          0.8144         0.8267  0.8092    0.8395  0.7774   
SVM                    0.8049         0.8265  0.8105    0.8399  0.7754   
XGBoost                0.7914         0.7860  0.7802    0.8199  0.7552   

Target              SR-MMP  SR-p53  
Model                               
LogisticRegression  0.8639  0.8090  
Random Forest    

In [19]:
# Persist the Model x Target matrix. The index is written as well (to_csv
# default), so the Model labels end up in the first column of the file.
pivot_matrix_path = "pivoted_model_roc_auc_matrix.csv"
pivot_df.to_csv(pivot_matrix_path)