In [None]:
import pandas as pd
import numpy as np
import ast
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
from sklearn.metrics import roc_auc_score
from collections import defaultdict
from xgboost import XGBClassifier

### Read the CSV

In [None]:
# Load the pre-computed DenseNet embedding tables into `test` and `train`.
# NOTE(review): `test` is read from the *valid* file and `train` from the *test* file —
# this looks swapped or mislabelled; confirm these are the intended splits.
test = pd.read_csv("final_data/densenet_valid_embeddings.csv")
# on_bad_lines='skip' makes pandas drop rows it cannot parse instead of raising.
train = pd.read_csv("final_data/densenet_test_embeddings.csv", quotechar='"', on_bad_lines='skip')

# Quick look at the available columns and the first rows of the test table.
print(test.columns)
test.head()

### Convert embeddings from str to list (this can take a while for large data sets)

In [None]:
# Parse the stringified embeddings into Python lists.
# Only string cells are parsed: values that are already lists (cell re-run) or
# non-string placeholders are passed through unchanged instead of making
# ast.literal_eval raise. Non-list values are filtered out in a later cell.
test['embeddings'] = test['embeddings'].apply(
    lambda value: ast.literal_eval(value) if isinstance(value, str) else value
)

In [None]:
# Parse the stringified embeddings into Python lists.
# Only string cells are parsed: values that are already lists (cell re-run) or
# non-string placeholders are passed through unchanged instead of making
# ast.literal_eval raise. Non-list values are filtered out in a later cell.
train['embeddings'] = train['embeddings'].apply(
    lambda value: ast.literal_eval(value) if isinstance(value, str) else value
)

### Remove columns

In [None]:
# Drop the two file-path columns from both tables.
test = test.drop(['path_to_image', 'path_to_dcm'], axis='columns')
train = train.drop(['path_to_image', 'path_to_dcm'], axis='columns')


### Remove rows that were not processed (embeddings = 0)

In [None]:
# Keep only the rows whose embedding is an actual list (per the section heading,
# the other rows were not processed). An explicit type check replaces the earlier,
# less stable filter on the length of the stringified embedding.
initial_size = len(test)
test = test.loc[test['embeddings'].apply(type) == list]
final_size = len(test)
print(f'Number of test removed rows = {initial_size - final_size}')

# Same filter for the training table.
initial_size = len(train)
train = train.loc[train['embeddings'].apply(type) == list]
final_size = len(train)
print(f'Number of train removed rows = {initial_size - final_size}')

In [None]:
# Sanity check: (rows, columns) of the test table at this point in the notebook.
test.shape

### Train test

In [None]:

# # Using Random Forest as the base estimator
# base_rf = RandomForestClassifier(n_estimators=100, random_state=42)

# # Multi-output classifier
# multi_target_rf = MultiOutputClassifier(base_rf, n_jobs=-1)

# # Train the model
# multi_target_rf.fit(train_embeddings, y_train)

# # Predict on test data
# predictions = multi_target_rf.predict(test_embeddings)

# # Evaluate the model
# print("Accuracy:", accuracy_score(y_test, predictions))
# print("F1 Score:", f1_score(y_test, predictions, average='micro'))
# print("Recall:", recall_score(y_test, predictions, average='micro'))
# print("Precision:", precision_score(y_test, predictions, average='micro'))


In [None]:
# Expand every embedding list into one numeric column per dimension.
# The column list has 1024 names, so pandas raises if an embedding has a different length.
embedding_columns = [f'embedding_{i}' for i in range(1024)]
train_embeddings = pd.DataFrame(train['embeddings'].tolist(), columns=embedding_columns)
test_embeddings = pd.DataFrame(test['embeddings'].tolist(), columns=embedding_columns)

# Diseases to predict
diseases = ['Enlarged Cardiomediastinum', 'Cardiomegaly', 'Lung Opacity', 'Lung Lesion', 'Edema', 'Consolidation', 'Pneumonia',
            'Atelectasis', 'Pneumothorax', 'Pleural Effusion', 'Pleural Other', 'Fracture']

# Labels for train and test.
# The index is reset so the labels stay row-aligned (by label as well as by position)
# with x_train / x_test, whose index is rebuilt below.
y_train = train[diseases].reset_index(drop=True)
y_test = test[diseases].reset_index(drop=True)

# Create x_train and x_test.
# `train` / `test` were row-filtered earlier, so their index has gaps; it must be reset
# before the side-by-side concat with the freshly built embedding frames.
# drop=True is essential: a plain reset_index() would insert the old row labels as an
# extra column named "index", which would then be fed to the models as a feature.
x_train = pd.concat([train.reset_index(drop=True), train_embeddings], axis=1)
x_test = pd.concat([test.reset_index(drop=True), test_embeddings], axis=1)

# Remove the raw embedding lists and the label columns from the feature tables.
x_train = x_train.drop(columns=["embeddings"] + diseases)
x_test = x_test.drop(columns=["embeddings"] + diseases)

# Create some backups:
_x_train, _x_test, _y_train, _y_test = x_train.copy(deep=True), x_test.copy(deep=True), y_train.copy(deep=True), y_test.copy(deep=True)
# To restore the backups, run:
# x_train, x_test, y_train, y_test = _x_train.copy(deep=True), _x_test.copy(deep=True), _y_train.copy(deep=True), _y_test.copy(deep=True)



In [None]:
# Label columns predicted by train_model (it reads this module-level list).
diseases = ['Enlarged Cardiomediastinum', 'Cardiomegaly', 'Lung Opacity', 'Lung Lesion', 'Edema',
            'Consolidation', 'Pneumonia', 'Atelectasis', 'Pneumothorax',
            'Pleural Effusion', 'Pleural Other', 'Fracture']

def train_model(x_train, y_train, x_test, y_test, model, metric_dimensions=None, columns_to_drop=None, n_components=None):
    """Fit `model` for every entry of `diseases` and score it overall and per subgroup.

    Parameters
    ----------
    x_train, x_test : pandas.DataFrame
        Feature tables. `x_test` must also contain every column named in
        `metric_dimensions`; those columns are used to build the subgroups.
    y_train, y_test : pandas.DataFrame
        Label tables with one column per entry of the module-level `diseases` list.
    model : estimator
        Base estimator. It is first wrapped in a MultiOutputClassifier; if that path
        raises ValueError, the estimator itself is refitted once per disease.
    metric_dimensions : iterable of str, optional
        Columns of `x_test` whose unique non-null values define the subgroups.
    columns_to_drop : list of str, optional
        Columns removed from both feature tables before fitting (missing ones are ignored).
    n_components : int, optional
        If truthy, a PCA with that many components is fitted on the training
        features and applied to both feature tables.

    Returns
    -------
    pandas.DataFrame
        One row per (metric dimension, subgroup, disease) with the columns
        'Metric Dimension', 'Subgroup', 'Disease', 'Accuracy', 'F1 Score',
        'Recall' and 'Precision'. Rows computed on the full test set use
        "all" for dimension and subgroup; rows over all diseases at once use
        the disease label "Overall" (only produced on the multi-output path).
    """
    metric_dimensions = [] if metric_dimensions is None else metric_dimensions
    columns_to_drop = [] if columns_to_drop is None else columns_to_drop

    multi_output_model = MultiOutputClassifier(model)
    # Positional index so that boolean subgroup masks line up with the prediction frame.
    x_test = x_test.reset_index(drop=True)
    y_test = y_test.reset_index(drop=True)

    x_train_subset = x_train.drop(columns=columns_to_drop, errors='ignore')
    x_test_subset = x_test.drop(columns=columns_to_drop, errors='ignore')

    # Apply PCA if specified
    if n_components:
        pca = PCA(n_components=n_components)
        x_train_subset = pca.fit_transform(x_train_subset)
        x_test_subset = pca.transform(x_test_subset)
        print(f"PCA used with {n_components} components.")
    else:
        print("PCA not used.")

    # Column-wise storage of all metric rows
    metrics_data = {
        'Metric Dimension': [],
        'Subgroup': [],
        'Disease': [],
        'Accuracy': [],
        'F1 Score': [],
        'Recall': [],
        'Precision': []
    }

    def calculate_metrics(y_true, y_pred, metric_dim, subgroup, disease_label="Overall"):
        """Compute one metric row and append it to metrics_data.

        All values are computed before anything is appended, so a failing metric
        can never leave the column lists with different lengths.
        """
        # NOTE(review): for a single 1-D label column, average='micro' pools both
        # classes, so F1/recall/precision presumably coincide with accuracy on the
        # per-disease rows; consider average='binary' there — confirm label encoding first.
        row = {
            'Metric Dimension': metric_dim,
            'Subgroup': subgroup,
            'Disease': disease_label,
            'Accuracy': accuracy_score(y_true, y_pred),
            'F1 Score': f1_score(y_true, y_pred, average='micro'),
            'Recall': recall_score(y_true, y_pred, average='micro'),
            'Precision': precision_score(y_true, y_pred, average='micro'),
        }
        for column, value in row.items():
            metrics_data[column].append(value)

    # Train on the full dataset; y_test_preds always ends up as a DataFrame with
    # one prediction column per disease, whichever path was taken.
    try:
        multi_output_model.fit(x_train_subset, y_train)
        y_test_preds = pd.DataFrame(multi_output_model.predict(x_test_subset), columns=diseases)
        calculate_metrics(y_test, y_test_preds, "all", "all")
        # Calculate metrics for each disease individually
        for disease in diseases:
            calculate_metrics(y_test[disease], y_test_preds[disease], "all", "all", disease_label=disease)
        multi_output_ok = True
    except ValueError:
        print("Multi-output not supported. Training each disease separately.")
        multi_output_ok = False
        # Discard rows written before the failure so nothing is reported twice.
        for values in metrics_data.values():
            values.clear()
        per_disease_preds = {}
        for disease in diseases:
            # `model` is refitted for every disease, so its predictions must be
            # captured right away; afterwards it only knows the last disease.
            model.fit(x_train_subset, y_train[disease])
            per_disease_preds[disease] = model.predict(x_test_subset)
            calculate_metrics(y_test[disease], per_disease_preds[disease], "all", "all", disease_label=disease)
        y_test_preds = pd.DataFrame(per_disease_preds)

    # Calculate metrics for each dimension in `metric_dimensions`
    for metric_dim in metric_dimensions:
        # dropna(): a NaN "subgroup" can never match `== subgroup` and would give an empty slice.
        for subgroup in x_test[metric_dim].dropna().unique():
            # Boolean mask selecting the rows of this subgroup
            mask = (x_test[metric_dim] == subgroup)
            y_test_subgroup = y_test.loc[mask]
            y_test_preds_subgroup = y_test_preds.loc[mask]
            if multi_output_ok:
                calculate_metrics(y_test_subgroup, y_test_preds_subgroup, metric_dim, subgroup)
            for disease in diseases:
                calculate_metrics(y_test_subgroup[disease], y_test_preds_subgroup[disease],
                                  metric_dim, subgroup, disease_label=disease)

    # Convert metrics_data to DataFrame and return
    metrics_df = pd.DataFrame(metrics_data)
    print(metrics_df.columns)
    return metrics_df

def plot_metrics(metrics_df, metric_name, modelname, trainingsize):
    """Draw a 2x2 grid of bar charts for one metric, save it as PNG and show it.

    Panels: metric by disease split by sex, by insurance type and by race, plus
    the metric by disease on the full test set (with an "Overall" bar when available).

    Parameters
    ----------
    metrics_df : pandas.DataFrame
        Table with at least the columns 'Metric Dimension', 'Subgroup',
        'Disease' and `metric_name`.
    metric_name : str
        Name of the metric column to plot.
    modelname, trainingsize : str
        Used in the figure subtitle and in the output file name
        ``ml_plots/{metric_name}_{modelname}_{trainingsize}_comparison.png``.
    """
    import os  # local import: only needed to make sure the output folder exists

    fig, axes = plt.subplots(2, 2, figsize=(20, 14))  # larger figure
    fig.suptitle(f"{metric_name} Comparison Across Dimensions", fontsize=18)
    fig.text(0.5, 0.94, f"Trained with model: {modelname} on the following training size: {trainingsize}",
             ha='center', fontsize=12, color="gray")

    palette = ["#3498DB", "#FFC300", "#2ECC71", "#E74C3C"]

    # Plots 1-3: metric by disease, one bar colour per subgroup.
    # (axis, value of 'Metric Dimension', title suffix, colours)
    subgroup_panels = [
        (axes[0, 0], 'sex', 'Sex', palette[:2]),
        (axes[0, 1], 'insurance_type', 'Insurance Type', palette[:3]),
        (axes[1, 0], 'race', 'Race', palette[:3]),
    ]
    for ax, dimension, title_suffix, colours in subgroup_panels:
        panel_data = metrics_df[metrics_df['Metric Dimension'] == dimension]
        sns.barplot(data=panel_data, x='Disease', y=metric_name, hue='Subgroup', ax=ax, palette=colours)
        ax.set_title(f"{metric_name} by Disease and {title_suffix}", fontsize=14)
        ax.tick_params(axis='x', rotation=90)

    # Plot 4: Overall Metric by Disease (no subgroups)
    on_full_test_set = metrics_df['Metric Dimension'] == 'all'
    overall_disease_data = metrics_df[on_full_test_set & (metrics_df['Disease'] != 'Overall')]
    overall_metric = metrics_df[on_full_test_set & (metrics_df['Disease'] == 'Overall')]
    # Add the overall metric as a new row in the DataFrame for plotting
    if not overall_metric.empty:
        overall_row = pd.DataFrame({
            'Disease': ['Overall'],
            metric_name: [overall_metric[metric_name].values[0]]  # metric over all diseases
        })
        overall_disease_data = pd.concat([overall_disease_data, overall_row], ignore_index=True)

    # Plot the data including the "Overall" bar
    sns.barplot(data=overall_disease_data, x='Disease', y=metric_name, ax=axes[1, 1], color=palette[0])
    axes[1, 1].set_title(f"{metric_name} by Disease (Overall)", fontsize=14)
    axes[1, 1].tick_params(axis='x', rotation=90)
    plt.subplots_adjust(wspace=0.1, hspace=0.3)
    plt.tight_layout(rect=[0, 0, 1, 0.92])

    # savefig does not create missing folders, so make sure the target exists.
    os.makedirs("ml_plots", exist_ok=True)
    plt.savefig(f"ml_plots/{metric_name}_{modelname}_{trainingsize}_comparison.png", bbox_inches='tight')
    plt.show()
    # Release the figure; this function is called in a loop over several metrics.
    plt.close(fig)

# Experiment settings used by the model run(s) in this cell.
training_size: int = 2000  # number of leading training rows used (x_train.iloc[:training_size])
use_pca: bool = True       # NOTE(review): intended PCA on/off switch — confirm the train_model call honours it
n_components: int = 10     # forwarded to train_model as n_components

# rcf = RandomForestClassifier(n_estimators=30, random_state=42)
# rcf_metrics = train_model(
#     x_train=x_train.iloc[:training_size], 
#     y_train=y_train.iloc[:training_size], 
#     x_test=x_test, 
#     y_test=y_test, 
#     model=rcf, 
#     metric_dimensions=["sex", "race", "insurance_type"],
#     n_components=n_components   
# )

# # Naive Bayes
# naive_bayes = MultinomialNB()
# nb_metrics = train_model(
#     x_train=x_train.iloc[:training_size], 
#     y_train=y_train.iloc[:training_size], 
#     x_test=x_test, 
#     y_test=y_test, 
#     model=naive_bayes, 
#     metric_dimensions=["sex", "race", "insurance_type"],
#     n_components=None  
# )

# Gradient Boosting
# Note: despite the variable name `xgb`, this is scikit-learn's GradientBoostingClassifier.
xgb = GradientBoostingClassifier(learning_rate=0.1, n_estimators=30, verbose=1, random_state=42, n_iter_no_change=5)
# NOTE(review): columns_to_drop is left at its default, so every remaining column of
# x_train (including the metric_dimensions columns) is passed to the model — confirm intended.
xgb_metrics = train_model(
    x_train=x_train.iloc[:training_size],
    y_train=y_train.iloc[:training_size],
    x_test=x_test,
    y_test=y_test,
    model=xgb,
    metric_dimensions=["sex", "race", "insurance_type"],
    # Honour the use_pca switch; previously the flag was set but never read.
    n_components=n_components if use_pca else None,
)



# # Decision Tree
# dct = DecisionTreeClassifier(random_state=42)
# dct_metrics = train_model(
#     x_train=x_train.iloc[:training_size], 
#     y_train=y_train.iloc[:training_size], 
#     x_test=x_test, 
#     y_test=y_test, 
#     model=dct, 
#     metric_dimensions=["sex", "race", "insurance_type"],
#     n_components=n_components 
# )

# Plotting: one 2x2 comparison figure per metric for the model trained above.
for metric in ('Accuracy', 'F1 Score', 'Recall', 'Precision'):
    #plot_metrics(rcf_metrics, metric, "RandomForest_with_PCA" , str(training_size))
    #plot_metrics(nb_metrics, metric, "NaiveBayes", str(training_size))
    plot_metrics(
        metrics_df=xgb_metrics,
        metric_name=metric,
        modelname="GradientBoosting_with_PCA",
        trainingsize=str(training_size),
    )
    # plot_metrics(dct_metrics, metric, "DecisionTree_with_PCA", str(training_size))



In [None]:
# Label columns predicted by train_model (it reads this module-level list).
diseases = ['Enlarged Cardiomediastinum', 'Cardiomegaly', 'Lung Opacity', 'Lung Lesion', 'Edema',
            'Consolidation', 'Pneumonia', 'Atelectasis', 'Pneumothorax',
            'Pleural Effusion', 'Pleural Other', 'Fracture']

def train_model(x_train, y_train, x_test, y_test, model, metric_dimensions=None, columns_to_drop=None, n_components=None):
    """Fit `model` for every entry of `diseases` and score it (including AUC) overall and per subgroup.

    Parameters
    ----------
    x_train, x_test : pandas.DataFrame
        Feature tables. `x_test` must also contain every column named in
        `metric_dimensions`; those columns are used to build the subgroups.
    y_train, y_test : pandas.DataFrame
        Label tables with one column per entry of the module-level `diseases` list.
    model : estimator
        Base estimator. It is first wrapped in a MultiOutputClassifier; if that path
        raises ValueError, the estimator itself is refitted once per disease.
        AUC is only computed when the estimator exposes `predict_proba`.
    metric_dimensions : iterable of str, optional
        Columns of `x_test` whose unique non-null values define the subgroups.
    columns_to_drop : list of str, optional
        Columns removed from both feature tables before fitting (missing ones are ignored).
    n_components : int, optional
        If truthy, a PCA with that many components is fitted on the training
        features and applied to both feature tables.

    Returns
    -------
    pandas.DataFrame
        One row per (metric dimension, subgroup, disease) with the columns
        'Metric Dimension', 'Subgroup', 'Disease', 'Accuracy', 'F1 Score',
        'Recall', 'Precision' and 'AUC'. AUC is None when no probabilities are
        available or roc_auc_score raises ValueError. Rows over all diseases at
        once use the disease label "Overall" (only produced on the multi-output path).
    """
    metric_dimensions = [] if metric_dimensions is None else metric_dimensions
    columns_to_drop = [] if columns_to_drop is None else columns_to_drop

    multi_output_model = MultiOutputClassifier(model)
    # Positional index so that boolean subgroup masks line up with the prediction frames.
    x_test = x_test.reset_index(drop=True)
    y_test = y_test.reset_index(drop=True)

    x_train_subset = x_train.drop(columns=columns_to_drop, errors='ignore')
    x_test_subset = x_test.drop(columns=columns_to_drop, errors='ignore')

    # Apply PCA if wanted
    if n_components:
        pca = PCA(n_components=n_components)
        x_train_subset = pca.fit_transform(x_train_subset)
        x_test_subset = pca.transform(x_test_subset)
        print(f"PCA used with {n_components} components.")
    else:
        print("PCA not used.")

    metrics_data = {
        'Metric Dimension': [],
        'Subgroup': [],
        'Disease': [],
        'Accuracy': [],
        'F1 Score': [],
        'Recall': [],
        'Precision': [],
        'AUC': []
    }

    def calculate_metrics(y_true, y_pred, metric_dim, subgroup, y_pred_proba=None, disease_label="Overall"):
        """Compute one metric row and append it to metrics_data.

        All values are computed before anything is appended, so a failing metric
        can never leave the column lists with different lengths.
        """
        auc = None
        if y_pred_proba is not None:
            try:
                auc = roc_auc_score(y_true, y_pred_proba)
            except ValueError:
                auc = None
        # NOTE(review): for a single 1-D label column, average='micro' pools both
        # classes, so F1/recall/precision presumably coincide with accuracy on the
        # per-disease rows; consider average='binary' there — confirm label encoding first.
        row = {
            'Metric Dimension': metric_dim,
            'Subgroup': subgroup,
            'Disease': disease_label,
            'Accuracy': accuracy_score(y_true, y_pred),
            'F1 Score': f1_score(y_true, y_pred, average='micro'),
            'Recall': recall_score(y_true, y_pred, average='micro'),
            'Precision': precision_score(y_true, y_pred, average='micro'),
            'AUC': auc,
        }
        for column, value in row.items():
            metrics_data[column].append(value)

    has_proba = hasattr(model, "predict_proba")

    # Whichever path is taken, y_test_preds (and y_test_preds_proba, if available)
    # end up as DataFrames with one column per disease.
    try:
        multi_output_model.fit(x_train_subset, y_train)
        y_test_preds = pd.DataFrame(multi_output_model.predict(x_test_subset), columns=diseases)
        if has_proba:
            # predict_proba returns one array per disease; column 1 is taken as the positive class.
            y_test_preds_proba = pd.DataFrame({
                disease: probs[:, 1]
                for disease, probs in zip(diseases, multi_output_model.predict_proba(x_test_subset))
            })
        else:
            y_test_preds_proba = None

        calculate_metrics(y_test, y_test_preds, "all", "all", y_test_preds_proba)

        # Calculate metrics for each disease individually
        for disease in diseases:
            calculate_metrics(
                y_test[disease],
                y_test_preds[disease],
                "all",
                "all",
                y_test_preds_proba[disease] if y_test_preds_proba is not None else None,
                disease_label=disease,
            )
        multi_output_ok = True
    except ValueError:
        print("Multi-output not supported. Training each disease separately.")
        multi_output_ok = False
        # Discard rows written before the failure so nothing is reported twice.
        for values in metrics_data.values():
            values.clear()
        per_disease_preds, per_disease_proba = {}, {}
        for disease in diseases:
            # `model` is refitted for every disease, so its outputs must be
            # captured right away; afterwards it only knows the last disease.
            model.fit(x_train_subset, y_train[disease])
            per_disease_preds[disease] = model.predict(x_test_subset)
            if has_proba:
                per_disease_proba[disease] = model.predict_proba(x_test_subset)[:, 1]
            calculate_metrics(y_test[disease], per_disease_preds[disease], "all", "all",
                              per_disease_proba.get(disease), disease_label=disease)
        y_test_preds = pd.DataFrame(per_disease_preds)
        y_test_preds_proba = pd.DataFrame(per_disease_proba) if per_disease_proba else None

    # Calculate metrics for each dimension in `metric_dimensions` - in our case mostly sex, race, and insurance_type
    for metric_dim in metric_dimensions:
        # dropna(): a NaN "subgroup" can never match `== subgroup` and would give an empty slice.
        for subgroup in x_test[metric_dim].dropna().unique():
            mask = (x_test[metric_dim] == subgroup)  # rows belonging to this subgroup
            y_test_subgroup = y_test.loc[mask]
            y_test_preds_subgroup = y_test_preds.loc[mask]
            y_test_preds_proba_subgroup = y_test_preds_proba.loc[mask] if y_test_preds_proba is not None else None
            if multi_output_ok:
                calculate_metrics(y_test_subgroup, y_test_preds_subgroup, metric_dim, subgroup,
                                  y_test_preds_proba_subgroup)
            for disease in diseases:
                calculate_metrics(
                    y_test_subgroup[disease],
                    y_test_preds_subgroup[disease],
                    metric_dim,
                    subgroup,
                    y_test_preds_proba_subgroup[disease] if y_test_preds_proba_subgroup is not None else None,
                    disease_label=disease,
                )

    # Convert metrics_data to DataFrame and return
    metrics_df = pd.DataFrame(metrics_data)
    print(metrics_df.columns)
    return metrics_df

def plot_metrics(metrics_df, metric_name, modelname, trainingsize):
    """Draw a 2x2 grid of bar charts for one metric, save it as PNG and show it.

    Panels: metric by disease split by sex, by insurance type and by race, plus
    the metric by disease on the full test set (with an "Overall" bar when available).

    Parameters
    ----------
    metrics_df : pandas.DataFrame
        Table with at least the columns 'Metric Dimension', 'Subgroup',
        'Disease' and `metric_name`.
    metric_name : str
        Name of the metric column to plot.
    modelname, trainingsize : str
        Used in the figure subtitle and in the output file name
        ``ml_plots/{metric_name}_{modelname}_{trainingsize}_comparison.png``.
    """
    import os  # local import: only needed to make sure the output folder exists

    fig, axes = plt.subplots(2, 2, figsize=(20, 14))  # larger figure
    fig.suptitle(f"{metric_name} Comparison Across Dimensions", fontsize=18)
    fig.text(0.5, 0.94, f"Trained with model: {modelname} on the following training size: {trainingsize}",
             ha='center', fontsize=12, color="gray")

    palette = ["#3498DB", "#FFC300", "#2ECC71", "#E74C3C"]

    # Plots 1-3: metric by disease, one bar colour per subgroup.
    # (axis, value of 'Metric Dimension', title suffix, colours)
    subgroup_panels = [
        (axes[0, 0], 'sex', 'Sex', palette[:2]),
        (axes[0, 1], 'insurance_type', 'Insurance Type', palette[:3]),
        (axes[1, 0], 'race', 'Race', palette[:3]),
    ]
    for ax, dimension, title_suffix, colours in subgroup_panels:
        panel_data = metrics_df[metrics_df['Metric Dimension'] == dimension]
        sns.barplot(data=panel_data, x='Disease', y=metric_name, hue='Subgroup', ax=ax, palette=colours)
        ax.set_title(f"{metric_name} by Disease and {title_suffix}", fontsize=14)
        ax.tick_params(axis='x', rotation=90)

    # Plot 4: Overall Metric by Disease (no subgroups)
    on_full_test_set = metrics_df['Metric Dimension'] == 'all'
    overall_disease_data = metrics_df[on_full_test_set & (metrics_df['Disease'] != 'Overall')]
    overall_metric = metrics_df[on_full_test_set & (metrics_df['Disease'] == 'Overall')]
    # Add the overall metric as a new row in the DataFrame for plotting
    if not overall_metric.empty:
        overall_row = pd.DataFrame({
            'Disease': ['Overall'],
            metric_name: [overall_metric[metric_name].values[0]]  # metric over all diseases
        })
        overall_disease_data = pd.concat([overall_disease_data, overall_row], ignore_index=True)

    # Plot the data including the "Overall" bar
    sns.barplot(data=overall_disease_data, x='Disease', y=metric_name, ax=axes[1, 1], color=palette[0])
    axes[1, 1].set_title(f"{metric_name} by Disease (Overall)", fontsize=14)
    axes[1, 1].tick_params(axis='x', rotation=90)
    plt.subplots_adjust(wspace=0.1, hspace=0.3)
    plt.tight_layout(rect=[0, 0, 1, 0.92])

    # savefig does not create missing folders, so make sure the target exists.
    os.makedirs("ml_plots", exist_ok=True)
    plt.savefig(f"ml_plots/{metric_name}_{modelname}_{trainingsize}_comparison.png", bbox_inches='tight')
    plt.show()
    # Release the figure; this function is called in a loop over several metrics.
    plt.close(fig)

# Experiment settings used by the model run(s) in this cell.
training_size: int = 2000  # number of leading training rows used (x_train.iloc[:training_size])
use_pca: bool = True       # NOTE(review): intended PCA on/off switch — confirm the train_model call honours it
n_components: int = 10     # forwarded to train_model as n_components

# XGBoost classifier, evaluated through train_model.
# NOTE(review): `use_label_encoder` is presumably deprecated/ignored in recent
# XGBoost releases — confirm against the installed version.
xgb_model = XGBClassifier(
    use_label_encoder=False,
    eval_metric='logloss',
    learning_rate=0.1,
    n_estimators=30,
    random_state=42
)
# NOTE(review): columns_to_drop is left at its default, so every remaining column of
# x_train (including the metric_dimensions columns) is passed to the model — confirm intended.
xgb_metrics = train_model(
    x_train=x_train.iloc[:training_size],
    y_train=y_train.iloc[:training_size],
    x_test=x_test,
    y_test=y_test,
    model=xgb_model,
    metric_dimensions=["sex", "race", "insurance_type"],
    # Honour the use_pca switch; previously the flag was set but never read.
    n_components=n_components if use_pca else None,
)

# rcf = RandomForestClassifier(n_estimators=30, random_state=42)
# rcf_metrics = train_model(
#     x_train=x_train.iloc[:training_size], 
#     y_train=y_train.iloc[:training_size], 
#     x_test=x_test, 
#     y_test=y_test, 
#     model=rcf, 
#     metric_dimensions=["sex", "race", "insurance_type"],
#     n_components=n_components   
# )

# # Naive Bayes
# naive_bayes = MultinomialNB()
# nb_metrics = train_model(
#     x_train=x_train.iloc[:training_size], 
#     y_train=y_train.iloc[:training_size], 
#     x_test=x_test, 
#     y_test=y_test, 
#     model=naive_bayes, 
#     metric_dimensions=["sex", "race", "insurance_type"],
#     n_components=None  
# )

# # Gradient Boosting
# xgb = GradientBoostingClassifier(learning_rate=0.1, n_estimators=30, verbose=1, random_state=42, n_iter_no_change=5)
# xgb_metrics = train_model(
#     x_train=x_train.iloc[:training_size], 
#     y_train=y_train.iloc[:training_size], 
#     x_test=x_test, 
#     y_test=y_test, 
#     model=xgb, 
#     metric_dimensions=["sex", "race", "insurance_type"],
#     n_components=n_components   
# )

# # Decision Tree
# dct = DecisionTreeClassifier(random_state=42)
# dct_metrics = train_model(
#     x_train=x_train.iloc[:training_size], 
#     y_train=y_train.iloc[:training_size], 
#     x_test=x_test, 
#     y_test=y_test, 
#     model=dct, 
#     metric_dimensions=["sex", "race", "insurance_type"],
#     n_components=n_components 
# )

# Plotting
# xgb_metrics in this cell comes from XGBClassifier, so the figures are labelled
# "XGBoost_with_PCA". Reusing "GradientBoosting_with_PCA" would mistitle them and,
# because the label is part of the PNG file name, overwrite the plots written for
# the scikit-learn GradientBoostingClassifier run.
for metric in ['Accuracy', 'F1 Score', 'Recall', 'Precision','AUC']:
    #plot_metrics(rcf_metrics, metric, "RandomForest_with_PCA" , str(training_size))
    #plot_metrics(nb_metrics, metric, "NaiveBayes", str(training_size))
    plot_metrics(xgb_metrics, metric, "XGBoost_with_PCA" , str(training_size))
    # plot_metrics(dct_metrics, metric, "DecisionTree_with_PCA", str(training_size))


In [None]:


# Label columns predicted by train_model (it reads this module-level list).
diseases = ['Enlarged Cardiomediastinum', 'Cardiomegaly', 'Lung Opacity', 'Lung Lesion', 'Edema',
            'Consolidation', 'Pneumonia', 'Atelectasis', 'Pneumothorax',
            'Pleural Effusion', 'Pleural Other', 'Fracture']

def train_model(x_train, y_train, x_test, y_test, model, metric_dimensions=None, columns_to_drop=None, n_components=None):
    """Fit `model` for every entry of `diseases`, score it per subgroup and print Delta AUC.

    Parameters
    ----------
    x_train, x_test : pandas.DataFrame
        Feature tables. `x_test` must also contain every column named in
        `metric_dimensions`; those columns are used to build the subgroups.
    y_train, y_test : pandas.DataFrame
        Label tables with one column per entry of the module-level `diseases` list.
    model : estimator
        Base estimator. It is first wrapped in a MultiOutputClassifier; if that path
        raises ValueError, the estimator itself is refitted once per disease.
        AUC is only computed when the estimator exposes `predict_proba`.
    metric_dimensions : iterable of str, optional
        Columns of `x_test` whose unique non-null values define the subgroups.
    columns_to_drop : list of str, optional
        Columns removed from both feature tables before fitting (missing ones are ignored).
    n_components : int, optional
        If truthy, a PCA with that many components is fitted on the training
        features and applied to both feature tables.

    Returns
    -------
    pandas.DataFrame
        One row per (metric dimension, subgroup, disease) with the columns
        'Metric Dimension', 'Subgroup', 'Disease', 'Accuracy', 'F1 Score',
        'Recall', 'Precision' and 'AUC'. AUC is None when no probabilities are
        available or roc_auc_score raises ValueError.

    Side effects
    ------------
    Prints, for every disease label and metric dimension, the Delta AUC
    (largest minus smallest subgroup AUC inside that dimension), or None when
    fewer than two subgroups of the dimension have an AUC.
    """
    metric_dimensions = [] if metric_dimensions is None else metric_dimensions
    columns_to_drop = [] if columns_to_drop is None else columns_to_drop

    multi_output_model = MultiOutputClassifier(model)
    # Positional index so that boolean subgroup masks line up with the prediction frames.
    x_test = x_test.reset_index(drop=True)
    y_test = y_test.reset_index(drop=True)

    x_train_subset = x_train.drop(columns=columns_to_drop, errors='ignore')
    x_test_subset = x_test.drop(columns=columns_to_drop, errors='ignore')

    # Apply PCA if specified
    if n_components:
        pca = PCA(n_components=n_components)
        x_train_subset = pca.fit_transform(x_train_subset)
        x_test_subset = pca.transform(x_test_subset)
        print(f"PCA used with {n_components} components.")
    else:
        print("PCA not used.")

    # Column-wise storage of all metric rows
    metrics_data = {
        'Metric Dimension': [],
        'Subgroup': [],
        'Disease': [],
        'Accuracy': [],
        'F1 Score': [],
        'Recall': [],
        'Precision': [],
        'AUC': []
    }

    # disease label -> metric dimension -> subgroup -> AUC.
    # Keyed by dimension as well, so subgroups of different dimensions are never
    # compared with each other and equal subgroup names cannot overwrite one another.
    auc_values_per_disease = defaultdict(lambda: defaultdict(dict))

    def calculate_metrics(y_true, y_pred, metric_dim, subgroup, y_pred_proba=None, disease_label="Overall"):
        """Compute one metric row, append it to metrics_data and remember subgroup AUCs.

        All values are computed before anything is appended, so a failing metric
        can never leave the column lists with different lengths.
        """
        auc = None
        if y_pred_proba is not None:
            try:
                auc = roc_auc_score(y_true, y_pred_proba)
            except ValueError:
                auc = None
        # NOTE(review): for a single 1-D label column, average='micro' pools both
        # classes, so F1/recall/precision presumably coincide with accuracy on the
        # per-disease rows; consider average='binary' there — confirm label encoding first.
        row = {
            'Metric Dimension': metric_dim,
            'Subgroup': subgroup,
            'Disease': disease_label,
            'Accuracy': accuracy_score(y_true, y_pred),
            'F1 Score': f1_score(y_true, y_pred, average='micro'),
            'Recall': recall_score(y_true, y_pred, average='micro'),
            'Precision': precision_score(y_true, y_pred, average='micro'),
            'AUC': auc,
        }
        for column, value in row.items():
            metrics_data[column].append(value)
        # Only store for subgroups, not for the full test set ("all")
        if auc is not None and metric_dim != "all":
            auc_values_per_disease[disease_label][metric_dim][subgroup] = auc

    has_proba = hasattr(model, "predict_proba")

    # Whichever path is taken, y_test_preds (and y_test_preds_proba, if available)
    # end up as DataFrames with one column per disease.
    try:
        multi_output_model.fit(x_train_subset, y_train)
        y_test_preds = pd.DataFrame(multi_output_model.predict(x_test_subset), columns=diseases)

        if has_proba:
            # predict_proba returns one array per disease; column 1 is taken as the positive class.
            y_test_preds_proba = pd.DataFrame({
                disease: probs[:, 1]
                for disease, probs in zip(diseases, multi_output_model.predict_proba(x_test_subset))
            })
        else:
            y_test_preds_proba = None

        calculate_metrics(y_test, y_test_preds, "all", "all", y_test_preds_proba)

        for disease in diseases:
            calculate_metrics(
                y_test[disease],
                y_test_preds[disease],
                "all",
                "all",
                y_test_preds_proba[disease] if y_test_preds_proba is not None else None,
                disease_label=disease,
            )
        multi_output_ok = True
    except ValueError:
        print("Multi-output not supported. Training each disease separately.")
        multi_output_ok = False
        # Discard rows written before the failure so nothing is reported twice.
        for values in metrics_data.values():
            values.clear()
        per_disease_preds, per_disease_proba = {}, {}
        for disease in diseases:
            # `model` is refitted for every disease, so its outputs must be
            # captured right away; afterwards it only knows the last disease.
            model.fit(x_train_subset, y_train[disease])
            per_disease_preds[disease] = model.predict(x_test_subset)
            if has_proba:
                per_disease_proba[disease] = model.predict_proba(x_test_subset)[:, 1]
            calculate_metrics(y_test[disease], per_disease_preds[disease], "all", "all",
                              per_disease_proba.get(disease), disease_label=disease)
        y_test_preds = pd.DataFrame(per_disease_preds)
        y_test_preds_proba = pd.DataFrame(per_disease_proba) if per_disease_proba else None

    # Calculate metrics for each dimension in `metric_dimensions`
    for metric_dim in metric_dimensions:
        # dropna(): a NaN "subgroup" can never match `== subgroup` and would give an empty slice.
        for subgroup in x_test[metric_dim].dropna().unique():
            mask = (x_test[metric_dim] == subgroup)
            y_test_subgroup = y_test.loc[mask]
            y_test_preds_subgroup = y_test_preds.loc[mask]
            y_test_preds_proba_subgroup = y_test_preds_proba.loc[mask] if y_test_preds_proba is not None else None
            if multi_output_ok:
                calculate_metrics(y_test_subgroup, y_test_preds_subgroup, metric_dim, subgroup,
                                  y_test_preds_proba_subgroup)
            for disease in diseases:
                calculate_metrics(
                    y_test_subgroup[disease],
                    y_test_preds_subgroup[disease],
                    metric_dim,
                    subgroup,
                    y_test_preds_proba_subgroup[disease] if y_test_preds_proba_subgroup is not None else None,
                    disease_label=disease,
                )

    # Calculate Delta AUC for each disease within each metric dimension
    delta_auc_values = {}
    for disease, auc_by_dimension in auc_values_per_disease.items():
        for dimension, auc_by_subgroup in auc_by_dimension.items():
            if len(auc_by_subgroup) > 1:  # Ensure there are multiple subgroups to compare
                delta_auc_values[(disease, dimension)] = max(auc_by_subgroup.values()) - min(auc_by_subgroup.values())
            else:
                delta_auc_values[(disease, dimension)] = None  # Not applicable if only one subgroup

    # Print Delta AUC for each disease and dimension
    for (disease, dimension), delta_auc in delta_auc_values.items():
        print(f"Delta AUC for {disease} ({dimension}): {delta_auc}")

    # Convert metrics_data to DataFrame and return
    metrics_df = pd.DataFrame(metrics_data)
    print(metrics_df.columns)
    return metrics_df

# Experiment settings used by the model run(s) in this cell.
training_size: int = 2000  # number of leading training rows used (x_train.iloc[:training_size])
use_pca: bool = True       # NOTE(review): intended PCA on/off switch — confirm the train_model call honours it
n_components: int = 10     # forwarded to train_model as n_components

# rcf = RandomForestClassifier(n_estimators=30, random_state=42)
# rcf_metrics = train_model(
#     x_train=x_train.iloc[:training_size], 
#     y_train=y_train.iloc[:training_size], 
#     x_test=x_test, 
#     y_test=y_test, 
#     model=rcf, 
#     metric_dimensions=["sex", "race", "insurance_type"],
#     n_components=n_components   
# )

# # Naive Bayes
# naive_bayes = MultinomialNB()
# nb_metrics = train_model(
#     x_train=x_train.iloc[:training_size], 
#     y_train=y_train.iloc[:training_size], 
#     x_test=x_test, 
#     y_test=y_test, 
#     model=naive_bayes, 
#     metric_dimensions=["sex", "race", "insurance_type"],
#     n_components=None  
# )

# Gradient Boosting
# Note: despite the variable name `xgb`, this is scikit-learn's GradientBoostingClassifier.
xgb = GradientBoostingClassifier(learning_rate=0.1, n_estimators=30, verbose=1, random_state=42, n_iter_no_change=5)
# NOTE(review): columns_to_drop is left at its default, so every remaining column of
# x_train (including the metric_dimensions columns) is passed to the model — confirm intended.
xgb_metrics = train_model(
    x_train=x_train.iloc[:training_size],
    y_train=y_train.iloc[:training_size],
    x_test=x_test,
    y_test=y_test,
    model=xgb,
    metric_dimensions=["sex", "race", "insurance_type"],
    # Honour the use_pca switch; previously the flag was set but never read.
    n_components=n_components if use_pca else None,
)

### Set the disease according to a threshold

In [None]:
from sklearn.metrics import roc_auc_score
import pandas as pd
from collections import defaultdict

# Label columns (one per disease) used by the train_model definition in this cell.
diseases = [
    'Enlarged Cardiomediastinum',
    'Cardiomegaly',
    'Lung Opacity',
    'Lung Lesion',
    'Edema',
    'Consolidation',
    'Pneumonia',
    'Atelectasis',
    'Pneumothorax',
    'Pleural Effusion',
    'Pleural Other',
    'Fracture',
]

def train_model(x_train, y_train, x_test, y_test, model, metric_dimensions=None, columns_to_drop=None, n_components=None):
    """Fit `model` for every disease and evaluate it overall and per subgroup.

    The estimator is first wrapped in a MultiOutputClassifier and fitted on all
    disease columns at once. If that raises ValueError, `model` itself is fitted
    once per disease instead. In both cases the test-set predictions are computed
    a single time and then sliced per subgroup.

    Args:
        x_train, x_test: feature DataFrames. `x_test` must also contain every
            column named in `metric_dimensions`.
        y_train, y_test: label DataFrames with one column per entry of the
            module-level `diseases` list.
        model: scikit-learn style classifier; AUC is only computed when it
            exposes `predict_proba`.
        metric_dimensions: columns of `x_test` whose unique values define the
            subgroups to evaluate (default: none).
        columns_to_drop: columns removed from the features before fitting
            (default: none; missing columns are ignored).
        n_components: if truthy, reduce the features with a PCA that is fitted
            on the training features only.

    Returns:
        (metrics_df, delta_auc_df): one metrics row per evaluated
        (dimension, subgroup, disease) combination, and a table with the
        max-min AUC spread across subgroups per disease and dimension.
    """
    # Avoid mutable default arguments; None means "nothing selected".
    metric_dimensions = [] if metric_dimensions is None else metric_dimensions
    columns_to_drop = [] if columns_to_drop is None else columns_to_drop

    multi_output_model = MultiOutputClassifier(model)
    # Positional index so that test labels, predictions and subgroup masks align.
    x_test = x_test.reset_index(drop=True)
    y_test = y_test.reset_index(drop=True)

    x_train_subset = x_train.drop(columns=columns_to_drop, errors='ignore')
    x_test_subset = x_test.drop(columns=columns_to_drop, errors='ignore')

    if n_components:
        pca = PCA(n_components=n_components)
        x_train_subset = pca.fit_transform(x_train_subset)
        x_test_subset = pca.transform(x_test_subset)
        print(f"PCA used with {n_components} components.")
    else:
        print("PCA not used.")

    metrics_data = {
        'Metric Dimension': [],
        'Subgroup': [],
        'Disease': [],
        'Accuracy': [],
        'F1 Score': [],
        'Recall': [],
        'Precision': [],
        'AUC': []
    }

    # disease -> metric dimension -> subgroup -> AUC, used for the Delta AUC table.
    auc_values_per_disease = defaultdict(lambda: defaultdict(dict))

    def record_metrics(metric_dim, subgroup, y_true, y_pred, y_pred_proba=None, disease_label="Overall"):
        """Compute all metrics first, then append one complete row."""
        # NOTE(review): with average='micro' on a single binary label, F1, recall and
        # precision all equal accuracy; kept so results stay comparable with earlier runs.
        row = {
            'Metric Dimension': metric_dim,
            'Subgroup': subgroup,
            'Disease': disease_label,
            'Accuracy': accuracy_score(y_true, y_pred),
            'F1 Score': f1_score(y_true, y_pred, average='micro'),
            'Recall': recall_score(y_true, y_pred, average='micro'),
            'Precision': precision_score(y_true, y_pred, average='micro'),
            'AUC': None,
        }
        if y_pred_proba is not None:
            try:
                row['AUC'] = roc_auc_score(y_true, y_pred_proba)
            except ValueError:
                # AUC could not be computed for this slice; keep it empty.
                row['AUC'] = None
        # Only subgroup AUCs feed the Delta AUC table, and only when they are defined.
        if metric_dim != "all" and row['AUC'] is not None and not pd.isna(row['AUC']):
            auc_values_per_disease[disease_label][metric_dim][subgroup] = row['AUC']
        # Appending the finished row keeps every column the same length even if a metric fails.
        for column, value in row.items():
            metrics_data[column].append(value)

    has_proba = hasattr(model, "predict_proba")
    try:
        multi_output_model.fit(x_train_subset, y_train)
        y_test_preds = pd.DataFrame(multi_output_model.predict(x_test_subset), columns=diseases)
        if has_proba:
            # predict_proba yields one array per output; column 1 is the positive class.
            y_test_preds_proba = pd.DataFrame({disease: probs[:, 1] for disease, probs in zip(diseases, multi_output_model.predict_proba(x_test_subset))})
        else:
            y_test_preds_proba = None
        multi_output_supported = True
    except ValueError:
        print("Multi-output not supported. Training each disease separately.")
        multi_output_supported = False
        # `model` is refitted for every disease, so keep each disease's predictions now;
        # afterwards only the classifier for the last disease would be available.
        preds_by_disease, proba_by_disease = {}, {}
        for disease in diseases:
            model.fit(x_train_subset, y_train[disease])
            preds_by_disease[disease] = model.predict(x_test_subset)
            if has_proba:
                proba_by_disease[disease] = model.predict_proba(x_test_subset)[:, 1]
        y_test_preds = pd.DataFrame(preds_by_disease)
        y_test_preds_proba = pd.DataFrame(proba_by_disease) if has_proba else None

    def evaluate_rows(metric_dim, subgroup, row_mask=None):
        """Record metrics for the selected test rows (all rows when row_mask is None)."""
        if row_mask is None:
            y_true, y_pred, y_proba = y_test, y_test_preds, y_test_preds_proba
        else:
            y_true = y_test.loc[row_mask]
            y_pred = y_test_preds.loc[row_mask]
            y_proba = y_test_preds_proba.loc[row_mask] if y_test_preds_proba is not None else None
        # The combined "Overall" row is only produced on the multi-output path.
        if multi_output_supported:
            record_metrics(metric_dim, subgroup, y_true, y_pred, y_proba)
        for disease in diseases:
            proba_disease = y_proba[disease] if y_proba is not None else None
            record_metrics(metric_dim, subgroup, y_true[disease], y_pred[disease], proba_disease, disease_label=disease)

    evaluate_rows("all", "all")

    for metric_dim in metric_dimensions:
        for subgroup in x_test[metric_dim].unique():
            mask = (x_test[metric_dim] == subgroup)
            # A NaN category never equals itself, so it selects no rows; skip it.
            if not mask.any():
                continue
            evaluate_rows(metric_dim, subgroup, mask)

    # Delta AUC: spread between the best and the worst subgroup AUC.
    delta_auc_table = {'Disease': [], 'Metric Dimension': [], 'Delta AUC': []}
    max_delta_auc = 0

    for disease, dim_dict in auc_values_per_disease.items():
        for dimension, subgroups in dim_dict.items():
            if len(subgroups) > 1:  # A spread needs at least two subgroups
                delta_auc = max(subgroups.values()) - min(subgroups.values())
                delta_auc_table['Disease'].append(disease)
                delta_auc_table['Metric Dimension'].append(dimension)
                delta_auc_table['Delta AUC'].append(delta_auc)
                if delta_auc > max_delta_auc:
                    max_delta_auc = delta_auc

    print(f"Maximum Delta AUC: {max_delta_auc}")

    delta_auc_df = pd.DataFrame(delta_auc_table)
    print(delta_auc_df)

    metrics_df = pd.DataFrame(metrics_data)
    metrics_df = metrics_df.reset_index(drop=True)
    return metrics_df, delta_auc_df

# Experiment configuration for this run.
training_size = 2000   # number of leading training rows used for fitting
use_pca = True         # set to False to train on the unreduced features
n_components = 10      # number of PCA components used when use_pca is True

# rcf = RandomForestClassifier(n_estimators=30, random_state=42)
# rcf_metrics = train_model(
#     x_train=x_train.iloc[:training_size], 
#     y_train=y_train.iloc[:training_size], 
#     x_test=x_test, 
#     y_test=y_test, 
#     model=rcf, 
#     metric_dimensions=["sex", "race", "insurance_type"],
#     n_components=n_components   
# )

# # Naive Bayes
# naive_bayes = MultinomialNB()
# nb_metrics = train_model(
#     x_train=x_train.iloc[:training_size], 
#     y_train=y_train.iloc[:training_size], 
#     x_test=x_test, 
#     y_test=y_test, 
#     model=naive_bayes, 
#     metric_dimensions=["sex", "race", "insurance_type"],
#     n_components=None  
# )

# Gradient Boosting.
# Note: despite its name, `xgb` is scikit-learn's GradientBoostingClassifier, not XGBoost.
xgb = GradientBoostingClassifier(learning_rate=0.1, n_estimators=30, verbose=1, random_state=42, n_iter_no_change=5)
# The train_model defined in this cell returns (metrics_df, delta_auc_df); unpack it so
# that xgb_metrics is the metrics table itself rather than a tuple.
xgb_metrics, xgb_delta_auc = train_model(
    x_train=x_train.iloc[:training_size],
    y_train=y_train.iloc[:training_size],
    x_test=x_test,
    y_test=y_test,
    model=xgb,
    metric_dimensions=["sex", "race", "insurance_type"],
    # Honour the use_pca switch; previously it was defined but never read.
    n_components=n_components if use_pca else None,
)

In [None]:
def calculate_delta_auc(auc_values_per_disease):
    """Compute the Delta AUC for every disease and metric dimension.

    `auc_values_per_disease` maps disease -> metric dimension -> subgroup -> AUC.
    Delta AUC is the largest minus the smallest subgroup AUC; dimensions with
    fewer than two subgroups are left out. The largest delta is printed and the
    table is returned as a DataFrame with the columns 'Disease',
    'Metric Dimension' and 'Delta AUC'.
    """
    records = [
        (disease, dimension, max(aucs.values()) - min(aucs.values()))
        for disease, per_dimension in auc_values_per_disease.items()
        for dimension, aucs in per_dimension.items()
        if len(aucs) > 1  # a spread needs at least two subgroups
    ]
    deltas = [delta for _, _, delta in records]

    # The leading 0 is the floor that is reported when no delta exceeds it.
    print(f"Maximum Delta AUC: {max([0] + deltas)}")

    return pd.DataFrame({
        'Disease': [disease for disease, _, _ in records],
        'Metric Dimension': [dimension for _, dimension, _ in records],
        'Delta AUC': deltas,
    })

In [None]:
def train_model(x_train, y_train, x_test, y_test, model, metric_dimensions=None, columns_to_drop=None, n_components=None):
    """Fit `model` for every disease and evaluate it overall and per subgroup.

    The estimator is first wrapped in a MultiOutputClassifier and fitted on all
    disease columns at once. If that raises ValueError, `model` itself is fitted
    once per disease instead. In both cases the test-set predictions are computed
    a single time and then sliced per subgroup.

    Args:
        x_train, x_test: feature DataFrames. `x_test` must also contain every
            column named in `metric_dimensions`.
        y_train, y_test: label DataFrames with one column per entry of the
            module-level `diseases` list.
        model: scikit-learn style classifier; AUC is only computed when it
            exposes `predict_proba`.
        metric_dimensions: columns of `x_test` whose unique values define the
            subgroups to evaluate (default: none).
        columns_to_drop: columns removed from the features before fitting
            (default: none; missing columns are ignored).
        n_components: if truthy, reduce the features with a PCA that is fitted
            on the training features only.

    Returns:
        (metrics_df, delta_auc_df): one metrics row per evaluated
        (dimension, subgroup, disease) combination, and the table built by
        `calculate_delta_auc` from the per-subgroup AUC values.
    """
    # Avoid mutable default arguments; None means "nothing selected".
    metric_dimensions = [] if metric_dimensions is None else metric_dimensions
    columns_to_drop = [] if columns_to_drop is None else columns_to_drop

    multi_output_model = MultiOutputClassifier(model)
    # Positional index so that test labels, predictions and subgroup masks align.
    x_test = x_test.reset_index(drop=True)
    y_test = y_test.reset_index(drop=True)

    x_train_subset = x_train.drop(columns=columns_to_drop, errors='ignore')
    x_test_subset = x_test.drop(columns=columns_to_drop, errors='ignore')

    if n_components:
        pca = PCA(n_components=n_components)
        x_train_subset = pca.fit_transform(x_train_subset)
        x_test_subset = pca.transform(x_test_subset)
        print(f"PCA used with {n_components} components.")
    else:
        print("PCA not used.")

    metrics_data = {
        'Metric Dimension': [],
        'Subgroup': [],
        'Disease': [],
        'Accuracy': [],
        'F1 Score': [],
        'Recall': [],
        'Precision': [],
        'AUC': []
    }

    # disease -> metric dimension -> subgroup -> AUC, used for the Delta AUC table.
    auc_values_per_disease = defaultdict(lambda: defaultdict(dict))

    def record_metrics(metric_dim, subgroup, y_true, y_pred, y_pred_proba=None, disease_label="Overall"):
        """Compute all metrics first, then append one complete row."""
        # NOTE(review): with average='micro' on a single binary label, F1, recall and
        # precision all equal accuracy; kept so results stay comparable with earlier runs.
        row = {
            'Metric Dimension': metric_dim,
            'Subgroup': subgroup,
            'Disease': disease_label,
            'Accuracy': accuracy_score(y_true, y_pred),
            'F1 Score': f1_score(y_true, y_pred, average='micro'),
            'Recall': recall_score(y_true, y_pred, average='micro'),
            'Precision': precision_score(y_true, y_pred, average='micro'),
            'AUC': None,
        }
        if y_pred_proba is not None:
            try:
                row['AUC'] = roc_auc_score(y_true, y_pred_proba)
            except ValueError:
                # AUC could not be computed for this slice; keep it empty.
                row['AUC'] = None
        # Only subgroup AUCs feed the Delta AUC table, and only when they are defined.
        if metric_dim != "all" and row['AUC'] is not None and not pd.isna(row['AUC']):
            auc_values_per_disease[disease_label][metric_dim][subgroup] = row['AUC']
        # Appending the finished row keeps every column the same length even if a metric fails.
        for column, value in row.items():
            metrics_data[column].append(value)

    has_proba = hasattr(model, "predict_proba")
    try:
        multi_output_model.fit(x_train_subset, y_train)
        y_test_preds = pd.DataFrame(multi_output_model.predict(x_test_subset), columns=diseases)
        if has_proba:
            # predict_proba yields one array per output; column 1 is the positive class.
            y_test_preds_proba = pd.DataFrame({disease: probs[:, 1] for disease, probs in zip(diseases, multi_output_model.predict_proba(x_test_subset))})
        else:
            y_test_preds_proba = None
        multi_output_supported = True
    except ValueError:
        print("Multi-output not supported. Training each disease separately.")
        multi_output_supported = False
        # `model` is refitted for every disease, so keep each disease's predictions now;
        # afterwards only the classifier for the last disease would be available.
        preds_by_disease, proba_by_disease = {}, {}
        for disease in diseases:
            model.fit(x_train_subset, y_train[disease])
            preds_by_disease[disease] = model.predict(x_test_subset)
            if has_proba:
                proba_by_disease[disease] = model.predict_proba(x_test_subset)[:, 1]
        y_test_preds = pd.DataFrame(preds_by_disease)
        y_test_preds_proba = pd.DataFrame(proba_by_disease) if has_proba else None

    def evaluate_rows(metric_dim, subgroup, row_mask=None):
        """Record metrics for the selected test rows (all rows when row_mask is None)."""
        if row_mask is None:
            y_true, y_pred, y_proba = y_test, y_test_preds, y_test_preds_proba
        else:
            y_true = y_test.loc[row_mask]
            y_pred = y_test_preds.loc[row_mask]
            y_proba = y_test_preds_proba.loc[row_mask] if y_test_preds_proba is not None else None
        # The combined "Overall" row is only produced on the multi-output path.
        if multi_output_supported:
            record_metrics(metric_dim, subgroup, y_true, y_pred, y_proba)
        for disease in diseases:
            proba_disease = y_proba[disease] if y_proba is not None else None
            record_metrics(metric_dim, subgroup, y_true[disease], y_pred[disease], proba_disease, disease_label=disease)

    evaluate_rows("all", "all")

    for metric_dim in metric_dimensions:
        for subgroup in x_test[metric_dim].unique():
            mask = (x_test[metric_dim] == subgroup)
            # A NaN category never equals itself, so it selects no rows; skip it.
            if not mask.any():
                continue
            evaluate_rows(metric_dim, subgroup, mask)

    # Build the Delta AUC table with the shared helper and show it.
    delta_auc_df = calculate_delta_auc(auc_values_per_disease)
    print(delta_auc_df)

    metrics_df = pd.DataFrame(metrics_data)
    metrics_df = metrics_df.reset_index(drop=True)
    return metrics_df, delta_auc_df


In [None]:
# Experiment configuration for this run.
training_size = 2000   # number of leading training rows used for fitting
use_pca = True         # set to False to train on the unreduced features
n_components = 10      # number of PCA components used when use_pca is True

# Note: despite its name, `xgb` is scikit-learn's GradientBoostingClassifier, not XGBoost.
xgb = GradientBoostingClassifier(learning_rate=0.1, n_estimators=30, verbose=1, random_state=42, n_iter_no_change=5)
# train_model returns (metrics_df, delta_auc_df); both are kept together in xgb_metrics.
xgb_metrics = train_model(
    x_train=x_train.iloc[:training_size],
    y_train=y_train.iloc[:training_size],
    x_test=x_test,
    y_test=y_test,
    model=xgb,
    metric_dimensions=["sex", "race", "insurance_type"],
    # Honour the use_pca switch; previously it was defined but never read.
    n_components=n_components if use_pca else None,
)

In [None]:
# `auc_values_per_disease` only exists inside train_model, so it cannot be passed to
# calculate_delta_auc from here (that raised a NameError). train_model already returns
# the finished Delta AUC table as the second element of the result stored in xgb_metrics.
_, delta_auc_df = xgb_metrics
print(delta_auc_df)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, roc_auc_score
from sklearn.decomposition import PCA
import pandas as pd
from collections import defaultdict

# List of diseases: the label columns that train_model predicts and evaluates.
diseases = ['Enlarged Cardiomediastinum', 'Cardiomegaly', 'Lung Opacity', 'Lung Lesion', 'Edema', 
            'Consolidation', 'Pneumonia', 'Atelectasis', 'Pneumothorax', 
            'Pleural Effusion', 'Pleural Other', 'Fracture']

# Delta AUC calculation
def calculate_delta_auc(auc_values_per_disease):
    """Compute the Delta AUC for every disease and metric dimension.

    `auc_values_per_disease` maps disease -> metric dimension -> subgroup -> AUC.
    Delta AUC is the largest minus the smallest subgroup AUC; dimensions with
    fewer than two subgroups are left out. The largest delta is printed and the
    table is returned as a DataFrame with the columns 'Disease',
    'Metric Dimension' and 'Delta AUC'.
    """
    records = [
        (disease, dimension, max(aucs.values()) - min(aucs.values()))
        for disease, per_dimension in auc_values_per_disease.items()
        for dimension, aucs in per_dimension.items()
        if len(aucs) > 1  # a spread needs at least two subgroups
    ]
    deltas = [delta for _, _, delta in records]

    # The leading 0 is the floor that is reported when no delta exceeds it.
    print(f"Maximum Delta AUC: {max([0] + deltas)}")

    return pd.DataFrame({
        'Disease': [disease for disease, _, _ in records],
        'Metric Dimension': [dimension for _, dimension, _ in records],
        'Delta AUC': deltas,
    })

# Training and metrics function
def train_model(x_train, y_train, x_test, y_test, model, metric_dimensions=None, columns_to_drop=None, n_components=None):
    """Fit `model` for every disease and evaluate it overall and per subgroup.

    The estimator is first wrapped in a MultiOutputClassifier and fitted on all
    disease columns at once. If that raises ValueError, `model` itself is fitted
    once per disease instead. In both cases the test-set predictions are computed
    a single time and then sliced per subgroup.

    Args:
        x_train, x_test: feature DataFrames. `x_test` must also contain every
            column named in `metric_dimensions`.
        y_train, y_test: label DataFrames with one column per entry of the
            module-level `diseases` list.
        model: scikit-learn style classifier; AUC is only computed when it
            exposes `predict_proba`.
        metric_dimensions: columns of `x_test` whose unique values define the
            subgroups to evaluate (default: none).
        columns_to_drop: columns removed from the features before fitting
            (default: none; missing columns are ignored).
        n_components: if truthy, reduce the features with a PCA that is fitted
            on the training features only.

    Returns:
        (metrics_df, delta_auc_df): one metrics row per evaluated
        (dimension, subgroup, disease) combination, and the table built by
        `calculate_delta_auc` from the per-subgroup AUC values.
    """
    # Avoid mutable default arguments; None means "nothing selected".
    metric_dimensions = [] if metric_dimensions is None else metric_dimensions
    columns_to_drop = [] if columns_to_drop is None else columns_to_drop

    multi_output_model = MultiOutputClassifier(model)
    # Positional index so that test labels, predictions and subgroup masks align.
    x_test = x_test.reset_index(drop=True)
    y_test = y_test.reset_index(drop=True)

    x_train_subset = x_train.drop(columns=columns_to_drop, errors='ignore')
    x_test_subset = x_test.drop(columns=columns_to_drop, errors='ignore')

    if n_components:
        pca = PCA(n_components=n_components)
        x_train_subset = pca.fit_transform(x_train_subset)
        x_test_subset = pca.transform(x_test_subset)
        print(f"PCA used with {n_components} components.")
    else:
        print("PCA not used.")

    metrics_data = {
        'Metric Dimension': [],
        'Subgroup': [],
        'Disease': [],
        'Accuracy': [],
        'F1 Score': [],
        'Recall': [],
        'Precision': [],
        'AUC': []
    }

    # disease -> metric dimension -> subgroup -> AUC, used for the Delta AUC table.
    auc_values_per_disease = defaultdict(lambda: defaultdict(dict))

    def record_metrics(metric_dim, subgroup, y_true, y_pred, y_pred_proba=None, disease_label="Overall"):
        """Compute all metrics first, then append one complete row."""
        # NOTE(review): with average='micro' on a single binary label, F1, recall and
        # precision all equal accuracy; kept so results stay comparable with earlier runs.
        row = {
            'Metric Dimension': metric_dim,
            'Subgroup': subgroup,
            'Disease': disease_label,
            'Accuracy': accuracy_score(y_true, y_pred),
            'F1 Score': f1_score(y_true, y_pred, average='micro'),
            'Recall': recall_score(y_true, y_pred, average='micro'),
            'Precision': precision_score(y_true, y_pred, average='micro'),
            'AUC': None,
        }
        if y_pred_proba is not None:
            try:
                row['AUC'] = roc_auc_score(y_true, y_pred_proba)
            except ValueError:
                # AUC could not be computed for this slice; keep it empty.
                row['AUC'] = None
        # Only subgroup AUCs feed the Delta AUC table, and only when they are defined.
        if metric_dim != "all" and row['AUC'] is not None and not pd.isna(row['AUC']):
            auc_values_per_disease[disease_label][metric_dim][subgroup] = row['AUC']
        # Appending the finished row keeps every column the same length even if a metric fails.
        for column, value in row.items():
            metrics_data[column].append(value)

    has_proba = hasattr(model, "predict_proba")
    try:
        multi_output_model.fit(x_train_subset, y_train)
        y_test_preds = pd.DataFrame(multi_output_model.predict(x_test_subset), columns=diseases)
        if has_proba:
            # predict_proba yields one array per output; column 1 is the positive class.
            y_test_preds_proba = pd.DataFrame({disease: probs[:, 1] for disease, probs in zip(diseases, multi_output_model.predict_proba(x_test_subset))})
        else:
            y_test_preds_proba = None
        multi_output_supported = True
    except ValueError:
        print("Multi-output not supported. Training each disease separately.")
        multi_output_supported = False
        # `model` is refitted for every disease, so keep each disease's predictions now;
        # afterwards only the classifier for the last disease would be available.
        preds_by_disease, proba_by_disease = {}, {}
        for disease in diseases:
            model.fit(x_train_subset, y_train[disease])
            preds_by_disease[disease] = model.predict(x_test_subset)
            if has_proba:
                proba_by_disease[disease] = model.predict_proba(x_test_subset)[:, 1]
        y_test_preds = pd.DataFrame(preds_by_disease)
        y_test_preds_proba = pd.DataFrame(proba_by_disease) if has_proba else None

    def evaluate_rows(metric_dim, subgroup, row_mask=None):
        """Record metrics for the selected test rows (all rows when row_mask is None)."""
        if row_mask is None:
            y_true, y_pred, y_proba = y_test, y_test_preds, y_test_preds_proba
        else:
            y_true = y_test.loc[row_mask]
            y_pred = y_test_preds.loc[row_mask]
            y_proba = y_test_preds_proba.loc[row_mask] if y_test_preds_proba is not None else None
        # The combined "Overall" row is only produced on the multi-output path.
        if multi_output_supported:
            record_metrics(metric_dim, subgroup, y_true, y_pred, y_proba)
        for disease in diseases:
            proba_disease = y_proba[disease] if y_proba is not None else None
            record_metrics(metric_dim, subgroup, y_true[disease], y_pred[disease], proba_disease, disease_label=disease)

    evaluate_rows("all", "all")

    for metric_dim in metric_dimensions:
        for subgroup in x_test[metric_dim].unique():
            mask = (x_test[metric_dim] == subgroup)
            # A NaN category never equals itself, so it selects no rows; skip it.
            if not mask.any():
                continue
            evaluate_rows(metric_dim, subgroup, mask)

    # Build the Delta AUC table with the shared helper and show it.
    delta_auc_df = calculate_delta_auc(auc_values_per_disease)
    print("Delta AUC Tabelle:")
    print(delta_auc_df)

    metrics_df = pd.DataFrame(metrics_data)
    metrics_df = metrics_df.reset_index(drop=True)
    return metrics_df, delta_auc_df

# Example run of the function and analysis of the results
training_size = 2000   # number of leading training rows used for fitting
use_pca = True         # set to False to train on the unreduced features
n_components = 10      # number of PCA components used when use_pca is True

# Note: despite its name, `xgb` is scikit-learn's GradientBoostingClassifier, not XGBoost.
xgb = GradientBoostingClassifier(learning_rate=0.1, n_estimators=30, verbose=1, random_state=42, n_iter_no_change=5)

# Train the model and keep the performance metrics and the Delta AUC values
metrics_df, delta_auc_df = train_model(
    x_train=x_train.iloc[:training_size],
    y_train=y_train.iloc[:training_size],
    x_test=x_test,
    y_test=y_test,
    model=xgb,
    metric_dimensions=["sex", "race", "insurance_type"],
    # Honour the use_pca switch; previously it was defined but never read.
    n_components=n_components if use_pca else None,
)

# Show the performance on the whole test set ("all" rows) for every disease, then the Delta AUC table
print("Leistung für jede Krankheit:")
print(metrics_df[metrics_df['Metric Dimension'] == 'all'][['Disease', 'Accuracy', 'F1 Score', 'Recall', 'Precision', 'AUC']])

print("\nDelta AUC für jede Krankheit und Dimension:")
print(delta_auc_df)


In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, roc_auc_score
from sklearn.decomposition import PCA
import pandas as pd
from collections import defaultdict

# List of diseases: the label columns that train_model predicts and evaluates.
diseases = ['Enlarged Cardiomediastinum', 'Cardiomegaly', 'Lung Opacity', 'Lung Lesion', 'Edema', 
            'Consolidation', 'Pneumonia', 'Atelectasis', 'Pneumothorax', 
            'Pleural Effusion', 'Pleural Other', 'Fracture']

# Delta AUC calculation
def calculate_delta_auc(auc_values_per_disease):
    """Compute the Delta AUC for every disease and metric dimension.

    Args:
        auc_values_per_disease: mapping disease -> metric dimension ->
            subgroup -> AUC.

    Returns:
        DataFrame with the columns 'Disease', 'Metric Dimension' and
        'Delta AUC' (largest minus smallest subgroup AUC). Dimensions with
        fewer than two subgroups are left out.
    """
    delta_auc_table = {'Disease': [], 'Metric Dimension': [], 'Delta AUC': []}

    for disease, dim_dict in auc_values_per_disease.items():
        for dimension, subgroups in dim_dict.items():
            if len(subgroups) > 1:  # A spread needs at least two subgroups
                delta_auc_table['Disease'].append(disease)
                delta_auc_table['Metric Dimension'].append(dimension)
                delta_auc_table['Delta AUC'].append(max(subgroups.values()) - min(subgroups.values()))

    # The overall maximum is not tracked here; it can be read from the returned table.
    return pd.DataFrame(delta_auc_table)

# Function for displaying the Delta AUC tables of every metric dimension
def display_delta_auc_tables(delta_auc_df):
    """Print one Delta AUC table per metric dimension, then the overall maximum Delta AUC."""
    dimension_column = delta_auc_df['Metric Dimension']

    # (metric dimension, heading printed above its table)
    sections = (
        ('sex', "\nDelta AUC Tabelle für 'sex':"),
        ('race', "\nDelta AUC Tabelle für 'race':"),
        ('insurance_type', "\nDelta AUC Tabelle für 'insurance_type':"),
    )
    for dimension, heading in sections:
        rows = delta_auc_df[dimension_column == dimension]
        print(heading)
        print(rows.to_string(index=False))  # index=False leaves out the index column

    # Largest Delta AUC across all dimensions
    overall_max = delta_auc_df['Delta AUC'].max()
    print(f"\nMaximaler Delta AUC über alle Gruppen hinweg: {overall_max:.3f}")

# Training and metrics function
def train_model(x_train, y_train, x_test, y_test, model, metric_dimensions=None, columns_to_drop=None, n_components=None):
    """Fit `model` for every disease and evaluate it overall and per subgroup.

    The estimator is first wrapped in a MultiOutputClassifier and fitted on all
    disease columns at once. If that raises ValueError, `model` itself is fitted
    once per disease instead. In both cases the test-set predictions are computed
    a single time and then sliced per subgroup.

    Args:
        x_train, x_test: feature DataFrames. `x_test` must also contain every
            column named in `metric_dimensions`.
        y_train, y_test: label DataFrames with one column per entry of the
            module-level `diseases` list.
        model: scikit-learn style classifier; AUC is only computed when it
            exposes `predict_proba`.
        metric_dimensions: columns of `x_test` whose unique values define the
            subgroups to evaluate (default: none).
        columns_to_drop: columns removed from the features before fitting
            (default: none; missing columns are ignored).
        n_components: if truthy, reduce the features with a PCA that is fitted
            on the training features only.

    Returns:
        (metrics_df, delta_auc_df): one metrics row per evaluated
        (dimension, subgroup, disease) combination, and the table built by
        `calculate_delta_auc` from the per-subgroup AUC values.
    """
    # Avoid mutable default arguments; None means "nothing selected".
    metric_dimensions = [] if metric_dimensions is None else metric_dimensions
    columns_to_drop = [] if columns_to_drop is None else columns_to_drop

    multi_output_model = MultiOutputClassifier(model)
    # Positional index so that test labels, predictions and subgroup masks align.
    x_test = x_test.reset_index(drop=True)
    y_test = y_test.reset_index(drop=True)

    x_train_subset = x_train.drop(columns=columns_to_drop, errors='ignore')
    x_test_subset = x_test.drop(columns=columns_to_drop, errors='ignore')

    if n_components:
        pca = PCA(n_components=n_components)
        x_train_subset = pca.fit_transform(x_train_subset)
        x_test_subset = pca.transform(x_test_subset)
        print(f"PCA used with {n_components} components.")
    else:
        print("PCA not used.")

    metrics_data = {
        'Metric Dimension': [],
        'Subgroup': [],
        'Disease': [],
        'Accuracy': [],
        'F1 Score': [],
        'Recall': [],
        'Precision': [],
        'AUC': []
    }

    # disease -> metric dimension -> subgroup -> AUC, used for the Delta AUC table.
    auc_values_per_disease = defaultdict(lambda: defaultdict(dict))

    def record_metrics(metric_dim, subgroup, y_true, y_pred, y_pred_proba=None, disease_label="Overall"):
        """Compute all metrics first, then append one complete row."""
        # NOTE(review): with average='micro' on a single binary label, F1, recall and
        # precision all equal accuracy; kept so results stay comparable with earlier runs.
        row = {
            'Metric Dimension': metric_dim,
            'Subgroup': subgroup,
            'Disease': disease_label,
            'Accuracy': accuracy_score(y_true, y_pred),
            'F1 Score': f1_score(y_true, y_pred, average='micro'),
            'Recall': recall_score(y_true, y_pred, average='micro'),
            'Precision': precision_score(y_true, y_pred, average='micro'),
            'AUC': None,
        }
        if y_pred_proba is not None:
            try:
                row['AUC'] = roc_auc_score(y_true, y_pred_proba)
            except ValueError:
                # AUC could not be computed for this slice; keep it empty.
                row['AUC'] = None
        # Only subgroup AUCs feed the Delta AUC table, and only when they are defined.
        if metric_dim != "all" and row['AUC'] is not None and not pd.isna(row['AUC']):
            auc_values_per_disease[disease_label][metric_dim][subgroup] = row['AUC']
        # Appending the finished row keeps every column the same length even if a metric fails.
        for column, value in row.items():
            metrics_data[column].append(value)

    has_proba = hasattr(model, "predict_proba")
    try:
        multi_output_model.fit(x_train_subset, y_train)
        y_test_preds = pd.DataFrame(multi_output_model.predict(x_test_subset), columns=diseases)
        if has_proba:
            # predict_proba yields one array per output; column 1 is the positive class.
            y_test_preds_proba = pd.DataFrame({disease: probs[:, 1] for disease, probs in zip(diseases, multi_output_model.predict_proba(x_test_subset))})
        else:
            y_test_preds_proba = None
        multi_output_supported = True
    except ValueError:
        print("Multi-output not supported. Training each disease separately.")
        multi_output_supported = False
        # `model` is refitted for every disease, so keep each disease's predictions now;
        # afterwards only the classifier for the last disease would be available.
        preds_by_disease, proba_by_disease = {}, {}
        for disease in diseases:
            model.fit(x_train_subset, y_train[disease])
            preds_by_disease[disease] = model.predict(x_test_subset)
            if has_proba:
                proba_by_disease[disease] = model.predict_proba(x_test_subset)[:, 1]
        y_test_preds = pd.DataFrame(preds_by_disease)
        y_test_preds_proba = pd.DataFrame(proba_by_disease) if has_proba else None

    def evaluate_rows(metric_dim, subgroup, row_mask=None):
        """Record metrics for the selected test rows (all rows when row_mask is None)."""
        if row_mask is None:
            y_true, y_pred, y_proba = y_test, y_test_preds, y_test_preds_proba
        else:
            y_true = y_test.loc[row_mask]
            y_pred = y_test_preds.loc[row_mask]
            y_proba = y_test_preds_proba.loc[row_mask] if y_test_preds_proba is not None else None
        # The combined "Overall" row is only produced on the multi-output path.
        if multi_output_supported:
            record_metrics(metric_dim, subgroup, y_true, y_pred, y_proba)
        for disease in diseases:
            proba_disease = y_proba[disease] if y_proba is not None else None
            record_metrics(metric_dim, subgroup, y_true[disease], y_pred[disease], proba_disease, disease_label=disease)

    evaluate_rows("all", "all")

    for metric_dim in metric_dimensions:
        for subgroup in x_test[metric_dim].unique():
            mask = (x_test[metric_dim] == subgroup)
            # A NaN category never equals itself, so it selects no rows; skip it.
            if not mask.any():
                continue
            evaluate_rows(metric_dim, subgroup, mask)

    # Build the Delta AUC table with the shared helper.
    delta_auc_df = calculate_delta_auc(auc_values_per_disease)

    metrics_df = pd.DataFrame(metrics_data)
    metrics_df = metrics_df.reset_index(drop=True)
    return metrics_df, delta_auc_df

# Example run of the function and analysis of the results
training_size = 2000   # number of leading training rows used for fitting
use_pca = True         # set to False to train on the unreduced features
n_components = 10      # number of PCA components used when use_pca is True

# Note: despite its name, `xgb` is scikit-learn's GradientBoostingClassifier, not XGBoost.
xgb = GradientBoostingClassifier(learning_rate=0.1, n_estimators=30, verbose=1, random_state=42, n_iter_no_change=5)

# Train the model and keep the performance metrics and the Delta AUC values
metrics_df, delta_auc_df = train_model(
    x_train=x_train.iloc[:training_size],
    y_train=y_train.iloc[:training_size],
    x_test=x_test,
    y_test=y_test,
    model=xgb,
    metric_dimensions=["sex", "race", "insurance_type"],
    # Honour the use_pca switch; previously it was defined but never read.
    n_components=n_components if use_pca else None,
)

# Show the Delta AUC tables for every metric dimension
display_delta_auc_tables(delta_auc_df)


In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, roc_auc_score
from sklearn.decomposition import PCA
import pandas as pd
from collections import defaultdict

# List of diseases: the label columns used for prediction and evaluation.
diseases = ['Enlarged Cardiomediastinum', 'Cardiomegaly', 'Lung Opacity', 'Lung Lesion', 'Edema', 
            'Consolidation', 'Pneumonia', 'Atelectasis', 'Pneumothorax', 
            'Pleural Effusion', 'Pleural Other', 'Fracture']

# Delta AUC Berechnungsfunktion
def calculate_delta_auc(auc_values_per_disease):
    """Build one AUC comparison table per demographic dimension.

    Args:
        auc_values_per_disease: nested mapping ``disease -> dimension -> subgroup -> AUC``.
            Only the dimensions 'sex', 'race' and 'insurance_type' are read, and
            within them only the lower-case subgroup keys listed in this function.

    Returns:
        Tuple ``(sex_df, race_df, insurance_type_df)``. Every frame holds a
        'Disease' column, one AUC column per expected subgroup and an
        'AUC Delta' column. A subgroup without an AUC is stored as None and the
        delta is None unless all expected subgroups of the dimension have a value.

    NOTE(review): this function is defined verbatim in more than one cell of the
    notebook; the last executed definition wins.
    """
    def three_way_gap(values):
        # Largest minus smallest AUC; undefined as soon as one subgroup is missing.
        if None in values:
            return None
        return max(values) - min(values)

    rows_sex, rows_race, rows_insurance = [], [], []

    for disease_name, by_dimension in auc_values_per_disease.items():
        # 'sex' table: absolute difference between the two subgroups.
        if 'sex' in by_dimension:
            sex_aucs = by_dimension['sex']
            auc_m = sex_aucs.get('male')
            auc_f = sex_aucs.get('female')
            if auc_m is None or auc_f is None:
                gap = None
            else:
                gap = abs(auc_m - auc_f)
            rows_sex.append([disease_name, auc_m, auc_f, gap])

        # 'race' table: range over the three expected subgroups.
        if 'race' in by_dimension:
            race_aucs = [by_dimension['race'].get(group) for group in ('asian', 'white', 'black')]
            rows_race.append([disease_name, *race_aucs, three_way_gap(race_aucs)])

        # 'insurance_type' table: range over the three expected subgroups.
        if 'insurance_type' in by_dimension:
            insurance_aucs = [
                by_dimension['insurance_type'].get(group)
                for group in ('private', 'medicaid', 'medicare')
            ]
            rows_insurance.append([disease_name, *insurance_aucs, three_way_gap(insurance_aucs)])

    # Materialise each row list as a DataFrame with fixed column headers.
    sex_df = pd.DataFrame(
        rows_sex,
        columns=['Disease', 'AUC (Male)', 'AUC (Female)', 'AUC Delta'],
    )
    race_df = pd.DataFrame(
        rows_race,
        columns=['Disease', 'AUC (Asian)', 'AUC (White)', 'AUC (Black)', 'AUC Delta'],
    )
    insurance_type_df = pd.DataFrame(
        rows_insurance,
        columns=['Disease', 'AUC (Private)', 'AUC (Medicaid)', 'AUC (Medicare)', 'AUC Delta'],
    )

    return sex_df, race_df, insurance_type_df

# Training and metrics: fit the model and evaluate it overall and per subgroup
def train_model(x_train, y_train, x_test, y_test, model, metric_dimensions=[], columns_to_drop=[], n_components=None):
    """Train `model` for every label in `diseases` and evaluate it overall and per subgroup.

    The estimator is first wrapped in a MultiOutputClassifier and fitted on all
    disease columns at once. If that fit raises ValueError, `model` itself is
    fitted disease by disease instead. In both cases the predictions for the
    complete test set are stored once, and every subgroup metric is computed by
    slicing those stored predictions, so each disease is always scored with the
    model that was trained for it.

    Args:
        x_train: training feature frame.
        y_train: training label frame; must contain the `diseases` columns.
        x_test: test feature frame; must also contain every column named in
            `metric_dimensions`.
        y_test: test label frame; must contain the `diseases` columns.
        model: scikit-learn style classifier. `predict_proba` is used for AUC
            when the estimator provides it.
        metric_dimensions: names of x_test columns whose distinct values define
            the subgroups to evaluate. The default list is never mutated.
        columns_to_drop: feature columns removed from x_train and x_test before
            fitting (missing names are ignored). The default list is never mutated.
        n_components: if truthy, features are projected with a PCA fitted on the
            training features; otherwise the features are used as they are.

    Returns:
        Tuple ``(metrics_df, delta_auc_df)``: `metrics_df` has one row per
        (metric dimension, subgroup, disease) combination, including the
        "all"/"all" rows for the whole test set and an "Overall" row that scores
        all diseases jointly. `delta_auc_df` is the return value of
        `calculate_delta_auc` for the subgroup AUCs.

    NOTE(review): columns named in `metric_dimensions` stay in the feature matrix
    unless they are also listed in `columns_to_drop` - confirm this is intended.
    NOTE(review): with average='micro' on a single binary label column,
    precision, recall and F1 all coincide with accuracy. Consider
    average='binary' for the per-disease rows once the label encoding is
    confirmed to be strictly 0/1.
    NOTE(review): this function is defined verbatim in more than one cell of the
    notebook; the last executed definition wins.
    """
    multi_output_model = MultiOutputClassifier(model)
    # Positional index so boolean subgroup masks line up with the prediction frames.
    x_test = x_test.reset_index(drop=True)
    y_test = y_test.reset_index(drop=True)

    # Select targets by name so their column order always matches the prediction
    # frames, which are labelled with `diseases`.
    y_train_targets = y_train[diseases]
    y_test_targets = y_test[diseases]

    x_train_subset = x_train.drop(columns=columns_to_drop, errors='ignore')
    x_test_subset = x_test.drop(columns=columns_to_drop, errors='ignore')

    if n_components:
        pca = PCA(n_components=n_components)
        x_train_subset = pca.fit_transform(x_train_subset)
        x_test_subset = pca.transform(x_test_subset)
        print(f"PCA used with {n_components} components.")
    else:
        print("PCA not used.")

    metrics_data = {
        'Metric Dimension': [],
        'Subgroup': [],
        'Disease': [],
        'Accuracy': [],
        'F1 Score': [],
        'Recall': [],
        'Precision': [],
        'AUC': []
    }

    # Subgroup AUCs, keyed disease -> metric dimension -> subgroup, for the Delta AUC tables.
    auc_values_per_disease = defaultdict(lambda: defaultdict(dict))

    def calculate_metrics(y_true, y_pred, y_pred_proba=None, disease_label="Overall",
                          metric_dim="all", subgroup="all"):
        """Append one metrics row; all values are computed before anything is stored."""
        accuracy = accuracy_score(y_true, y_pred)
        f1 = f1_score(y_true, y_pred, average='micro')
        recall = recall_score(y_true, y_pred, average='micro')
        precision = precision_score(y_true, y_pred, average='micro')

        auc = None
        if y_pred_proba is not None:
            try:
                auc = roc_auc_score(y_true, y_pred_proba)
            except ValueError:
                # AUC cannot be computed for these targets (e.g. only one class present).
                auc = None

        # Append every column together so the lists can never get out of step.
        metrics_data['Metric Dimension'].append(metric_dim)
        metrics_data['Subgroup'].append(subgroup)
        metrics_data['Disease'].append(disease_label)
        metrics_data['Accuracy'].append(accuracy)
        metrics_data['F1 Score'].append(f1)
        metrics_data['Recall'].append(recall)
        metrics_data['Precision'].append(precision)
        metrics_data['AUC'].append(auc)

        # Only subgroup AUCs feed the Delta AUC tables, not the whole-test-set rows.
        if auc is not None and metric_dim != "all":
            auc_values_per_disease[disease_label][metric_dim][subgroup] = auc

    has_proba = hasattr(model, "predict_proba")

    # Only the fit decides whether the multi-output wrapper is usable; errors raised
    # later while predicting or scoring are real problems and must not be masked.
    try:
        multi_output_model.fit(x_train_subset, y_train_targets)
        joint_fit = True
    except ValueError:
        print("Multi-output not supported. Training each disease separately.")
        joint_fit = False

    if joint_fit:
        y_test_preds = pd.DataFrame(multi_output_model.predict(x_test_subset), columns=diseases)
        if has_proba:
            y_test_preds_proba = pd.DataFrame({
                disease: probs[:, 1]
                for disease, probs in zip(diseases, multi_output_model.predict_proba(x_test_subset))
            })
        else:
            y_test_preds_proba = None
    else:
        # `model` is re-fitted for each disease, so its predictions are captured
        # immediately; afterwards it only reflects the last disease.
        pred_columns, proba_columns = {}, {}
        for disease in diseases:
            model.fit(x_train_subset, y_train_targets[disease])
            pred_columns[disease] = model.predict(x_test_subset)
            if has_proba:
                proba_columns[disease] = model.predict_proba(x_test_subset)[:, 1]
        y_test_preds = pd.DataFrame(pred_columns, columns=diseases)
        y_test_preds_proba = pd.DataFrame(proba_columns, columns=diseases) if has_proba else None

    def evaluate(rows, metric_dim, subgroup):
        """Score the test rows selected by the boolean mask `rows`, jointly and per disease."""
        y_true = y_test_targets.loc[rows]
        y_pred = y_test_preds.loc[rows]
        y_proba = y_test_preds_proba.loc[rows] if y_test_preds_proba is not None else None

        calculate_metrics(y_true, y_pred, y_proba, metric_dim=metric_dim, subgroup=subgroup)
        for disease in diseases:
            calculate_metrics(
                y_true[disease],
                y_pred[disease],
                y_proba[disease] if y_proba is not None else None,
                disease_label=disease,
                metric_dim=metric_dim,
                subgroup=subgroup,
            )

    # Whole test set first, then every subgroup of every requested dimension.
    evaluate(pd.Series(True, index=y_test_targets.index), "all", "all")

    for metric_dim in metric_dimensions:
        # Missing values never compare equal to themselves and would give an empty subgroup.
        for subgroup in x_test[metric_dim].dropna().unique():
            evaluate(x_test[metric_dim] == subgroup, metric_dim, subgroup)

    delta_auc_df = calculate_delta_auc(auc_values_per_disease)

    metrics_df = pd.DataFrame(metrics_data)
    metrics_df = metrics_df.reset_index(drop=True)
    return metrics_df, delta_auc_df

# Example run: fit on a subset of the training data and inspect the subgroup results.
training_size = 2000
use_pca = True
n_components = 10

# NOTE: despite its name, `xgb` holds scikit-learn's GradientBoostingClassifier, not an XGBoost model.
xgb = GradientBoostingClassifier(learning_rate=0.1, n_estimators=30, verbose=1, random_state=42, n_iter_no_change=5)

# Train the model and collect the performance metrics and the Delta AUC values.
metrics_df, delta_auc_df = train_model(
    x_train=x_train.iloc[:training_size],
    y_train=y_train.iloc[:training_size],
    x_test=x_test,
    y_test=y_test,
    model=xgb,
    metric_dimensions=["sex", "race", "insurance_type"],
    # Honour the use_pca switch (it used to be assigned but never read);
    # train_model skips PCA when n_components is falsy.
    n_components=n_components if use_pca else None,
)

# Show the Delta AUC table of every metric dimension.
sex_df, race_df, insurance_type_df = delta_auc_df
print("\nDelta AUC Tabelle für 'sex':")
print(sex_df)
print("\nDelta AUC Tabelle für 'race':")
print(race_df)
print("\nDelta AUC Tabelle für 'insurance_type':")
print(insurance_type_df)


In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, roc_auc_score
from sklearn.decomposition import PCA
import pandas as pd
from collections import defaultdict

# Disease labels: used below as the target column names of y_train / y_test
# and as the column names of the prediction frames built in train_model.
diseases = ['Enlarged Cardiomediastinum', 'Cardiomegaly', 'Lung Opacity', 'Lung Lesion', 'Edema', 
            'Consolidation', 'Pneumonia', 'Atelectasis', 'Pneumothorax', 
            'Pleural Effusion', 'Pleural Other', 'Fracture']

# Delta AUC computation: per-disease AUC gap between demographic subgroups
def calculate_delta_auc(auc_values_per_disease):
    """Build one AUC comparison table per demographic dimension.

    Args:
        auc_values_per_disease: nested mapping ``disease -> dimension -> subgroup -> AUC``.
            Only the dimensions 'sex', 'race' and 'insurance_type' are read, and
            within them only the lower-case subgroup keys listed in this function.

    Returns:
        Tuple ``(sex_df, race_df, insurance_type_df)``. Every frame holds a
        'Disease' column, one AUC column per expected subgroup and an
        'AUC Delta' column. A subgroup without an AUC is stored as None and the
        delta is None unless all expected subgroups of the dimension have a value.

    NOTE(review): this function is defined verbatim in more than one cell of the
    notebook; the last executed definition wins.
    """
    def three_way_gap(values):
        # Largest minus smallest AUC; undefined as soon as one subgroup is missing.
        if None in values:
            return None
        return max(values) - min(values)

    rows_sex, rows_race, rows_insurance = [], [], []

    for disease_name, by_dimension in auc_values_per_disease.items():
        # 'sex' table: absolute difference between the two subgroups.
        if 'sex' in by_dimension:
            sex_aucs = by_dimension['sex']
            auc_m = sex_aucs.get('male')
            auc_f = sex_aucs.get('female')
            if auc_m is None or auc_f is None:
                gap = None
            else:
                gap = abs(auc_m - auc_f)
            rows_sex.append([disease_name, auc_m, auc_f, gap])

        # 'race' table: range over the three expected subgroups.
        if 'race' in by_dimension:
            race_aucs = [by_dimension['race'].get(group) for group in ('asian', 'white', 'black')]
            rows_race.append([disease_name, *race_aucs, three_way_gap(race_aucs)])

        # 'insurance_type' table: range over the three expected subgroups.
        if 'insurance_type' in by_dimension:
            insurance_aucs = [
                by_dimension['insurance_type'].get(group)
                for group in ('private', 'medicaid', 'medicare')
            ]
            rows_insurance.append([disease_name, *insurance_aucs, three_way_gap(insurance_aucs)])

    # Materialise each row list as a DataFrame with fixed column headers.
    sex_df = pd.DataFrame(
        rows_sex,
        columns=['Disease', 'AUC (Male)', 'AUC (Female)', 'AUC Delta'],
    )
    race_df = pd.DataFrame(
        rows_race,
        columns=['Disease', 'AUC (Asian)', 'AUC (White)', 'AUC (Black)', 'AUC Delta'],
    )
    insurance_type_df = pd.DataFrame(
        rows_insurance,
        columns=['Disease', 'AUC (Private)', 'AUC (Medicaid)', 'AUC (Medicare)', 'AUC Delta'],
    )

    return sex_df, race_df, insurance_type_df

# Training and metrics: fit the model and evaluate it overall and per subgroup
def train_model(x_train, y_train, x_test, y_test, model, metric_dimensions=[], columns_to_drop=[], n_components=None):
    """Train `model` for every label in `diseases` and evaluate it overall and per subgroup.

    The estimator is first wrapped in a MultiOutputClassifier and fitted on all
    disease columns at once. If that fit raises ValueError, `model` itself is
    fitted disease by disease instead. In both cases the predictions for the
    complete test set are stored once, and every subgroup metric is computed by
    slicing those stored predictions, so each disease is always scored with the
    model that was trained for it.

    Args:
        x_train: training feature frame.
        y_train: training label frame; must contain the `diseases` columns.
        x_test: test feature frame; must also contain every column named in
            `metric_dimensions`.
        y_test: test label frame; must contain the `diseases` columns.
        model: scikit-learn style classifier. `predict_proba` is used for AUC
            when the estimator provides it.
        metric_dimensions: names of x_test columns whose distinct values define
            the subgroups to evaluate. The default list is never mutated.
        columns_to_drop: feature columns removed from x_train and x_test before
            fitting (missing names are ignored). The default list is never mutated.
        n_components: if truthy, features are projected with a PCA fitted on the
            training features; otherwise the features are used as they are.

    Returns:
        Tuple ``(metrics_df, delta_auc_df)``: `metrics_df` has one row per
        (metric dimension, subgroup, disease) combination, including the
        "all"/"all" rows for the whole test set and an "Overall" row that scores
        all diseases jointly. `delta_auc_df` is the return value of
        `calculate_delta_auc` for the subgroup AUCs.

    NOTE(review): columns named in `metric_dimensions` stay in the feature matrix
    unless they are also listed in `columns_to_drop` - confirm this is intended.
    NOTE(review): with average='micro' on a single binary label column,
    precision, recall and F1 all coincide with accuracy. Consider
    average='binary' for the per-disease rows once the label encoding is
    confirmed to be strictly 0/1.
    NOTE(review): this function is defined verbatim in more than one cell of the
    notebook; the last executed definition wins.
    """
    multi_output_model = MultiOutputClassifier(model)
    # Positional index so boolean subgroup masks line up with the prediction frames.
    x_test = x_test.reset_index(drop=True)
    y_test = y_test.reset_index(drop=True)

    # Select targets by name so their column order always matches the prediction
    # frames, which are labelled with `diseases`.
    y_train_targets = y_train[diseases]
    y_test_targets = y_test[diseases]

    x_train_subset = x_train.drop(columns=columns_to_drop, errors='ignore')
    x_test_subset = x_test.drop(columns=columns_to_drop, errors='ignore')

    if n_components:
        pca = PCA(n_components=n_components)
        x_train_subset = pca.fit_transform(x_train_subset)
        x_test_subset = pca.transform(x_test_subset)
        print(f"PCA used with {n_components} components.")
    else:
        print("PCA not used.")

    metrics_data = {
        'Metric Dimension': [],
        'Subgroup': [],
        'Disease': [],
        'Accuracy': [],
        'F1 Score': [],
        'Recall': [],
        'Precision': [],
        'AUC': []
    }

    # Subgroup AUCs, keyed disease -> metric dimension -> subgroup, for the Delta AUC tables.
    auc_values_per_disease = defaultdict(lambda: defaultdict(dict))

    def calculate_metrics(y_true, y_pred, y_pred_proba=None, disease_label="Overall",
                          metric_dim="all", subgroup="all"):
        """Append one metrics row; all values are computed before anything is stored."""
        accuracy = accuracy_score(y_true, y_pred)
        f1 = f1_score(y_true, y_pred, average='micro')
        recall = recall_score(y_true, y_pred, average='micro')
        precision = precision_score(y_true, y_pred, average='micro')

        auc = None
        if y_pred_proba is not None:
            try:
                auc = roc_auc_score(y_true, y_pred_proba)
            except ValueError:
                # AUC cannot be computed for these targets (e.g. only one class present).
                auc = None

        # Append every column together so the lists can never get out of step.
        metrics_data['Metric Dimension'].append(metric_dim)
        metrics_data['Subgroup'].append(subgroup)
        metrics_data['Disease'].append(disease_label)
        metrics_data['Accuracy'].append(accuracy)
        metrics_data['F1 Score'].append(f1)
        metrics_data['Recall'].append(recall)
        metrics_data['Precision'].append(precision)
        metrics_data['AUC'].append(auc)

        # Only subgroup AUCs feed the Delta AUC tables, not the whole-test-set rows.
        if auc is not None and metric_dim != "all":
            auc_values_per_disease[disease_label][metric_dim][subgroup] = auc

    has_proba = hasattr(model, "predict_proba")

    # Only the fit decides whether the multi-output wrapper is usable; errors raised
    # later while predicting or scoring are real problems and must not be masked.
    try:
        multi_output_model.fit(x_train_subset, y_train_targets)
        joint_fit = True
    except ValueError:
        print("Multi-output not supported. Training each disease separately.")
        joint_fit = False

    if joint_fit:
        y_test_preds = pd.DataFrame(multi_output_model.predict(x_test_subset), columns=diseases)
        if has_proba:
            y_test_preds_proba = pd.DataFrame({
                disease: probs[:, 1]
                for disease, probs in zip(diseases, multi_output_model.predict_proba(x_test_subset))
            })
        else:
            y_test_preds_proba = None
    else:
        # `model` is re-fitted for each disease, so its predictions are captured
        # immediately; afterwards it only reflects the last disease.
        pred_columns, proba_columns = {}, {}
        for disease in diseases:
            model.fit(x_train_subset, y_train_targets[disease])
            pred_columns[disease] = model.predict(x_test_subset)
            if has_proba:
                proba_columns[disease] = model.predict_proba(x_test_subset)[:, 1]
        y_test_preds = pd.DataFrame(pred_columns, columns=diseases)
        y_test_preds_proba = pd.DataFrame(proba_columns, columns=diseases) if has_proba else None

    def evaluate(rows, metric_dim, subgroup):
        """Score the test rows selected by the boolean mask `rows`, jointly and per disease."""
        y_true = y_test_targets.loc[rows]
        y_pred = y_test_preds.loc[rows]
        y_proba = y_test_preds_proba.loc[rows] if y_test_preds_proba is not None else None

        calculate_metrics(y_true, y_pred, y_proba, metric_dim=metric_dim, subgroup=subgroup)
        for disease in diseases:
            calculate_metrics(
                y_true[disease],
                y_pred[disease],
                y_proba[disease] if y_proba is not None else None,
                disease_label=disease,
                metric_dim=metric_dim,
                subgroup=subgroup,
            )

    # Whole test set first, then every subgroup of every requested dimension.
    evaluate(pd.Series(True, index=y_test_targets.index), "all", "all")

    for metric_dim in metric_dimensions:
        # Missing values never compare equal to themselves and would give an empty subgroup.
        for subgroup in x_test[metric_dim].dropna().unique():
            evaluate(x_test[metric_dim] == subgroup, metric_dim, subgroup)

    delta_auc_df = calculate_delta_auc(auc_values_per_disease)

    metrics_df = pd.DataFrame(metrics_data)
    metrics_df = metrics_df.reset_index(drop=True)
    return metrics_df, delta_auc_df


# Example run: fit on a subset of the training data and inspect the subgroup results.
training_size = 2000
use_pca = True
n_components = 10

# NOTE: despite its name, `xgb` holds scikit-learn's GradientBoostingClassifier, not an XGBoost model.
xgb = GradientBoostingClassifier(learning_rate=0.1, n_estimators=30, verbose=1, random_state=42, n_iter_no_change=5)

# Train the model and collect the performance metrics and the Delta AUC values.
metrics_df, delta_auc_df = train_model(
    x_train=x_train.iloc[:training_size],
    y_train=y_train.iloc[:training_size],
    x_test=x_test,
    y_test=y_test,
    model=xgb,
    metric_dimensions=["sex", "race", "insurance_type"],
    # Honour the use_pca switch (it used to be assigned but never read);
    # train_model skips PCA when n_components is falsy.
    n_components=n_components if use_pca else None,
)

# Show the Delta AUC table of every metric dimension.
sex_df, race_df, insurance_type_df = delta_auc_df
print("\nDelta AUC Tabelle für 'sex':")
print(sex_df)
print("\nDelta AUC Tabelle für 'race':")
print(race_df)
print("\nDelta AUC Tabelle für 'insurance_type':")
print(insurance_type_df)
