In [2]:
import pandas as pd
import numpy as np
import ast
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
from collections import defaultdict
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix, roc_auc_score, precision_recall_curve, auc

### Read the CSV

In [3]:
test = pd.read_csv("../model_dev/densenet_data/densenet_test_embeddings.csv", quotechar='"', on_bad_lines='skip')
train = pd.read_csv("../model_dev/densenet_data/densenet_train_embeddings.csv", quotechar='"', on_bad_lines='skip')
# valid = pd.read_csv("../model_dev/densenet_data/densenet_valid_embeddings.csv", quotechar='"', on_bad_lines='skip')

print(test.columns)
test.head()

Index(['path_to_image', 'path_to_dcm', 'age', 'sex', 'race', 'insurance_type',
       'No Finding', 'Enlarged Cardiomediastinum', 'Cardiomegaly',
       'Lung Opacity', 'Lung Lesion', 'Edema', 'Consolidation', 'Pneumonia',
       'Atelectasis', 'Pneumothorax', 'Pleural Effusion', 'Pleural Other',
       'Fracture', 'Support Devices', 'embeddings'],
      dtype='object')


Unnamed: 0,path_to_image,path_to_dcm,age,sex,race,insurance_type,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,...,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices,embeddings
0,train/patient47347/study3/view1_frontal.jpg,train/patient47347/study3/view1_frontal.dcm,78.0,1,0,1,0,0,1,0,...,1,0,0,1,0,1,0,1,1,"[0.0029132624622434378, 0.1020001769065857, 0...."
1,train/patient37527/study12/view1_frontal.jpg,train/patient37527/study12/view1_frontal.dcm,63.0,0,1,2,0,0,0,0,...,0,0,0,0,0,1,0,0,0,"[0.0014348188415169716, 0.0543656125664711, 0...."
2,train/patient41208/study9/view1_frontal.jpg,train/patient41208/study9/view1_frontal.dcm,70.0,0,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,"[0.001982336398214102, 0.040021587163209915, 0..."
3,train/patient39357/study1/view1_frontal.jpg,train/patient39357/study1/view1_frontal.dcm,79.0,1,1,1,0,0,0,0,...,0,0,1,0,0,0,0,0,0,"[0.001741771469824016, 0.0560498870909214, 0.1..."
4,train/patient31982/study4/view1_frontal.jpg,train/patient31982/study4/view1_frontal.dcm,67.0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,"[9.678312198957428e-05, 0.12247737497091293, 0..."


### Convert embeddings from str to list (a bit long for large data sets)

In [4]:
test['embeddings'] = test['embeddings'].apply(ast.literal_eval)

In [5]:
train['embeddings'] = train['embeddings'].apply(ast.literal_eval)

### Remove columns

In [6]:
test = test.drop(columns=['path_to_image', 'path_to_dcm'])
train = train.drop(columns=['path_to_image', 'path_to_dcm'])


### Remove rows that were not processed (embeddings = 0)

In [7]:
initial_size = test.shape[0] 

# The previous logic with transforming the list to string and filtering on the length of said string is not necessarily stable and misleading.
# Let's implement a more explicit test for what we actually care about: 

test = test[test['embeddings'].apply(type) == list]

final_size = test.shape[0] 

print(f'Number of test removed rows = {initial_size - final_size}')

initial_size = train.shape[0] 

train = train[train['embeddings'].apply(type) == list]

final_size = train.shape[0] 

print(f'Number of train removed rows = {initial_size - final_size}')

Number of test removed rows = 51
Number of train removed rows = 67


### Convert age to binary to study bias

In [8]:
a = 70
test['age'] = (test['age'] >= a).astype(int)
train['age'] = (train['age'] >= a).astype(int)

### Create artificial training distribution

In [9]:
print("Initial sex Distribution:")
print(train['sex'].value_counts())

print("\nInitial Race Distribution:")
print(train['race'].value_counts())

print("\nInitial Age Distribution:")
print(train['age'].value_counts())

print("\nInitial Health Distribution:")
print(train['insurance_type'].value_counts())

Initial sex Distribution:
sex
0    38998
1    28198
Name: count, dtype: int64

Initial Race Distribution:
race
0    52553
1     9844
2     4799
Name: count, dtype: int64

Initial Age Distribution:
age
0    42267
1    24929
Name: count, dtype: int64

Initial Health Distribution:
insurance_type
1    43076
2    18340
0     5780
Name: count, dtype: int64


In [10]:
# diseases = ['Cardiomegaly', 'Lung Opacity', 'Edema', 'Atelectasis', 'Pneumothorax', 'Pleural Effusion']
# other_cols = ['age', 'sex', 'race', 'insurance_type','embeddings']

# train = train[diseases + other_cols]


# # Create a list to store resampled data
# resampled_data = []

# # Determine the size of the largest group based on `sex`, `race`, `insurance_type`
# max_size = train.groupby(['sex', 'race', 'age', 'insurance_type']).size().max()

# # Loop over each group combination of `sex`, `race`, `insurance_type`
# for group, data in train.groupby(['sex', 'race', 'age', 'insurance_type']):

#     # Calculate the number of times we need to repeat the data to reach `max_size`
#     num_repeats = max_size // len(data)
#     remainder = max_size % len(data)

#     # Repeat the data `num_repeats` times and add a random sample to reach `max_size`
#     resampled_group = pd.concat([data] * num_repeats + [data.sample(remainder, random_state=42)])

#     # Append to the list of resampled data
#     resampled_data.append(resampled_group)

# # Combine all resampled groups back into a single DataFrame
# train = pd.concat(resampled_data, ignore_index=True)


# train = train.sample(frac=1).reset_index(drop=True)
# train = train[:65000]

In [11]:
# print("\nFinal sex Distribution:")
# print(train['sex'].value_counts())

# print("\nFinal Race Distribution:")
# print(train['race'].value_counts())

# print("\nFinal Age Distribution:")
# print(train['age'].value_counts())

# print("\nFinal Health Distribution:")
# print(train['insurance_type'].value_counts())

### Train test

### Choose which subgroup doing a model for

In [12]:
train1 = train[train["age"]==0]
test1 = test[test["age"]==0]

train_embeddings = pd.DataFrame(train1['embeddings'].tolist())
test_embeddings = pd.DataFrame(test1['embeddings'].tolist())
# valid_embeddings = pd.DataFrame(valid['embeddings'].tolist())

diseases = ['Cardiomegaly', 'Lung Opacity', 'Edema', 'Atelectasis', 'Pneumothorax', 'Pleural Effusion']

# Labels for train and test
y_train = train1[diseases]
y_test = test1[diseases]

# Create x_train and x_test
x_train = pd.concat([train1.reset_index(), train_embeddings], axis=1)
x_test =  pd.concat([test1.reset_index(), test_embeddings], axis=1)

x_train.drop(columns=["embeddings"] + diseases, inplace=True)
x_test.drop(columns=["embeddings"] + diseases, inplace=True)



# Labels for train and test
y_train = train1[diseases]
y_test = test1[diseases]
# y_valid = valid[diseases]
y_no_finding = test1["No Finding"]
y_sex = test1['sex']
y_race = test1['race']
y_insurance = test1['insurance_type']
y_age = test1['age']


# Step 1: Standardize the embeddings_list to have mean 0 and variance 1
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(np.stack(train_embeddings.values))
X_test_scaled = scaler.transform(np.stack(test_embeddings.values))

# Step 2: Set target variance threshold (e.g., 95%)
variance_threshold = 0.95

# Step 3: Fit PCA to determine the optimal number of components based on variance threshold
pca_full = PCA()
pca_full.fit(X_train_scaled)
cumulative_variance = np.cumsum(pca_full.explained_variance_ratio_)

# Step 4: Find the number of components that meets the variance threshold
optimal_components = np.argmax(cumulative_variance >= variance_threshold) + 1
print(f"Optimal number of components to retain {variance_threshold*100}% variance: {optimal_components}")

# Apply PCA if wanted
pca = PCA(n_components=optimal_components)
x_train_subset = pca.fit_transform(X_train_scaled)
x_test_subset = pca.transform(X_test_scaled)


Optimal number of components to retain 95.0% variance: 300


In [13]:
def train_model(x_train, y_train, x_test, y_test, model):
    multi_output_model = MultiOutputClassifier(model)
    

    multi_output_model.fit(x_train_subset, y_train)
    
    if hasattr(model, "predict_proba"):
        y_test_preds_proba = pd.DataFrame({disease: probs[:, 1] for disease, probs in zip(diseases, multi_output_model.predict_proba(x_test_subset))}) # Dataframe with probabilites 
    else:
        y_test_preds_proba = None


    return y_test_preds_proba
        

xgb_model = XGBClassifier(
    use_label_encoder=False,      
    eval_metric='logloss',   
    learning_rate=0.1,
    random_state=42
)

y_pred = train_model(
    x_train=x_train_subset, 
    y_train=y_train, 
    x_test=x_test_subset, 
    y_test=y_test, 
    model=xgb_model, 
)


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



In [14]:
predictions1 = y_pred.values
targets1 = y_test.values

In [15]:
train2 = train[train["age"]==1]
test2 = test[test["age"]==1]

train_embeddings = pd.DataFrame(train2['embeddings'].tolist())
test_embeddings = pd.DataFrame(test2['embeddings'].tolist())
# valid_embeddings = pd.DataFrame(valid['embeddings'].tolist())

diseases = ['Cardiomegaly', 'Lung Opacity', 'Edema', 'Atelectasis', 'Pneumothorax', 'Pleural Effusion']

# Labels for train and test
y_train = train2[diseases]
y_test = test2[diseases]

# Create x_train and x_test
x_train = pd.concat([train2.reset_index(), train_embeddings], axis=1)
x_test =  pd.concat([test2.reset_index(), test_embeddings], axis=1)

x_train.drop(columns=["embeddings"] + diseases, inplace=True)
x_test.drop(columns=["embeddings"] + diseases, inplace=True)



# Labels for train and test
y_train = train2[diseases]
y_test = test2[diseases]
# y_valid = valid[diseases]
y_no_finding = test2["No Finding"]
y_sex = test2['sex']
y_race = test2['race']
y_insurance = test2['insurance_type']
y_age = test2['age']


# Step 1: Standardize the embeddings_list to have mean 0 and variance 1
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(np.stack(train_embeddings.values))
X_test_scaled = scaler.transform(np.stack(test_embeddings.values))

# Step 2: Set target variance threshold (e.g., 95%)
variance_threshold = 0.95

# Step 3: Fit PCA to determine the optimal number of components based on variance threshold
pca_full = PCA()
pca_full.fit(X_train_scaled)
cumulative_variance = np.cumsum(pca_full.explained_variance_ratio_)

# Step 4: Find the number of components that meets the variance threshold
optimal_components = np.argmax(cumulative_variance >= variance_threshold) + 1
print(f"Optimal number of components to retain {variance_threshold*100}% variance: {optimal_components}")

# Apply PCA if wanted
pca = PCA(n_components=optimal_components)
x_train_subset = pca.fit_transform(X_train_scaled)
x_test_subset = pca.transform(X_test_scaled)


Optimal number of components to retain 95.0% variance: 310


In [16]:
def train_model(x_train, y_train, x_test, y_test, model):
    multi_output_model = MultiOutputClassifier(model)
    

    multi_output_model.fit(x_train_subset, y_train)
    
    if hasattr(model, "predict_proba"):
        y_test_preds_proba = pd.DataFrame({disease: probs[:, 1] for disease, probs in zip(diseases, multi_output_model.predict_proba(x_test_subset))}) # Dataframe with probabilites 
    else:
        y_test_preds_proba = None


    return y_test_preds_proba
        

xgb_model = XGBClassifier(
    use_label_encoder=False,      
    eval_metric='logloss',   
    learning_rate=0.1,
    random_state=42
)

y_pred = train_model(
    x_train=x_train_subset, 
    y_train=y_train, 
    x_test=x_test_subset, 
    y_test=y_test, 
    model=xgb_model, 
)


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



In [17]:
predictions2 = y_pred.values
targets2 = y_test.values

In [18]:
train3 = train[train["age"]==1]
test3 = test[test["age"]==1]

train_embeddings = pd.DataFrame(train2['embeddings'].tolist())
test_embeddings = pd.DataFrame(test2['embeddings'].tolist())
# valid_embeddings = pd.DataFrame(valid['embeddings'].tolist())

diseases = ['Cardiomegaly', 'Lung Opacity', 'Edema', 'Atelectasis', 'Pneumothorax', 'Pleural Effusion']

# Labels for train and test
y_train = train3[diseases]
y_test = test3[diseases]

# Create x_train and x_test
x_train = pd.concat([train3.reset_index(), train_embeddings], axis=1)
x_test =  pd.concat([test3.reset_index(), test_embeddings], axis=1)

x_train.drop(columns=["embeddings"] + diseases, inplace=True)
x_test.drop(columns=["embeddings"] + diseases, inplace=True)



# Labels for train and test
y_train = train3[diseases]
y_test = test3[diseases]
# y_valid = valid[diseases]
y_no_finding = test3["No Finding"]
y_sex = test3['sex']
y_race = test3['race']
y_insurance = test3['insurance_type']
y_age = test3['age']


# Step 1: Standardize the embeddings_list to have mean 0 and variance 1
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(np.stack(train_embeddings.values))
X_test_scaled = scaler.transform(np.stack(test_embeddings.values))

# Step 2: Set target variance threshold (e.g., 95%)
variance_threshold = 0.95

# Step 3: Fit PCA to determine the optimal number of components based on variance threshold
pca_full = PCA()
pca_full.fit(X_train_scaled)
cumulative_variance = np.cumsum(pca_full.explained_variance_ratio_)

# Step 4: Find the number of components that meets the variance threshold
optimal_components = np.argmax(cumulative_variance >= variance_threshold) + 1
print(f"Optimal number of components to retain {variance_threshold*100}% variance: {optimal_components}")

# Apply PCA if wanted
pca = PCA(n_components=optimal_components)
x_train_subset = pca.fit_transform(X_train_scaled)
x_test_subset = pca.transform(X_test_scaled)


Optimal number of components to retain 95.0% variance: 310


In [19]:
def train_model(x_train, y_train, x_test, y_test, model):
    multi_output_model = MultiOutputClassifier(model)
    

    multi_output_model.fit(x_train_subset, y_train)
    
    if hasattr(model, "predict_proba"):
        y_test_preds_proba = pd.DataFrame({disease: probs[:, 1] for disease, probs in zip(diseases, multi_output_model.predict_proba(x_test_subset))}) # Dataframe with probabilites 
    else:
        y_test_preds_proba = None


    return y_test_preds_proba
        

xgb_model = XGBClassifier(
    use_label_encoder=False,      
    eval_metric='logloss',   
    learning_rate=0.1,
    random_state=42
)

y_pred = train_model(
    x_train=x_train_subset, 
    y_train=y_train, 
    x_test=x_test_subset, 
    y_test=y_test, 
    model=xgb_model, 
)


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



In [20]:
predictions3 = y_pred.values
targets3 = y_test.values

In [21]:
diseases = ['Cardiomegaly', 'Lung Opacity', 'Edema', 'Atelectasis', 'Pneumothorax', 'Pleural Effusion']

In [22]:

# Calculate metrics for each disease and for each class

metrics_1 = {}
for idx, disease in enumerate(diseases):
    # disease_pred = predictions[:, idx]
    disease_pred = predictions1[:, idx]
    disease_true = targets1[:, idx]
    auc_roc = roc_auc_score(disease_true, disease_pred)
    f1 = f1_score(disease_true, (disease_pred > 0.5).astype(int))
    accuracy = accuracy_score(disease_true, (disease_pred > 0.5).astype(int))
    tn, fp, fn, tp = confusion_matrix(disease_true, (disease_pred > 0.5).astype(int)).ravel()
    tp_rate = tp / (tp + fn)
    tn_rate = tn / (tn + fp)
    fn_rate = fn / (fn + tp)
    fp_rate = fp / (tn + fp)
    
    precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
    auprc = auc(recall, precision)


    metrics_1[disease] = {
        'Accuracy': accuracy,
        'AUC': auc_roc,
        'AUPRC': auprc,
        'F1 Score': f1,
        'TP Rate': tp_rate,
        'FN Rate': fn_rate,
        'TN Rate': tn_rate,
        'FP Rate': fp_rate
        }
  

In [23]:
  
metrics2 = {}
for idx, disease in enumerate(diseases):
    # disease_pred = predictions[:, idx]
    disease_pred = predictions2[y_sex == 0, idx]
    disease_true = targets2[y_sex == 0, idx]
    auc_roc = roc_auc_score(disease_true, disease_pred)
    f1 = f1_score(disease_true, (disease_pred > 0.5).astype(int))
    accuracy = accuracy_score(disease_true, (disease_pred > 0.5).astype(int))
    tn, fp, fn, tp = confusion_matrix(disease_true, (disease_pred > 0.5).astype(int)).ravel()
    tp_rate = tp / (tp + fn)
    tn_rate = tn / (tn + fp)
    fn_rate = fn / (fn + tp)
    fp_rate = fp / (tn + fp)

    precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
    auprc = auc(recall, precision)


    metrics2[disease] = {
        'Accuracy': accuracy,
        'AUC': auc_roc,
        'AUPRC': auprc,
        'F1 Score': f1,
        'TP Rate': tp_rate,
        'FN Rate': fn_rate,
        'TN Rate': tn_rate,
        'FP Rate': fp_rate
        }



In [24]:
metrics_3 = {}
for idx, disease in enumerate(diseases):
    # disease_pred = predictions[:, idx]
    disease_pred = predictions3[:, idx]
    disease_true = targets3[:, idx]
    auc_roc = roc_auc_score(disease_true, disease_pred)
    f1 = f1_score(disease_true, (disease_pred > 0.5).astype(int))
    accuracy = accuracy_score(disease_true, (disease_pred > 0.5).astype(int))
    tn, fp, fn, tp = confusion_matrix(disease_true, (disease_pred > 0.5).astype(int)).ravel()
    tp_rate = tp / (tp + fn)
    tn_rate = tn / (tn + fp)
    fn_rate = fn / (fn + tp)
    fp_rate = fp / (tn + fp)

    precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
    auprc = auc(recall, precision)


    metrics_3[disease] = {
        'Accuracy': accuracy,
        'AUC': auc_roc,
        'AUPRC': auprc,
        'F1 Score': f1,
        'TP Rate': tp_rate,
        'FN Rate': fn_rate,
        'TN Rate': tn_rate,
        'FP Rate': fp_rate
        }
    


In [25]:
# Initialize an empty list to store the data
data_sex = []

# Iterate over the diseases in the metrics dictionary
for disease, values in metrics2.items():
    # Extract AUC and rates from dictionaries
    
    auc_1 = metrics_1[disease]['AUC'] *100
    auc_2 = metrics2[disease]['AUC'] *100
    tp_rate_1 = metrics_1[disease]['TP Rate'] *100
    tp_rate_2 = metrics2[disease]['TP Rate'] *100
    fp_rate_1 = metrics_1[disease]['FP Rate'] *100
    fp_rate_2 = metrics2[disease]['FP Rate'] *100
    
    # Calculate delta AUC and equality of odds
    delta_auc_sex = abs(auc_1 - auc_2)
    eq_odds_sex = 0.5 * (abs(tp_rate_1 - tp_rate_2) + abs(fp_rate_1 - fp_rate_2))
    
    # Append to the data list
    data_sex.append([disease, auc_1, auc_2, delta_auc_sex, eq_odds_sex])

# Create a DataFrame
df_sex = pd.DataFrame(data_sex, columns=['Disease', 'AUC_1', 'AUC_2', 'Delta AUC', 'EqOdds'])


# Styling the DataFrame
styled_df = df_sex.style.format({
    'AUC_1': "{:.3f}", 
    'AUC_2': "{:.3f}",
    'Delta AUC': "{:.3f}",
    'EqOdds': "{:.3f}"
}).background_gradient(cmap='viridis', subset=['AUC_1', 'AUC_2', 'Delta AUC', 'EqOdds'])

# Display the styled DataFrame
styled_df


Unnamed: 0,Disease,AUC_1,AUC_2,Delta AUC,EqOdds
0,Cardiomegaly,81.263,77.021,4.243,1.553
1,Lung Opacity,70.36,63.56,6.799,10.586
2,Edema,78.838,76.378,2.46,8.286
3,Atelectasis,62.816,58.709,4.107,0.007
4,Pneumothorax,73.621,70.958,2.663,0.936
5,Pleural Effusion,81.024,79.365,1.659,6.622


In [27]:
# # Initialize an empty list to store the data
# data_race = []

# # Iterate over the diseases in the metrics dictionary
# for disease, values in metrics2.items():
   
#     auc_groups = [
#         metrics_1[disease]['AUC'] *100,
#         metrics2[disease]['AUC'] *100,
#         metrics_3[disease]['AUC'] *100
#     ]
#     tp_rates = [
#         metrics_1[disease]['TP Rate'] *100,
#         metrics2[disease]['TP Rate'] *100,
#         metrics_3[disease]['TP Rate'] *100
#     ]
#     fp_rates = [
#         metrics_1[disease]['FP Rate'] *100,
#         metrics2[disease]['FP Rate'] *100,
#         metrics_3[disease]['FP Rate'] *100
#     ]

#     # Calculate the maximum delta AUC
#     delta_auc_race = max(abs(auc_groups[i] - auc_groups[j]) for i in range(len(auc_groups)) for j in range(i + 1, len(auc_groups)))

#     # Calculate the maximum equality of odds
#     eq_odds_race = max(
#         0.5 * (abs(tp_rates[i] - tp_rates[j]) + abs(fp_rates[i] - fp_rates[j]))
#         for i in range(len(tp_rates)) for j in range(i + 1, len(tp_rates))
#     )

#     # Append to the data list
#     data_race.append([disease] + auc_groups + [delta_auc_race, eq_odds_race])



# df_sex = pd.DataFrame(data_sex, columns=['Disease', 'AUC_1', 'AUC_2', 'AUC_3', 'Delta AUC', 'EqOdds'])


# # Styling the DataFrame
# styled_df = df_sex.style.format({
#     'AUC_1': "{:.3f}", 
#     'AUC_2': "{:.3f}",
#     'AUC_3': "{:.3f}",
#     'Delta AUC': "{:.3f}",
#     'EqOdds': "{:.3f}"
# }).background_gradient(cmap='viridis', subset=['AUC_1', 'AUC_2', 'AUC_3', 'Delta AUC', 'EqOdds'])

# # Display the styled DataFrame
# styled_df