In [2]:
import pandas as pd

df_CXR_Normal = pd.read_csv('Dataset_CXR_Normal.csv')
df_CXR_AI = pd.read_csv('Dataset_CXR_AI.csv')
df_MURA_Normal = pd.read_csv('Dataset_MURA_Normal.csv')
df_MURA_AI = pd.read_csv('Dataset_MURA_AI.csv')
df_Mammography_Normal = pd.read_csv('Dataset_Mammography_Normal.csv')
df_Mammography_AI = pd.read_csv('Dataset_Mammography_AI.csv')
df_Pneumothorax_Normal = pd.read_csv('Dataset_Pneumothorax_Normal.csv')
df_Pneumothorax_AI = pd.read_csv('Dataset_Pneumothorax_AI.csv')

## MURA - Accuracy

In [3]:
import pandas as pd
from scipy.stats import shapiro
from scipy.stats import wilcoxon
from sklearn.metrics import accuracy_score

# Encode 'Normal' as 0 and 'Abnormal' as 1 in the 'labelName_user_annotation' and 'labelName_model_output' columns
df_MURA_AI['annotated_Class'] = df_MURA_AI['labelName_user_annotation'].map({'Normal': 0, 'Abnormal': 1})

# Group the data by user and select the first 15 entries for each user
first_15_entries_AI = df_MURA_AI.groupby('updatedById').head(15)
first_15_entries_Normal = df_MURA_Normal.groupby('updatedById').head(15)

# Remove rows with NaN values in both arrays
first_15_entries_AI = first_15_entries_AI.dropna(subset=['annotated_Class', 'Class'])
first_15_entries_Normal = first_15_entries_Normal.dropna(subset=['annotated_Class', 'Class'])

# Ensure that both arrays have the same number of elements
min_length = min(len(first_15_entries_AI), len(first_15_entries_Normal))
first_15_entries_AI = first_15_entries_AI.head(min_length)
first_15_entries_Normal = first_15_entries_Normal.head(min_length)

# Create a list to store accuracy data
accuracy_data = []

# Iterate through user IDs
for user_id in first_15_entries_AI['updatedById'].unique():
    # Select data for the current user
    user_data_ai = first_15_entries_AI[first_15_entries_AI['updatedById'] == user_id]
    user_data_normal = first_15_entries_Normal[first_15_entries_Normal['updatedById'] == user_id]

    # Calculate accuracy with AI for the current user
    accuracy_with_ai = accuracy_score(user_data_ai['annotated_Class'], user_data_ai['Class'])

    # Calculate accuracy without AI for the current user
    accuracy_without_ai = accuracy_score(user_data_normal['annotated_Class'], user_data_normal['Class'])

    # Append the results to the accuracy_data list as a dictionary
    accuracy_data.append({'updatedById': user_id, 'AccuracyWithAI': accuracy_with_ai, 'AccuracyWithoutAI': accuracy_without_ai})

# Create a DataFrame from the accuracy_data list
accuracy_MURA_df = pd.DataFrame(accuracy_data)
accuracy_MURA_df

Unnamed: 0,updatedById,AccuracyWithAI,AccuracyWithoutAI
0,U_60pMP6,0.733333,0.733333
1,U_60pVk6,0.666667,0.733333
2,U_69P3RQ,0.933333,0.6
3,U_69PnEQ,0.8,0.666667
4,U_6ARnj8,0.866667,0.6
5,U_6wPe98,0.6,0.466667
6,U_OBKAWQ,0.733333,0.666667
7,U_OX2wN6,0.933333,0.733333
8,U_OjydP8,0.8,1.0
9,U_OppZ0O,0.8,0.666667


## Pneumothorax - Accuracy

In [4]:
import pandas as pd
from scipy.stats import shapiro
from scipy.stats import wilcoxon
from sklearn.metrics import accuracy_score

# Encode 'No Pneumothorax' as 0 and 'Pneumothorax' as 1 in the 'labelName_user_annotation' column
df_Pneumothorax_AI['annotated_Class'] = df_Pneumothorax_AI['labelName_user_annotation'].map({'No Pneumothorax': 0, 'Pneumothorax': 1})
df_Pneumothorax_Normal['annotated_Class'] = df_Pneumothorax_Normal['labelName'].map({'No Pneumothorax': 0, 'Pneumothorax': 1})

# Group the data by user and select the first 15 entries for each user
first_15_entries_AI = df_Pneumothorax_AI.groupby('updatedById').head(15)
first_15_entries_Normal = df_Pneumothorax_Normal.groupby('updatedById').head(15)

# Remove rows with NaN values in both arrays
first_15_entries_AI = first_15_entries_AI.dropna(subset=['annotated_Class', 'class'])
first_15_entries_Normal = first_15_entries_Normal.dropna(subset=['annotated_Class', 'class'])

# Ensure that both arrays have the same number of elements
min_length = min(len(first_15_entries_AI), len(first_15_entries_Normal))
first_15_entries_AI = first_15_entries_AI.head(min_length)
first_15_entries_Normal = first_15_entries_Normal.head(min_length)

# Create a list to store accuracy data
accuracy_data = []

# Iterate through user IDs
for user_id in first_15_entries_AI['updatedById'].unique():
    # Select data for the current user
    user_data_ai = first_15_entries_AI[first_15_entries_AI['updatedById'] == user_id]
    user_data_normal = first_15_entries_Normal[first_15_entries_Normal['updatedById'] == user_id]

    # Calculate accuracy with AI for the current user
    accuracy_with_ai = accuracy_score(user_data_ai['annotated_Class'], user_data_ai['class'])

    # Calculate accuracy without AI for the current user
    accuracy_without_ai = accuracy_score(user_data_normal['annotated_Class'], user_data_normal['class'])

    # Append the results to the accuracy_data list as a dictionary
    accuracy_data.append({'updatedById': user_id, 'AccuracyWithAI': accuracy_with_ai, 'AccuracyWithoutAI': accuracy_without_ai})

# Create a DataFrame from the accuracy_data list
accuracy_Pneumothorax_df = pd.DataFrame(accuracy_data)
accuracy_Pneumothorax_df

Unnamed: 0,updatedById,AccuracyWithAI,AccuracyWithoutAI
0,U_60pMP6,0.928571,0.866667
1,U_60pVk6,0.785714,0.933333
2,U_69P3RQ,0.733333,0.933333
3,U_69PnEQ,0.8,0.692308
4,U_6ARnj8,0.928571,0.866667
5,U_6wPe98,0.866667,0.666667
6,U_OBKAWQ,0.866667,0.8
7,U_OX2wN6,0.857143,0.857143
8,U_OjydP8,0.866667,0.6
9,U_OppZ0O,0.733333,0.8


## Mammography - Accuracy

In [5]:
import pandas as pd
from scipy.stats import shapiro
from scipy.stats import wilcoxon
from sklearn.metrics import accuracy_score


df_Mammography_Normal['annotated_Class'] = df_Mammography_Normal['labelName'].map({'Benign': 0, 'Normal': 0,'Probably Benign':0, 'Probably Malignant':1, 'Malignant': 1})
df_Mammography_Normal['Class'] = df_Mammography_Normal['pathology'].map({'BENIGN': 0,'BENIGN_WITHOUT_CALLBACK':0, 'MALIGNANT': 1})
df_Mammography_AI['Class'] = df_Mammography_AI['pathology'].map({'BENIGN': 0,'BENIGN_WITHOUT_CALLBACK':0, 'MALIGNANT': 1})
df_Mammography_AI['annotated_Class'] = df_Mammography_AI['labelName_user_annotation'].map({'Benign': 0, 'Normal': 0,'Probably Benign':0, 'Probably Malignant':1, 'Malignant': 1})

# Group the data by user and select the first 15 entries for each user
first_15_entries_AI = df_Mammography_AI.groupby('updatedById').head(15)
first_15_entries_Normal = df_Mammography_Normal.groupby('updatedById').head(15)

# Remove rows with NaN values in both arrays
first_15_entries_AI = first_15_entries_AI.dropna(subset=['annotated_Class', 'Class'])
first_15_entries_Normal = first_15_entries_Normal.dropna(subset=['annotated_Class', 'Class'])

# Ensure that both arrays have the same number of elements
min_length = min(len(first_15_entries_AI), len(first_15_entries_Normal))
first_15_entries_AI = first_15_entries_AI.head(min_length)
first_15_entries_Normal = first_15_entries_Normal.head(min_length)

# Create a list to store accuracy data
accuracy_data = []

# Iterate through user IDs
for user_id in first_15_entries_AI['updatedById'].unique():
    # Select data for the current user
    user_data_ai = first_15_entries_AI[first_15_entries_AI['updatedById'] == user_id]
    user_data_normal = first_15_entries_Normal[first_15_entries_Normal['updatedById'] == user_id]

    # Calculate accuracy with AI for the current user
    accuracy_with_ai = accuracy_score(user_data_ai['annotated_Class'], user_data_ai['Class'])

    # Calculate accuracy without AI for the current user
    accuracy_without_ai = accuracy_score(user_data_normal['annotated_Class'], user_data_normal['Class'])

    # Append the results to the accuracy_data list as a dictionary
    accuracy_data.append({'updatedById': user_id, 'AccuracyWithAI': accuracy_with_ai, 'AccuracyWithoutAI': accuracy_without_ai})

# Create a DataFrame from the accuracy_data list
accuracy_Mammography_df = pd.DataFrame(accuracy_data)
accuracy_Mammography_df

Unnamed: 0,updatedById,AccuracyWithAI,AccuracyWithoutAI
0,U_60pMP6,0.666667,0.466667
1,U_60pVk6,0.6,0.466667
2,U_69P3RQ,0.666667,0.733333
3,U_69PnEQ,0.8,0.533333
4,U_6ARnj8,0.466667,0.666667
5,U_6wPe98,0.533333,0.666667
6,U_OBKAWQ,0.533333,0.333333
7,U_OX2wN6,0.466667,0.857143
8,U_OjydP8,0.666667,0.533333
9,U_OppZ0O,0.8,0.6


## CXR - Accuracy

In [19]:
import pandas as pd

# Define the ground truth columns
ground_truth_columns = ['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Effusion', 'Emphysema',
                        'Fibrosis', 'Hernia', 'Infiltration', 'Mass', 'No Finding', 'Nodule',
                        'Pleural_Thickening', 'Pneumonia', 'Pneumothorax']

# Create a new column for each label to check if user annotation matches ground truth
for gt_col in ground_truth_columns:
    user_col = f'{gt_col}_user_annotation'
    # Match 0 in ground truth to 0 and any value greater than 0 to 1
    df_CXR_Normal[f'{gt_col}_match'] = ((df_CXR_Normal[gt_col] == 0) & (df_CXR_Normal[user_col] == 0) | (df_CXR_Normal[user_col] > 0))

# Group the DataFrame by 'updatedById'
grouped = df_CXR_Normal.groupby('updatedById')

# Create a list of dictionaries to store user accuracies
user_accuracies = []

# Calculate accuracy for each user
for user, group in grouped:
    # Consider only the first 15 entries if there are more
    group = group.head(15)
    user_accuracy = group[[f'{gt_col}_match' for gt_col in ground_truth_columns]].mean().mean()
    user_accuracies.append({'User': user, 'Overall_Accuracy': user_accuracy})

# Convert the list of dictionaries to a DataFrame
user_NonAI_df = pd.DataFrame(user_accuracies)

print("User Accuracies (CXR) by User (Considering First 15 Entries):")
print(user_NonAI_df)


User Accuracies (CXR) by User (Considering First 15 Entries):
        User  Overall_Accuracy
0   U_60pMP6          0.964444
1   U_60pVk6          0.966667
2   U_69P3RQ          0.915556
3   U_69PnEQ          0.928889
4   U_6ARnj8          0.937778
5   U_6wPe98          0.911111
6   U_OBKAWQ          0.960000
7   U_OX2wN6          0.964444
8   U_OjydP8          0.915556
9   U_OppZ0O          0.942222
10  U_OqmjnO          0.924444
11  U_QWX7k6          0.928889


In [20]:
import pandas as pd

# Define the ground truth columns
ground_truth_columns = ['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Effusion', 'Emphysema',
                        'Fibrosis', 'Hernia', 'Infiltration', 'Mass', 'No Finding', 'Nodule',
                        'Pleural_Thickening', 'Pneumonia', 'Pneumothorax']

# Create a new column for each label to check if user annotation matches ground truth
for gt_col in ground_truth_columns:
    user_col = f'{gt_col}_user_annotation'
    # Match 0 in ground truth to 0 and any value greater than 0 to 1
    df_CXR_AI[f'{gt_col}_match'] = ((df_CXR_AI[gt_col] == 0) & (df_CXR_AI[user_col] == 0) | (df_CXR_AI[user_col] > 0))

# Group the DataFrame by 'updatedById'
grouped = df_CXR_AI.groupby('updatedById')

# Create a list of dictionaries to store user accuracies
user_accuracies = []

# Calculate accuracy for each user
for user, group in grouped:
    # Consider only the first 15 entries if there are more
    group = group.head(15)
    user_accuracy = group[[f'{gt_col}_match' for gt_col in ground_truth_columns]].mean().mean()
    user_accuracies.append({'User': user, 'Overall_Accuracy': user_accuracy})

# Convert the list of dictionaries to a DataFrame
user_AI_df = pd.DataFrame(user_accuracies)

print("User Accuracies (CXR) by User (Considering First 15 Entries):")
print(user_AI_df)


User Accuracies (CXR) by User (Considering First 15 Entries):
        User  Overall_Accuracy
0   U_60pMP6          0.960000
1   U_60pVk6          0.960000
2   U_69P3RQ          0.960000
3   U_69PnEQ          0.964444
4   U_6ARnj8          0.951515
5   U_6wPe98          0.964444
6   U_OBKAWQ          0.964444
7   U_OX2wN6          0.964444
8   U_OjydP8          0.968889
9   U_OppZ0O          0.991111
10  U_OqmjnO          0.977778
11  U_QWX7k6          0.973333


In [21]:
# Assuming you have the DataFrames 'user_accuracies_df' for users without AI and 'user_accuracies_df_ai' for users with AI.

# Merge the two DataFrames on the 'User' column
combined_user_accuracies = pd.merge(user_NonAI_df, user_AI_df, on='User', suffixes=('_NoAI', '_WithAI'))

# Now you have a DataFrame that contains accuracy data for each user both with and without AI.
print("Combined User Accuracies:")
print(combined_user_accuracies)


Combined User Accuracies:
        User  Overall_Accuracy_NoAI  Overall_Accuracy_WithAI
0   U_60pMP6               0.964444                 0.960000
1   U_60pVk6               0.966667                 0.960000
2   U_69P3RQ               0.915556                 0.960000
3   U_69PnEQ               0.928889                 0.964444
4   U_6ARnj8               0.937778                 0.951515
5   U_6wPe98               0.911111                 0.964444
6   U_OBKAWQ               0.960000                 0.964444
7   U_OX2wN6               0.964444                 0.964444
8   U_OjydP8               0.915556                 0.968889
9   U_OppZ0O               0.942222                 0.991111
10  U_OqmjnO               0.924444                 0.977778
11  U_QWX7k6               0.928889                 0.973333


In [22]:
# Rename the columns as specified
combined_user_accuracies = combined_user_accuracies.rename(columns={
    'User': 'updatedById',
    'Overall_Accuracy_NoAI': 'AccuracyWithoutAI',
    'Overall_Accuracy_WithAI': 'AccuracyWithAI'
})

# Now you have a DataFrame with the desired column names.
print("Combined User Accuracies:")
print(combined_user_accuracies)


Combined User Accuracies:
   updatedById  AccuracyWithoutAI  AccuracyWithAI
0     U_60pMP6           0.964444        0.960000
1     U_60pVk6           0.966667        0.960000
2     U_69P3RQ           0.915556        0.960000
3     U_69PnEQ           0.928889        0.964444
4     U_6ARnj8           0.937778        0.951515
5     U_6wPe98           0.911111        0.964444
6     U_OBKAWQ           0.960000        0.964444
7     U_OX2wN6           0.964444        0.964444
8     U_OjydP8           0.915556        0.968889
9     U_OppZ0O           0.942222        0.991111
10    U_OqmjnO           0.924444        0.977778
11    U_QWX7k6           0.928889        0.973333


## Accuracy of 4 modalities

In [23]:
# Merge the DataFrames for the 'MURA' modality
combined_user_accuracies = pd.merge(combined_user_accuracies, accuracy_MURA_df, on='updatedById', suffixes=('_CXR', '_MURA'))

# Merge the DataFrames for the 'Pneumothorax' modality
combined_user_accuracies = pd.merge(combined_user_accuracies, accuracy_Pneumothorax_df, on='updatedById', suffixes=('_MURA', '_Pneumothorax'))

# Merge the DataFrames for the 'Mammography' modality
combined_user_accuracies = pd.merge(combined_user_accuracies, accuracy_Mammography_df, on='updatedById', suffixes=('_Pneumothorax', '_Mammography'))

# Round all columns in the combined_user_accuracies DataFrame to 2 digits
combined_user_accuracies = combined_user_accuracies.round(2)
combined_user_accuracies

Unnamed: 0,updatedById,AccuracyWithoutAI_CXR,AccuracyWithAI_CXR,AccuracyWithAI_MURA,AccuracyWithoutAI_MURA,AccuracyWithAI_Pneumothorax,AccuracyWithoutAI_Pneumothorax,AccuracyWithAI_Mammography,AccuracyWithoutAI_Mammography
0,U_60pMP6,0.96,0.96,0.73,0.73,0.93,0.87,0.67,0.47
1,U_60pVk6,0.97,0.96,0.67,0.73,0.79,0.93,0.6,0.47
2,U_69P3RQ,0.92,0.96,0.93,0.6,0.73,0.93,0.67,0.73
3,U_69PnEQ,0.93,0.96,0.8,0.67,0.8,0.69,0.8,0.53
4,U_6ARnj8,0.94,0.95,0.87,0.6,0.93,0.87,0.47,0.67
5,U_6wPe98,0.91,0.96,0.6,0.47,0.87,0.67,0.53,0.67
6,U_OBKAWQ,0.96,0.96,0.73,0.67,0.87,0.8,0.53,0.33
7,U_OX2wN6,0.96,0.96,0.93,0.73,0.86,0.86,0.47,0.86
8,U_OjydP8,0.92,0.97,0.8,1.0,0.87,0.6,0.67,0.53
9,U_OppZ0O,0.94,0.99,0.8,0.67,0.73,0.8,0.8,0.6


## Overall accuracy of each User

In [17]:
# Create a new DataFrame for average accuracies with AI and without AI
average_accuracies_df = pd.DataFrame()

# Calculate the average accuracy for each user without AI
average_accuracies_df['updatedById'] = combined_user_accuracies['updatedById']
average_accuracies_df['Average_Accuracy_WithoutAI'] = combined_user_accuracies[['AccuracyWithoutAI_CXR', 'AccuracyWithoutAI_MURA', 'AccuracyWithoutAI_Pneumothorax', 'AccuracyWithoutAI_Mammography']].mean(axis=1)

# Calculate the average accuracy for each user with AI
average_accuracies_df['Average_Accuracy_WithAI'] = combined_user_accuracies[['AccuracyWithAI_CXR', 'AccuracyWithAI_MURA', 'AccuracyWithAI_Pneumothorax', 'AccuracyWithAI_Mammography']].mean(axis=1)

# Now you have a DataFrame with the average accuracy for each user with and without AI separately.
print("Average Accuracies:")
print(average_accuracies_df)


Average Accuracies:
   updatedById  Average_Accuracy_WithoutAI  Average_Accuracy_WithAI
0     U_60pMP6                    0.757778                 0.822143
1     U_60pVk6                    0.775000                 0.753095
2     U_69P3RQ                    0.795556                 0.823333
3     U_69PnEQ                    0.705299                 0.841111
4     U_6ARnj8                    0.767778                 0.803355
5     U_6wPe98                    0.677778                 0.741111
6     U_OBKAWQ                    0.690000                 0.774444
7     U_OX2wN6                    0.853016                 0.805397
8     U_OjydP8                    0.762222                 0.825556
9     U_OppZ0O                    0.752222                 0.831111
10    U_OqmjnO                    0.764444                 0.744444
11    U_QWX7k6                    0.729841                 0.906429


## MURA - metrics

In [3]:
import pandas as pd
from sklearn.metrics import confusion_matrix


# Each DataFrame contains user IDs and corresponding annotations 'annotated_Class' (0 or 1)
df_MURA_AI['annotated_Class'] = df_MURA_AI['labelName_user_annotation'].map({'Normal': 0, 'Abnormal': 1})

# Group the data by user and select the first 15 entries for each user
first_15_entries_AI = df_MURA_AI.groupby('updatedById').head(15)
first_15_entries_Normal = df_MURA_Normal.groupby('updatedById').head(15)

# Remove rows with NaN values in both arrays
first_15_entries_AI = first_15_entries_AI.dropna(subset=['annotated_Class', 'Class'])
first_15_entries_Normal = first_15_entries_Normal.dropna(subset=['annotated_Class', 'Class'])

# Ensure that both arrays have the same number of elements
min_length = min(len(first_15_entries_AI), len(first_15_entries_Normal))
first_15_entries_AI = first_15_entries_AI.head(min_length)
first_15_entries_Normal = first_15_entries_Normal.head(min_length)

# Create a list to store user metrics
user_metrics_data = []

# Iterate through user IDs
for user_id in first_15_entries_AI['updatedById'].unique():
    # Select data for the current user
    user_data_ai = first_15_entries_AI[first_15_entries_AI['updatedById'] == user_id]
    user_data_normal = first_15_entries_Normal[first_15_entries_Normal['updatedById'] == user_id]

    # Calculate TP, TN, FP, and FN for data with AI
    TP_with_ai = ((user_data_ai['annotated_Class'] == 1) & (user_data_ai['Class'] == 1)).sum()
    TN_with_ai = ((user_data_ai['annotated_Class'] == 0) & (user_data_ai['Class'] == 0)).sum()
    FP_with_ai = ((user_data_ai['annotated_Class'] == 0) & (user_data_ai['Class'] == 1)).sum()
    FN_with_ai = ((user_data_ai['annotated_Class'] == 1) & (user_data_ai['Class'] == 0)).sum()

    # Calculate TP, TN, FP, and FN for data without AI
    TP_without_ai = ((user_data_normal['annotated_Class'] == 1) & (user_data_normal['Class'] == 1)).sum()
    TN_without_ai = ((user_data_normal['annotated_Class'] == 0) & (user_data_normal['Class'] == 0)).sum()
    FP_without_ai = ((user_data_normal['annotated_Class'] == 0) & (user_data_normal['Class'] == 1)).sum()
    FN_without_ai = ((user_data_normal['annotated_Class'] == 1) & (user_data_normal['Class'] == 0)).sum()

    # Append the results to the user_metrics_data list as a dictionary
    user_metrics_data.append({
        'updatedById': user_id,
        'TP_WithAI': TP_with_ai,
        'TN_WithAI': TN_with_ai,
        'FP_WithAI': FP_with_ai,
        'FN_WithAI': FN_with_ai,
        'TP_WithoutAI': TP_without_ai,
        'TN_WithoutAI': TN_without_ai,
        'FP_WithoutAI': FP_without_ai,
        'FN_WithoutAI': FN_without_ai
    })

# Create a DataFrame from the user_metrics_data list
user_metrics_MURA_df = pd.DataFrame(user_metrics_data)

# Now you have a DataFrame that contains TP, TN, FP, and FN for every user both with and without AI.
print("User Metrics for MURA:")
print(user_metrics_MURA_df)


User Metrics for MURA:
   updatedById  TP_WithAI  TN_WithAI  FP_WithAI  FN_WithAI  TP_WithoutAI  \
0     U_60pMP6          6          5          2          2             6   
1     U_60pVk6          4          6          2          3             7   
2     U_69P3RQ          9          5          0          1             5   
3     U_69PnEQ          7          5          1          2             8   
4     U_6ARnj8          5          8          1          1             5   
5     U_6wPe98          3          6          1          5             4   
6     U_OBKAWQ          3          8          2          2             8   
7     U_OX2wN6          4         10          0          1             6   
8     U_OjydP8          5          7          2          1             5   
9     U_OppZ0O          3          9          1          2             5   
10    U_OqmjnO          5          5          4          1             5   
11    U_QWX7k6          5          9          1          0       

In [4]:
# Calculate accuracy, precision, and recall for data with AI
user_metrics_MURA_df['Accuracy_WithAI'] = (user_metrics_MURA_df['TP_WithAI'] + user_metrics_MURA_df['TN_WithAI']) / (user_metrics_MURA_df['TP_WithAI'] + user_metrics_MURA_df['TN_WithAI'] + user_metrics_MURA_df['FP_WithAI'] + user_metrics_MURA_df['FN_WithAI'])
user_metrics_MURA_df['Precision_WithAI'] = user_metrics_MURA_df['TP_WithAI'] / (user_metrics_MURA_df['TP_WithAI'] + user_metrics_MURA_df['FP_WithAI'])
user_metrics_MURA_df['Recall_WithAI'] = user_metrics_MURA_df['TP_WithAI'] / (user_metrics_MURA_df['TP_WithAI'] + user_metrics_MURA_df['FN_WithAI'])

# Calculate accuracy, precision, and recall for data without AI
user_metrics_MURA_df['Accuracy_WithoutAI'] = (user_metrics_MURA_df['TP_WithoutAI'] + user_metrics_MURA_df['TN_WithoutAI']) / (user_metrics_MURA_df['TP_WithoutAI'] + user_metrics_MURA_df['TN_WithoutAI'] + user_metrics_MURA_df['FP_WithoutAI'] + user_metrics_MURA_df['FN_WithoutAI'])
user_metrics_MURA_df['Precision_WithoutAI'] = user_metrics_MURA_df['TP_WithoutAI'] / (user_metrics_MURA_df['TP_WithoutAI'] + user_metrics_MURA_df['FP_WithoutAI'])
user_metrics_MURA_df['Recall_WithoutAI'] = user_metrics_MURA_df['TP_WithoutAI'] / (user_metrics_MURA_df['TP_WithoutAI'] + user_metrics_MURA_df['FN_WithoutAI'])

# Now the user_metrics_MURA_df DataFrame contains accuracy, precision, and recall for every user both with and without AI.
print("User Metrics for MURA with AI:")
print(user_metrics_MURA_df[['updatedById', 'Accuracy_WithAI', 'Precision_WithAI', 'Recall_WithAI']])

print("User Metrics for MURA without AI:")
print(user_metrics_MURA_df[['updatedById', 'Accuracy_WithoutAI', 'Precision_WithoutAI', 'Recall_WithoutAI']])


User Metrics for MURA with AI:
   updatedById  Accuracy_WithAI  Precision_WithAI  Recall_WithAI
0     U_60pMP6         0.733333          0.750000       0.750000
1     U_60pVk6         0.666667          0.666667       0.571429
2     U_69P3RQ         0.933333          1.000000       0.900000
3     U_69PnEQ         0.800000          0.875000       0.777778
4     U_6ARnj8         0.866667          0.833333       0.833333
5     U_6wPe98         0.600000          0.750000       0.375000
6     U_OBKAWQ         0.733333          0.600000       0.600000
7     U_OX2wN6         0.933333          1.000000       0.800000
8     U_OjydP8         0.800000          0.714286       0.833333
9     U_OppZ0O         0.800000          0.750000       0.600000
10    U_OqmjnO         0.666667          0.555556       0.833333
11    U_QWX7k6         0.933333          0.833333       1.000000
User Metrics for MURA without AI:
   updatedById  Accuracy_WithoutAI  Precision_WithoutAI  Recall_WithoutAI
0     U_60pMP6  

## Pneumothorax - Metrics

In [5]:
import pandas as pd

# Define column mappings for annotated classes
class_mapping = {'No Pneumothorax': 0, 'Pneumothorax': 1}

# Map annotated classes for both AI and Normal data
df_Pneumothorax_AI['annotated_Class'] = df_Pneumothorax_AI['labelName_user_annotation'].map(class_mapping)
df_Pneumothorax_Normal['annotated_Class'] = df_Pneumothorax_Normal['labelName'].map(class_mapping)

# Group the data by user and select the first 15 entries for each user
first_15_entries_AI = df_Pneumothorax_AI.groupby('updatedById').head(15)
first_15_entries_Normal = df_Pneumothorax_Normal.groupby('updatedById').head(15)

# Remove rows with NaN values in both arrays
first_15_entries_AI = first_15_entries_AI.dropna(subset=['annotated_Class', 'class'])
first_15_entries_Normal = first_15_entries_Normal.dropna(subset=['annotated_Class', 'class'])

# Ensure that both arrays have the same number of elements
min_length = min(len(first_15_entries_AI), len(first_15_entries_Normal))
first_15_entries_AI = first_15_entries_AI.head(min_length)
first_15_entries_Normal = first_15_entries_Normal.head(min_length)

# Create a list to store user metrics
user_metrics_data = []

# Iterate through user IDs
for user_id in first_15_entries_AI['updatedById'].unique():
    # Select data for the current user
    user_data_ai = first_15_entries_AI[first_15_entries_AI['updatedById'] == user_id]
    user_data_normal = first_15_entries_Normal[first_15_entries_Normal['updatedById'] == user_id]

    # Calculate TP, TN, FP, and FN for data with AI
    TP_with_ai = ((user_data_ai['annotated_Class'] == 1) & (user_data_ai['class'] == 1)).sum()
    TN_with_ai = ((user_data_ai['annotated_Class'] == 0) & (user_data_ai['class'] == 0)).sum()
    FP_with_ai = ((user_data_ai['annotated_Class'] == 0) & (user_data_ai['class'] == 1)).sum()
    FN_with_ai = ((user_data_ai['annotated_Class'] == 1) & (user_data_ai['class'] == 0)).sum()

    # Calculate TP, TN, FP, and FN for data without AI
    TP_without_ai = ((user_data_normal['annotated_Class'] == 1) & (user_data_normal['class'] == 1)).sum()
    TN_without_ai = ((user_data_normal['annotated_Class'] == 0) & (user_data_normal['class'] == 0)).sum()
    FP_without_ai = ((user_data_normal['annotated_Class'] == 0) & (user_data_normal['class'] == 1)).sum()
    FN_without_ai = ((user_data_normal['annotated_Class'] == 1) & (user_data_normal['class'] == 0)).sum()

    # Append the results to the user_metrics_data list as a dictionary
    user_metrics_data.append({
        'updatedById': user_id,
        'TP_WithAI': TP_with_ai,
        'TN_WithAI': TN_with_ai,
        'FP_WithAI': FP_with_ai,
        'FN_WithAI': FN_with_ai,
        'TP_WithoutAI': TP_without_ai,
        'TN_WithoutAI': TN_without_ai,
        'FP_WithoutAI': FP_without_ai,
        'FN_WithoutAI': FN_without_ai
    })

# Create a DataFrame from the user_metrics_data list
user_metrics_Pneumo_df = pd.DataFrame(user_metrics_data)

# Now you have a DataFrame that contains TP, TN, FP, and FN for every user both with and without AI.
print("User Metrics for Pneumothorax:")
print(user_metrics_Pneumo_df)


User Metrics for Pneumothorax:
   updatedById  TP_WithAI  TN_WithAI  FP_WithAI  FN_WithAI  TP_WithoutAI  \
0     U_60pMP6          2         11          1          0             2   
1     U_60pVk6          1         10          2          1             3   
2     U_69P3RQ          2          9          1          3             2   
3     U_69PnEQ          2         10          0          3             1   
4     U_6ARnj8          2         11          1          0             1   
5     U_6wPe98          3         10          0          2             5   
6     U_OBKAWQ          2         11          0          2             3   
7     U_OX2wN6          3          9          2          0             1   
8     U_OjydP8          3         10          0          2             3   
9     U_OppZ0O          2          9          3          1             2   
10    U_OqmjnO          2         10          1          2             3   
11    U_QWX7k6          3         11          0          

In [6]:
# Calculate accuracy, precision, and recall for data with AI
user_metrics_Pneumo_df['Accuracy_WithAI'] = (user_metrics_Pneumo_df['TP_WithAI'] + user_metrics_Pneumo_df['TN_WithAI']) / (user_metrics_Pneumo_df['TP_WithAI'] + user_metrics_Pneumo_df['TN_WithAI'] + user_metrics_Pneumo_df['FP_WithAI'] + user_metrics_Pneumo_df['FN_WithAI'])
user_metrics_Pneumo_df['Precision_WithAI'] = user_metrics_Pneumo_df['TP_WithAI'] / (user_metrics_Pneumo_df['TP_WithAI'] + user_metrics_Pneumo_df['FP_WithAI'])
user_metrics_Pneumo_df['Recall_WithAI'] = user_metrics_Pneumo_df['TP_WithAI'] / (user_metrics_Pneumo_df['TP_WithAI'] + user_metrics_Pneumo_df['FN_WithAI'])

# Calculate accuracy, precision, and recall for data without AI
user_metrics_Pneumo_df['Accuracy_WithoutAI'] = (user_metrics_Pneumo_df['TP_WithoutAI'] + user_metrics_Pneumo_df['TN_WithoutAI']) / (user_metrics_Pneumo_df['TP_WithoutAI'] + user_metrics_Pneumo_df['TN_WithoutAI'] + user_metrics_Pneumo_df['FP_WithoutAI'] + user_metrics_Pneumo_df['FN_WithoutAI'])
user_metrics_Pneumo_df['Precision_WithoutAI'] = user_metrics_Pneumo_df['TP_WithoutAI'] / (user_metrics_Pneumo_df['TP_WithoutAI'] + user_metrics_Pneumo_df['FP_WithoutAI'])
user_metrics_Pneumo_df['Recall_WithoutAI'] = user_metrics_Pneumo_df['TP_WithoutAI'] / (user_metrics_Pneumo_df['TP_WithoutAI'] + user_metrics_Pneumo_df['FN_WithoutAI'])

# Now the user_metrics_Pneumo_df DataFrame contains accuracy, precision, and recall for every user both with and without AI.
print("User Metrics for Pneumothorax with AI:")
print(user_metrics_Pneumo_df[['updatedById', 'Accuracy_WithAI', 'Precision_WithAI', 'Recall_WithAI']])

print("User Metrics for Pneumothorax without AI:")
print(user_metrics_Pneumo_df[['updatedById', 'Accuracy_WithoutAI', 'Precision_WithoutAI', 'Recall_WithoutAI']])


User Metrics for Pneumothorax with AI:
   updatedById  Accuracy_WithAI  Precision_WithAI  Recall_WithAI
0     U_60pMP6         0.928571          0.666667       1.000000
1     U_60pVk6         0.785714          0.333333       0.500000
2     U_69P3RQ         0.733333          0.666667       0.400000
3     U_69PnEQ         0.800000          1.000000       0.400000
4     U_6ARnj8         0.928571          0.666667       1.000000
5     U_6wPe98         0.866667          1.000000       0.600000
6     U_OBKAWQ         0.866667          1.000000       0.500000
7     U_OX2wN6         0.857143          0.600000       1.000000
8     U_OjydP8         0.866667          1.000000       0.600000
9     U_OppZ0O         0.733333          0.400000       0.666667
10    U_OqmjnO         0.800000          0.666667       0.500000
11    U_QWX7k6         0.933333          1.000000       0.750000
User Metrics for Pneumothorax without AI:
   updatedById  Accuracy_WithoutAI  Precision_WithoutAI  Recall_WithoutAI


## Mammographhy - metrics

In [8]:
import pandas as pd

df_Mammography_Normal['annotated_Class'] = df_Mammography_Normal['labelName'].map({'Benign': 0, 'Normal': 0, 'Probably Benign': 0, 'Probably Malignant': 1, 'Malignant': 1})
df_Mammography_Normal['Class'] = df_Mammography_Normal['pathology'].map({'BENIGN': 0, 'BENIGN_WITHOUT_CALLBACK': 0, 'MALIGNANT': 1})
df_Mammography_AI['Class'] = df_Mammography_AI['pathology'].map({'BENIGN': 0, 'BENIGN_WITHOUT_CALLBACK': 0, 'MALIGNANT': 1})
df_Mammography_AI['annotated_Class'] = df_Mammography_AI['labelName_user_annotation'].map({'Benign': 0, 'Normal': 0, 'Probably Benign': 0, 'Probably Malignant': 1, 'Malignant': 1})

# Group the data by user and select the first 15 entries for each user
first_15_entries_AI = df_Mammography_AI.groupby('updatedById').head(15)
first_15_entries_Normal = df_Mammography_Normal.groupby('updatedById').head(15)

# Remove rows with NaN values in both arrays
first_15_entries_AI = first_15_entries_AI.dropna(subset=['annotated_Class', 'Class'])
first_15_entries_Normal = first_15_entries_Normal.dropna(subset=['annotated_Class', 'Class'])

# Ensure that both arrays have the same number of elements
min_length = min(len(first_15_entries_AI), len(first_15_entries_Normal))
first_15_entries_AI = first_15_entries_AI.head(min_length)
first_15_entries_Normal = first_15_entries_Normal.head(min_length)

# Create a list to store user metrics
user_metrics_data = []

# Iterate through user IDs
for user_id in first_15_entries_AI['updatedById'].unique():
    # Select data for the current user
    user_data_ai = first_15_entries_AI[first_15_entries_AI['updatedById'] == user_id]
    user_data_normal = first_15_entries_Normal[first_15_entries_Normal['updatedById'] == user_id]

    # Calculate TP, TN, FP, and FN for data with AI
    TP_with_ai = ((user_data_ai['annotated_Class'] == 1) & (user_data_ai['Class'] == 1)).sum()
    TN_with_ai = ((user_data_ai['annotated_Class'] == 0) & (user_data_ai['Class'] == 0)).sum()
    FP_with_ai = ((user_data_ai['annotated_Class'] == 0) & (user_data_ai['Class'] == 1)).sum()
    FN_with_ai = ((user_data_ai['annotated_Class'] == 1) & (user_data_ai['Class'] == 0)).sum()

    # Calculate TP, TN, FP, and FN for data without AI
    TP_without_ai = ((user_data_normal['annotated_Class'] == 1) & (user_data_normal['Class'] == 1)).sum()
    TN_without_ai = ((user_data_normal['annotated_Class'] == 0) & (user_data_normal['Class'] == 0)).sum()
    FP_without_ai = ((user_data_normal['annotated_Class'] == 0) & (user_data_normal['Class'] == 1)).sum()
    FN_without_ai = ((user_data_normal['annotated_Class'] == 1) & (user_data_normal['Class'] == 0)).sum()

    # Append the results to the user_metrics_data list as a dictionary
    user_metrics_data.append({
        'updatedById': user_id,
        'TP_WithAI': TP_with_ai,
        'TN_WithAI': TN_with_ai,
        'FP_WithAI': FP_with_ai,
        'FN_WithAI': FN_with_ai,
        'TP_WithoutAI': TP_without_ai,
        'TN_WithoutAI': TN_without_ai,
        'FP_WithoutAI': FP_without_ai,
        'FN_WithoutAI': FN_without_ai
    })

# Create a DataFrame from the user_metrics_data list
user_metrics_Mammo_df = pd.DataFrame(user_metrics_data)

# Now you have a DataFrame that contains TP, TN, FP, and FN for every user both with and without AI.
print("User Metrics for Mammography:")
print(user_metrics_Mammo_df)


User Metrics for Mammography:
   updatedById  TP_WithAI  TN_WithAI  FP_WithAI  FN_WithAI  TP_WithoutAI  \
0     U_60pMP6          1          9          5          0             2   
1     U_60pVk6          0          9          4          2             0   
2     U_69P3RQ          2          8          3          2             3   
3     U_69PnEQ          3          9          3          0             3   
4     U_6ARnj8          2          5          4          4             4   
5     U_6wPe98          1          7          5          2             3   
6     U_OBKAWQ          1          7          6          1             1   
7     U_OX2wN6          1          6          6          2             8   
8     U_OjydP8          2          8          3          2             1   
9     U_OppZ0O          3          9          2          1             5   
10    U_OqmjnO          2          6          5          2             1   
11    U_QWX7k6          3          8          1          2

In [9]:
# Calculate accuracy, precision, and recall for data with AI
user_metrics_Mammo_df['Accuracy_WithAI'] = (user_metrics_Mammo_df['TP_WithAI'] + user_metrics_Mammo_df['TN_WithAI']) / (user_metrics_Mammo_df['TP_WithAI'] + user_metrics_Mammo_df['TN_WithAI'] + user_metrics_Mammo_df['FP_WithAI'] + user_metrics_Mammo_df['FN_WithAI'])
user_metrics_Mammo_df['Precision_WithAI'] = user_metrics_Mammo_df['TP_WithAI'] / (user_metrics_Mammo_df['TP_WithAI'] + user_metrics_Mammo_df['FP_WithAI'])
user_metrics_Mammo_df['Recall_WithAI'] = user_metrics_Mammo_df['TP_WithAI'] / (user_metrics_Mammo_df['TP_WithAI'] + user_metrics_Mammo_df['FN_WithAI'])

# Calculate accuracy, precision, and recall for data without AI
user_metrics_Mammo_df['Accuracy_WithoutAI'] = (user_metrics_Mammo_df['TP_WithoutAI'] + user_metrics_Mammo_df['TN_WithoutAI']) / (user_metrics_Mammo_df['TP_WithoutAI'] + user_metrics_Mammo_df['TN_WithoutAI'] + user_metrics_Mammo_df['FP_WithoutAI'] + user_metrics_Mammo_df['FN_WithoutAI'])
user_metrics_Mammo_df['Precision_WithoutAI'] = user_metrics_Mammo_df['TP_WithoutAI'] / (user_metrics_Mammo_df['TP_WithoutAI'] + user_metrics_Mammo_df['FP_WithoutAI'])
user_metrics_Mammo_df['Recall_WithoutAI'] = user_metrics_Mammo_df['TP_WithoutAI'] / (user_metrics_Mammo_df['TP_WithoutAI'] + user_metrics_Mammo_df['FN_WithoutAI'])

# Now the user_metrics_Mammo_df DataFrame contains accuracy, precision, and recall for every user both with and without AI.
print("User Metrics for Mammography with AI:")
print(user_metrics_Mammo_df[['updatedById', 'Accuracy_WithAI', 'Precision_WithAI', 'Recall_WithAI']])

print("User Metrics for Mammography without AI:")
print(user_metrics_Mammo_df[['updatedById', 'Accuracy_WithoutAI', 'Precision_WithoutAI', 'Recall_WithoutAI']])


User Metrics for Mammography with AI:
   updatedById  Accuracy_WithAI  Precision_WithAI  Recall_WithAI
0     U_60pMP6         0.666667          0.166667       1.000000
1     U_60pVk6         0.600000          0.000000       0.000000
2     U_69P3RQ         0.666667          0.400000       0.500000
3     U_69PnEQ         0.800000          0.500000       1.000000
4     U_6ARnj8         0.466667          0.333333       0.333333
5     U_6wPe98         0.533333          0.166667       0.333333
6     U_OBKAWQ         0.533333          0.142857       0.500000
7     U_OX2wN6         0.466667          0.142857       0.333333
8     U_OjydP8         0.666667          0.400000       0.500000
9     U_OppZ0O         0.800000          0.600000       0.750000
10    U_OqmjnO         0.533333          0.285714       0.500000
11    U_QWX7k6         0.785714          0.750000       0.600000
User Metrics for Mammography without AI:
   updatedById  Accuracy_WithoutAI  Precision_WithoutAI  Recall_WithoutAI
0 

## CXR - metrics

In [10]:
import pandas as pd

# Define the ground truth columns
ground_truth_columns = ['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Effusion', 'Emphysema',
                        'Fibrosis', 'Hernia', 'Infiltration', 'Mass', 'No Finding', 'Nodule',
                        'Pleural_Thickening', 'Pneumonia', 'Pneumothorax']

# Define a function to calculate TP, TN, FP, FN
def calculate_metrics(df, user_col, gt_col):
    TP = ((df[gt_col] == 1) & (df[user_col] == 1)).sum()
    TN = ((df[gt_col] == 0) & (df[user_col] == 0)).sum()
    FP = ((df[gt_col] == 0) & (df[user_col] == 1)).sum()
    FN = ((df[gt_col] == 1) & (df[user_col] == 0)).sum()
    return TP, TN, FP, FN

# Create a list to store user metrics
user_metrics_data = []

# Group the DataFrame by 'updatedById'
grouped_normal = df_CXR_Normal.groupby('updatedById')
grouped_ai = df_CXR_AI.groupby('updatedById')

# Iterate through user IDs
for user_id in df_CXR_Normal['updatedById'].unique():
    user_data_normal = grouped_normal.get_group(user_id)
    user_data_ai = grouped_ai.get_group(user_id)
    
    # Calculate TP, TN, FP, FN for each ground truth column
    user_metrics = {'updatedById': user_id}
    for gt_col in ground_truth_columns:
        TP_normal, TN_normal, FP_normal, FN_normal = calculate_metrics(user_data_normal, f'{gt_col}_user_annotation', gt_col)
        TP_ai, TN_ai, FP_ai, FN_ai = calculate_metrics(user_data_ai, f'{gt_col}_user_annotation', gt_col)
        
        user_metrics[f'TP_Normal_{gt_col}'] = TP_normal
        user_metrics[f'TN_Normal_{gt_col}'] = TN_normal
        user_metrics[f'FP_Normal_{gt_col}'] = FP_normal
        user_metrics[f'FN_Normal_{gt_col}'] = FN_normal
        user_metrics[f'TP_AI_{gt_col}'] = TP_ai
        user_metrics[f'TN_AI_{gt_col}'] = TN_ai
        user_metrics[f'FP_AI_{gt_col}'] = FP_ai
        user_metrics[f'FN_AI_{gt_col}'] = FN_ai

    user_metrics_data.append(user_metrics)

# Create DataFrames for normal and AI user metrics
user_metrics_df = pd.DataFrame(user_metrics_data)
user_metrics_df
# Now you have DataFrames with TP, TN, FP, FN for every user both with and without AI for each ground truth column.
# You can proceed to calculate accuracy from these metrics.


Unnamed: 0,updatedById,TP_Normal_Atelectasis,TN_Normal_Atelectasis,FP_Normal_Atelectasis,FN_Normal_Atelectasis,TP_AI_Atelectasis,TN_AI_Atelectasis,FP_AI_Atelectasis,FN_AI_Atelectasis,TP_Normal_Cardiomegaly,...,FP_AI_Pneumonia,FN_AI_Pneumonia,TP_Normal_Pneumothorax,TN_Normal_Pneumothorax,FP_Normal_Pneumothorax,FN_Normal_Pneumothorax,TP_AI_Pneumothorax,TN_AI_Pneumothorax,FP_AI_Pneumothorax,FN_AI_Pneumothorax
0,U_60pMP6,0,23,1,1,0,15,0,0,0,...,0,0,0,30,0,0,0,15,0,0
1,U_60pVk6,0,12,0,1,0,10,3,1,0,...,0,1,0,14,0,0,0,15,0,0
2,U_69P3RQ,0,10,1,3,0,12,3,0,0,...,1,0,0,15,0,0,0,15,0,0
3,U_69PnEQ,0,26,1,0,0,14,1,0,0,...,0,0,0,30,0,0,0,15,0,0
4,U_6ARnj8,0,26,0,2,1,8,1,0,1,...,0,0,1,29,0,0,0,11,0,0
5,U_6wPe98,0,12,0,2,0,13,3,0,0,...,0,0,0,13,0,2,0,16,0,0
6,U_OBKAWQ,2,10,0,3,0,12,3,0,0,...,0,0,0,13,0,1,0,15,0,0
7,U_OX2wN6,0,13,1,1,0,11,1,0,1,...,0,0,0,14,1,0,0,15,0,0
8,U_OjydP8,0,19,2,6,0,12,2,1,1,...,0,0,0,29,0,1,0,15,0,0
9,U_OppZ0O,1,11,2,1,0,15,0,0,0,...,0,0,0,15,0,0,0,15,0,0


In [11]:
import pandas as pd

# Define the ground truth columns
ground_truth_columns = ['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Effusion', 'Emphysema',
                        'Fibrosis', 'Hernia', 'Infiltration', 'Mass', 'No Finding', 'Nodule',
                        'Pleural_Thickening', 'Pneumonia', 'Pneumothorax']

# Define a function to calculate TP, TN, FP, FN
def calculate_metrics(df, user_col, gt_col):
    TP = ((df[gt_col] == 1) & (df[user_col] == 1)).sum()
    TN = ((df[gt_col] == 0) & (df[user_col] == 0)).sum()
    FP = ((df[gt_col] == 0) & (df[user_col] == 1)).sum()
    FN = ((df[gt_col] == 1) & (df[user_col] == 0)).sum()
    return TP, TN, FP, FN

# Create a list to store user metrics
user_metrics_data = []

# Group the DataFrame by 'updatedById'
grouped_normal = df_CXR_Normal.groupby('updatedById')
grouped_ai = df_CXR_AI.groupby('updatedById')

# Iterate through user IDs
for user_id in df_CXR_Normal['updatedById'].unique():
    user_data_normal = grouped_normal.get_group(user_id)
    user_data_ai = grouped_ai.get_group(user_id)
    
    # Calculate TP, TN, FP, FN for each ground truth column and for the entire modality
    user_metrics = {'updatedById': user_id}
    modality_TP_normal = 0
    modality_TN_normal = 0
    modality_FP_normal = 0
    modality_FN_normal = 0
    modality_TP_ai = 0
    modality_TN_ai = 0
    modality_FP_ai = 0
    modality_FN_ai = 0

    for gt_col in ground_truth_columns:
        TP_normal, TN_normal, FP_normal, FN_normal = calculate_metrics(user_data_normal, f'{gt_col}_user_annotation', gt_col)
        TP_ai, TN_ai, FP_ai, FN_ai = calculate_metrics(user_data_ai, f'{gt_col}_user_annotation', gt_col)
        
        user_metrics[f'TP_Normal_{gt_col}'] = TP_normal
        user_metrics[f'TN_Normal_{gt_col}'] = TN_normal
        user_metrics[f'FP_Normal_{gt_col}'] = FP_normal
        user_metrics[f'FN_Normal_{gt_col}'] = FN_normal
        user_metrics[f'TP_AI_{gt_col}'] = TP_ai
        user_metrics[f'TN_AI_{gt_col}'] = TN_ai
        user_metrics[f'FP_AI_{gt_col}'] = FP_ai
        user_metrics[f'FN_AI_{gt_col}'] = FN_ai

        # Accumulate values for the entire modality
        modality_TP_normal += TP_normal
        modality_TN_normal += TN_normal
        modality_FP_normal += FP_normal
        modality_FN_normal += FN_normal
        modality_TP_ai += TP_ai
        modality_TN_ai += TN_ai
        modality_FP_ai += FP_ai
        modality_FN_ai += FN_ai

    # Store modality-level metrics in the user_metrics dictionary
    user_metrics['TP_WithoutAI'] = modality_TP_normal
    user_metrics['TN_WithoutAI'] = modality_TN_normal
    user_metrics['FP_WithoutAI'] = modality_FP_normal
    user_metrics['FN_WithoutAI'] = modality_FN_normal
    user_metrics['TP_WithAI'] = modality_TP_ai
    user_metrics['TN_WithAI'] = modality_TN_ai
    user_metrics['FP_WithAI'] = modality_FP_ai
    user_metrics['FN_WithAI'] = modality_FN_ai

    user_metrics_data.append(user_metrics)

# Create DataFrames for normal and AI user metrics
user_metrics_df = pd.DataFrame(user_metrics_data)
user_metrics_df


Unnamed: 0,updatedById,TP_Normal_Atelectasis,TN_Normal_Atelectasis,FP_Normal_Atelectasis,FN_Normal_Atelectasis,TP_AI_Atelectasis,TN_AI_Atelectasis,FP_AI_Atelectasis,FN_AI_Atelectasis,TP_Normal_Cardiomegaly,...,FP_AI_Pneumothorax,FN_AI_Pneumothorax,TP_WithoutAI,TN_WithoutAI,FP_WithoutAI,FN_WithoutAI,TP_WithAI,TN_WithAI,FP_WithAI,FN_WithAI
0,U_60pMP6,0,23,1,1,0,15,0,0,0,...,0,0,14,401,18,8,8,201,4,9
1,U_60pVk6,0,12,0,1,0,10,3,1,0,...,0,0,8,187,7,5,10,193,9,9
2,U_69P3RQ,0,10,1,3,0,12,3,0,0,...,0,0,3,189,13,14,9,193,10,9
3,U_69PnEQ,0,26,1,0,0,14,1,0,0,...,0,0,8,391,22,15,7,195,10,8
4,U_6ARnj8,0,26,0,2,1,8,1,0,1,...,0,0,13,394,17,12,3,142,9,8
5,U_6wPe98,0,12,0,2,0,13,3,0,0,...,0,0,3,190,11,15,8,214,6,9
6,U_OBKAWQ,2,10,0,3,0,12,3,0,0,...,0,0,9,200,6,6,9,195,11,8
7,U_OX2wN6,0,13,1,1,0,11,1,0,1,...,0,0,9,198,9,6,7,199,5,8
8,U_OjydP8,0,19,2,6,0,12,2,1,1,...,0,0,11,386,21,24,10,191,14,7
9,U_OppZ0O,1,11,2,1,0,15,0,0,0,...,0,0,6,193,12,10,13,208,2,2


In [12]:
# List of columns to extract
CXR_columns = ['updatedById','TP_WithoutAI', 'TN_WithoutAI', 'FP_WithoutAI', 'FN_WithoutAI',
                    'TP_WithAI', 'TN_WithAI', 'FP_WithAI', 'FN_WithAI']

# Create a DataFrame with the selected columns
CXR_metrics_df = user_metrics_df[CXR_columns]

# Print the new DataFrame
print(CXR_metrics_df)


   updatedById  TP_WithoutAI  TN_WithoutAI  FP_WithoutAI  FN_WithoutAI  \
0     U_60pMP6            14           401            18             8   
1     U_60pVk6             8           187             7             5   
2     U_69P3RQ             3           189            13            14   
3     U_69PnEQ             8           391            22            15   
4     U_6ARnj8            13           394            17            12   
5     U_6wPe98             3           190            11            15   
6     U_OBKAWQ             9           200             6             6   
7     U_OX2wN6             9           198             9             6   
8     U_OjydP8            11           386            21            24   
9     U_OppZ0O             6           193            12            10   
10    U_OqmjnO             3           193            12             6   
11    U_QWX7k6             3           190            14            12   

    TP_WithAI  TN_WithAI  FP_WithAI  

In [16]:
import warnings
warnings.filterwarnings("ignore")

# Your code here
# Calculate Accuracy for 'WithAI' and 'WithoutAI' separately and replace in CXR_metrics_df
CXR_metrics_df['Accuracy_WithAI'] = (CXR_metrics_df['TP_WithAI'] + CXR_metrics_df['TN_WithAI']) / (CXR_metrics_df['TP_WithAI'] + CXR_metrics_df['TN_WithAI'] + CXR_metrics_df['FP_WithAI'] + CXR_metrics_df['FN_WithAI'])
CXR_metrics_df['Accuracy_WithoutAI'] = (CXR_metrics_df['TP_WithoutAI'] + CXR_metrics_df['TN_WithoutAI']) / (CXR_metrics_df['TP_WithoutAI'] + CXR_metrics_df['TN_WithoutAI'] + CXR_metrics_df['FP_WithoutAI'] + CXR_metrics_df['FN_WithoutAI'])

# Print the updated CXR_metrics_df
print(CXR_metrics_df)



   updatedById  TP_WithoutAI  TN_WithoutAI  FP_WithoutAI  FN_WithoutAI  \
0     U_60pMP6            14           401            18             8   
1     U_60pVk6             8           187             7             5   
2     U_69P3RQ             3           189            13            14   
3     U_69PnEQ             8           391            22            15   
4     U_6ARnj8            13           394            17            12   
5     U_6wPe98             3           190            11            15   
6     U_OBKAWQ             9           200             6             6   
7     U_OX2wN6             9           198             9             6   
8     U_OjydP8            11           386            21            24   
9     U_OppZ0O             6           193            12            10   
10    U_OqmjnO             3           193            12             6   
11    U_QWX7k6             3           190            14            12   

    TP_WithAI  TN_WithAI  FP_WithAI  

In [14]:
# Define the list of DataFrames
dataframes = [user_metrics_MURA_df, user_metrics_Pneumo_df, user_metrics_Mammo_df, CXR_metrics_df]

# Define the list of columns to sum
columns_to_sum = ['TP_WithoutAI', 'TN_WithoutAI', 'FP_WithoutAI', 'FN_WithoutAI',
                    'TP_WithAI', 'TN_WithAI', 'FP_WithAI', 'FN_WithAI']

# Initialize an empty DataFrame to store the combined results
combined_metrics_df = pd.DataFrame()

# Iterate through the list of columns
for column in columns_to_sum:
    total_column = pd.concat([df.set_index('updatedById')[column] for df in dataframes], axis=1).sum(axis=1)
    combined_metrics_df[column] = total_column

# Set the 'updatedById' column as a regular column
combined_metrics_df.reset_index(inplace=True)

# Print the combined DataFrame
print(combined_metrics_df)


   updatedById  TP_WithoutAI  TN_WithoutAI  FP_WithoutAI  FN_WithoutAI  \
0     U_60pMP6            24           422            29            11   
1     U_60pVk6            18           209            17             8   
2     U_69P3RQ            13           213            16            22   
3     U_69PnEQ            20           406            25            28   
4     U_6ARnj8            23           416            21            21   
5     U_6wPe98            15           205            16            28   
6     U_OBKAWQ            21           215            15            15   
7     U_OX2wN6            24           218            10            13   
8     U_OjydP8            20           409            25            33   
9     U_OppZ0O            18           212            13            23   
10    U_OqmjnO            12           216            16            15   
11    U_QWX7k6            16           206            18            23   

    TP_WithAI  TN_WithAI  FP_WithAI  

In [15]:
# Calculate Accuracy for 'WithAI' and 'WithoutAI' separately
combined_metrics_df['Accuracy_WithAI'] = (combined_metrics_df['TP_WithAI'] + combined_metrics_df['TN_WithAI']) / (combined_metrics_df['TP_WithAI'] + combined_metrics_df['TN_WithAI'] + combined_metrics_df['FP_WithAI'] + combined_metrics_df['FN_WithAI'])
combined_metrics_df['Accuracy_WithoutAI'] = (combined_metrics_df['TP_WithoutAI'] + combined_metrics_df['TN_WithoutAI']) / (combined_metrics_df['TP_WithoutAI'] + combined_metrics_df['TN_WithoutAI'] + combined_metrics_df['FP_WithoutAI'] + combined_metrics_df['FN_WithoutAI'])

# Print the combined DataFrame with accuracy for 'WithAI' and 'WithoutAI'
print(combined_metrics_df)

   updatedById  TP_WithoutAI  TN_WithoutAI  FP_WithoutAI  FN_WithoutAI  \
0     U_60pMP6            24           422            29            11   
1     U_60pVk6            18           209            17             8   
2     U_69P3RQ            13           213            16            22   
3     U_69PnEQ            20           406            25            28   
4     U_6ARnj8            23           416            21            21   
5     U_6wPe98            15           205            16            28   
6     U_OBKAWQ            21           215            15            15   
7     U_OX2wN6            24           218            10            13   
8     U_OjydP8            20           409            25            33   
9     U_OppZ0O            18           212            13            23   
10    U_OqmjnO            12           216            16            15   
11    U_QWX7k6            16           206            18            23   

    TP_WithAI  TN_WithAI  FP_WithAI  