In [1]:
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, roc_auc_score, precision_score, recall_score

# Read the study spreadsheet (CSV export) into a DataFrame.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
file_path = 'E:/1 PROJECT/RESEARCH PROJECTS/HTAN research/DR/DAta/wrkingSPSS_editGPT4_2.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# Helpers to calculate the performance metrics
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    # Each numerator below is one term of its own denominator, so a zero
    # denominator always means 0/0.  Returning NaN directly gives the same
    # value NumPy would produce, without emitting a RuntimeWarning.
    if denominator == 0:
        return float('nan')
    return numerator / denominator


def calculate_metrics(true_labels, predictions):
    """Compute binary-classification performance metrics for one model.

    Parameters
    ----------
    true_labels : array-like
        Ground-truth labels.  Assumed to be coded 0 (negative) and
        1 (positive) - TODO confirm against the dataset.
    predictions : array-like
        Predicted hard labels, using the same 0/1 coding.

    Returns
    -------
    tuple
        ``(sensitivity, specificity, ppv, npv, accuracy, f1, auc)``.
        Any ratio whose denominator is zero (e.g. NPV when the model never
        predicts the negative class) is reported as NaN.  ``auc`` is NaN
        when ``true_labels`` contains only one class, because ROC AUC is
        undefined in that case.

    Notes
    -----
    ``auc`` is computed from hard 0/1 predictions rather than continuous
    scores, so the ROC curve has a single operating point.
    """
    # Pin the label order so the matrix is always 2x2 and the four-way
    # unpacking below cannot fail when a class is absent from the data.
    tn, fp, fn, tp = confusion_matrix(
        true_labels, predictions, labels=[0, 1]
    ).ravel()

    sensitivity = _safe_ratio(tp, tp + fn)  # Recall
    specificity = _safe_ratio(tn, tn + fp)
    ppv = _safe_ratio(tp, tp + fp)  # Precision
    npv = _safe_ratio(tn, tn + fn)
    accuracy = accuracy_score(true_labels, predictions)
    f1 = f1_score(true_labels, predictions)

    # ROC AUC needs both classes in the ground truth.
    if (tp + fn) > 0 and (tn + fp) > 0:
        auc = roc_auc_score(true_labels, predictions)
    else:
        auc = float('nan')

    return sensitivity, specificity, ppv, npv, accuracy, f1, auc

# Normalise a label column so truth and predictions share one dtype
def convert_to_int(series):
    """Return ``series`` cast to an integer dtype via ``astype(int)``."""
    as_integers = series.astype(int)
    return as_integers

# Ground-truth column and per-model prediction columns: presence of DR
true_dr = 'TRUE_DR'
predictions_dr = ['DR_GPT4', 'DR_Gemini1.5', 'DR_Claude3', 'DR_mistralAI']

# Ground-truth column and per-model prediction columns: referable DR
true_rdr = 'rDR_true'
predictions_rdr = ['rDR_GPT4', 'rDR_Gemini', 'rDR_Clade3', 'rDR_Mistral']

# Ground-truth column and per-model prediction columns: maculopathy
true_maculopathy = 'TRUE_Maculopathy'
predictions_maculopathy = ['Maculopathy_GPT4', 'Maculopathy_Gemini1.5', 'Maculopathy_Claude3', 'Maculopathy_mistralAI']

# Cast every label column to int: the three ground-truth columns first,
# then the prediction columns task by task.
label_columns = [
    true_dr, true_rdr, true_maculopathy,
    *predictions_dr, *predictions_rdr, *predictions_maculopathy,
]
for col in label_columns:
    df[col] = convert_to_int(df[col])

# Header of the results table; order matches calculate_metrics' return tuple
metrics_columns = ['Model', 'Sensitivity', 'Specificity', 'PPV', 'NPV', 'Accuracy', 'F1 Score', 'AUROC']

# One entry per task: (row-label prefix, ground-truth column, prediction columns)
evaluation_tasks = [
    ('Presence of DR - ', true_dr, predictions_dr),
    ('Presence of referable DR - ', true_rdr, predictions_rdr),
    ('Presence of Maculopathy - ', true_maculopathy, predictions_maculopathy),
]

# Score every model on every task, one result row per (task, model) pair
results = []
for label_prefix, truth_column, prediction_columns in evaluation_tasks:
    for model in prediction_columns:
        metrics = calculate_metrics(df[truth_column], df[model])
        results.append([label_prefix + model, *metrics])

# Tabulate the rows and show them
results_df = pd.DataFrame(results, columns=metrics_columns)
print(results_df)


                                              Model  Sensitivity  Specificity  \
0                          Presence of DR - DR_GPT4     0.819672     0.792453   
1                     Presence of DR - DR_Gemini1.5     0.983607     0.094340   
2                       Presence of DR - DR_Claude3     0.983607     0.094340   
3                     Presence of DR - DR_mistralAI     1.000000     0.000000   
4               Presence of referable DR - rDR_GPT4     0.800000     0.804688   
5             Presence of referable DR - rDR_Gemini     0.680000     0.906250   
6             Presence of referable DR - rDR_Clade3     0.960000     0.156250   
7            Presence of referable DR - rDR_Mistral     0.620000     0.390625   
8        Presence of Maculopathy - Maculopathy_GPT4     0.425000     0.864865   
9   Presence of Maculopathy - Maculopathy_Gemini1.5     0.825000     0.229730   
10    Presence of Maculopathy - Maculopathy_Claude3     0.100000     0.905405   
11  Presence of Maculopathy 

  npv = tn / (tn + fn)
