In [1]:
# Study-wise evaluation: metrics are computed separately for each row (image) of the sheet.
import pandas as pd

def extract_labels_from_scores(row, labels, threshold=0.5):
    """Return the labels whose score in ``row`` is strictly above ``threshold``.

    ``row`` is indexed by label name (``row[label]``); the result keeps the
    order of ``labels``.
    """
    selected = []
    for name in labels:
        score = row[name]
        if score > threshold:
            selected.append(name)
    return selected

def calculate_metrics(predictions, ground_truth, labels):
    """Score one collection of predicted labels against a reference collection.

    Each entry of ``labels`` is counted as a true/false positive/negative
    according to whether it appears in ``predictions`` and in ``ground_truth``.
    Any ratio whose denominator is zero is reported as 0.

    Returns:
        A dict with the keys "TP", "TN", "FP", "FN", "Accuracy", "Precision",
        "Recall" and "F1 Score".
    """
    # Insertion order here fixes the key order of the returned dict.
    tally = {"TP": 0, "TN": 0, "FP": 0, "FN": 0}
    for name in labels:
        predicted = name in predictions
        expected = name in ground_truth
        if predicted and expected:
            tally["TP"] += 1
        elif predicted:
            tally["FP"] += 1
        elif expected:
            tally["FN"] += 1
        else:
            tally["TN"] += 1

    total = sum(tally.values())
    predicted_positive = tally["TP"] + tally["FP"]
    actual_positive = tally["TP"] + tally["FN"]

    accuracy = (tally["TP"] + tally["TN"]) / total if total != 0 else 0
    precision = tally["TP"] / predicted_positive if predicted_positive != 0 else 0
    recall = tally["TP"] / actual_positive if actual_positive != 0 else 0

    pr_sum = precision + recall
    f1_score = 2 * (precision * recall) / pr_sum if pr_sum != 0 else 0

    return {
        **tally,
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1_score,
    }

def _split_label_cell(cell):
    """Split a ', '-separated label cell into a list; a missing cell gives []."""
    # pandas reads an empty Excel cell as NaN (a float), which has no .split().
    if pd.isna(cell):
        return []
    return cell.split(', ')


def main():
    """Compute per-row metrics for BiomedCLIP, GPT and ground truth and save them.

    Reads the averaged score sheet, derives three label lists for every row
    (thresholded BiomedCLIP scores, GPT-extracted labels, ground-truth labels),
    scores the three pairings with ``calculate_metrics`` and writes one output
    row per input row to an Excel file.
    """
    # Read the Excel file
    df = pd.read_excel("/Users/charliethebear/Documents/Lab/2023_summer/excel/0822_average_scores.xlsx")

    # Columns 1..18 (0-based) are taken as the label score columns.
    # NOTE(review): this assumes column 0 is 'Image Name' and that exactly 18
    # label columns follow it -- adjust the slice if the sheet layout changes.
    labels = list(df.columns)[1:19]

    metrics_data = []

    # Process the data
    for _, row in df.iterrows():
        gt_labels = _split_label_cell(row['gt_labels'])
        biomedclip_labels = extract_labels_from_scores(row, labels)
        gpt_labels = _split_label_cell(row['Extracted_labels(GPT-3.5)'])

        # Calculate metrics (first argument is treated as the prediction,
        # second as the reference)
        metrics_biomedclip_vs_gt = calculate_metrics(biomedclip_labels, gt_labels, labels)
        metrics_gpt_vs_gt = calculate_metrics(gpt_labels, gt_labels, labels)
        metrics_biomedclip_vs_gpt = calculate_metrics(biomedclip_labels, gpt_labels, labels)

        metrics_data.append({
            "Image Name": row["Image Name"],
            **{f"BiomedCLIP vs GT - {key}": value for key, value in metrics_biomedclip_vs_gt.items()},
            **{f"GPT vs GT - {key}": value for key, value in metrics_gpt_vs_gt.items()},
            **{f"BiomedCLIP vs GPT - {key}": value for key, value in metrics_biomedclip_vs_gpt.items()}
        })

    metrics_df = pd.DataFrame(metrics_data)
    metrics_df.to_excel("/Users/charliethebear/Documents/Lab/2023_summer/excel/0822_evaluation.xlsx", index=False)

# Entry point: run main() only when this code is executed directly, not on import.
if __name__ == "__main__":
    main()


In [None]:
# Label-wise evaluation: metrics are computed separately for each label, aggregated over all rows.
import pandas as pd

def extract_labels_from_scores(row, labels, threshold=0.5):
    """Pick out the labels whose ``row`` score exceeds ``threshold`` (strictly).

    ``row`` is looked up by label name; the order of ``labels`` is preserved.
    """
    def is_above(name):
        return row[name] > threshold

    return list(filter(is_above, labels))

def _split_label_cell(cell):
    """Split a ', '-separated label cell into a list; a missing cell gives []."""
    # pandas reads an empty Excel cell as NaN (a float), which has no .split().
    if pd.isna(cell):
        return []
    return cell.split(', ')


def calculate_labelwise_metrics(df, labels):
    """Compute confusion counts and derived metrics for every label.

    For each label, the rows of ``df`` are tallied into TP/TN/FP/FN for three
    pairings (prediction vs reference): BiomedCLIP vs GT, GPT vs GT and
    BiomedCLIP vs GPT. Ratios with a zero denominator are reported as 0.

    Args:
        df: DataFrame with one score column per entry of ``labels`` plus the
            columns 'gt_labels' and 'Extracted_labels(GPT-3.5)', each holding
            a ', '-separated string of labels (missing cells count as empty).
        labels: Names of the label score columns.

    Returns:
        A DataFrame with one row per label: a "Label" column followed by
        "<pairing> - TP/TN/FP/FN/Accuracy/Precision/Recall/F1 Score" columns.
    """
    # Parse every row once up front; the label sets do not depend on which
    # label or pairing is being tallied.
    per_row = []
    for _, row in df.iterrows():
        per_row.append({
            "GT": set(_split_label_cell(row['gt_labels'])),
            "BiomedCLIP": set(extract_labels_from_scores(row, labels)),
            "GPT": set(_split_label_cell(row['Extracted_labels(GPT-3.5)'])),
        })

    # (column prefix, prediction source, reference source). Spelled out
    # explicitly: for "BiomedCLIP vs GPT" the reference is GPT.
    comparisons = [
        ("BiomedCLIP vs GT", "BiomedCLIP", "GT"),
        ("GPT vs GT", "GPT", "GT"),
        ("BiomedCLIP vs GPT", "BiomedCLIP", "GPT"),
    ]

    labelwise_data = []

    for label in labels:
        metrics_data = {"Label": label}

        for prefix, pred_key, true_key in comparisons:
            tp, tn, fp, fn = 0, 0, 0, 0

            for label_sets in per_row:
                predicted = label in label_sets[pred_key]
                actual = label in label_sets[true_key]

                if predicted and actual:
                    tp += 1
                elif predicted:
                    fp += 1
                elif actual:
                    fn += 1
                else:
                    tn += 1

            accuracy = (tp + tn) / (tp + tn + fp + fn) if (tp + tn + fp + fn) != 0 else 0
            precision = tp / (tp + fp) if (tp + fp) != 0 else 0
            recall = tp / (tp + fn) if (tp + fn) != 0 else 0
            f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) != 0 else 0

            metrics_data.update({
                f"{prefix} - TP": tp, f"{prefix} - TN": tn, f"{prefix} - FP": fp, f"{prefix} - FN": fn,
                f"{prefix} - Accuracy": accuracy, f"{prefix} - Precision": precision, f"{prefix} - Recall": recall, f"{prefix} - F1 Score": f1_score
            })

        labelwise_data.append(metrics_data)

    return pd.DataFrame(labelwise_data)

def main():
    """Load the averaged score sheet, compute per-label metrics and save them to Excel."""
    source_path = "/Users/charliethebear/Documents/Lab/2023_summer/excel/0822_average_scores.xlsx"
    target_path = "/Users/charliethebear/Documents/Lab/2023_summer/excel/0822_evaluation_labelwise.xlsx"

    scores = pd.read_excel(source_path)

    # Columns 1..18 (0-based) are taken as the label columns; adjust the slice
    # to match the column layout of the Excel file.
    all_columns = list(scores.columns)
    label_columns = all_columns[1:19]

    # One output row per label, written without the DataFrame index.
    calculate_labelwise_metrics(scores, label_columns).to_excel(target_path, index=False)

# Entry point: run main() only when this code is executed directly, not on import.
if __name__ == "__main__":
    main()
