In [1]:
import pandas as pd
from collections import Counter

# Functions

In [2]:
def average_precision_at_k(row, k):
    """Average precision of one query over its top-``k`` retrieved diagnoses.

    Parameters
    ----------
    row : mapping (e.g. a ``pandas.Series`` or ``dict``)
        Must provide ``query_diagnosis`` and ``ret_<i>_diagnosis`` for every
        rank ``i`` in ``1..k``.
    k : int
        Number of ranked retrievals to inspect.

    Returns
    -------
    int or float
        ``0`` when no inspected retrieval matches the query diagnosis;
        otherwise the mean of precision@i taken over the ranks ``i`` at which
        a match occurs.
    """
    hits = 0
    precision_sum = 0
    for rank in range(1, k + 1):
        retrieved = row[f'ret_{rank}_diagnosis']
        # pd.isnull treats None, NaN and pd.NA alike (same check as
        # majority_voting_at_k). A missing slot is skipped but still occupies
        # its rank, so later hits keep their original rank as denominator.
        if pd.isnull(retrieved):
            continue
        if retrieved == row['query_diagnosis']:
            hits += 1
            precision_sum += hits / rank
    if hits == 0:
        return 0
    # hits can never exceed k (at most one hit per inspected rank), so the
    # normaliser min(k, hits) is simply hits.
    return precision_sum / hits

In [3]:
def majority_voting_at_k(row, k):
    """Score one query by majority vote over its top-``k`` retrieved diagnoses.

    Each rank ``1..k`` casts one vote: the value of ``ret_<i>_diagnosis``, or
    the placeholder ``-1`` when that value is null.

    Returns
    -------
    1 when the most frequent vote equals ``row['query_diagnosis']``,
    ``None`` when the most frequent vote is the missing-value placeholder,
    and ``0`` otherwise (including when ``k`` produces no votes at all).
    """
    ballots = []
    for rank in range(1, k + 1):
        label = row[f'ret_{rank}_diagnosis']
        ballots.append(-1 if pd.isnull(label) else label)

    if len(ballots) == 0:
        return 0

    # most_common(1) yields a single (label, count) pair.
    (winner, _count), = Counter(ballots).most_common(1)
    if winner == row['query_diagnosis']:
        return 1
    return None if winner == -1 else 0

In [4]:
def calculate_site_metrics(df):
    """Add per-query MV@k / AP@k columns to ``df`` and return their means.

    Side effect: ``df`` gains the columns ``MV_at_1``, ``MV_at_3``,
    ``MV_at_5``, ``AP_at_3`` and ``AP_at_5`` (assigned in place).

    Returns
    -------
    list
        Column means in the order MV@1, MV@3, MV@5, AP@3, AP@5.
    """
    means = []
    metric_plan = (
        ('MV', majority_voting_at_k, (1, 3, 5)),
        ('AP', average_precision_at_k, (3, 5)),
    )
    for prefix, scorer, cutoffs in metric_plan:
        for k in cutoffs:
            column = f'{prefix}_at_{k}'
            # The lambda is consumed immediately by apply, so it sees the
            # current scorer and k.
            df[column] = df.apply(lambda row: scorer(row, k), axis=1)
            means.append(df[column].mean())
    return means

In [None]:
# Mean MV@k / AP@k per model for the CPTAC GBM experiment; one summary row per
# (experiment, model) pair is written to site_results.csv.
records = []
for model in ["yottixel", "sish", "retccl", "hshr"]:
    for exp in ["GBM_MICROSCOPE_CPTAC"]:
        site_results = pd.read_csv(f"data/{model}/{exp}/sub_type.csv")

        if exp == "BRCA_HER2":
            # Only taken if "BRCA_HER2" is added to the experiment list above:
            # HER2-positive and HER2-negative queries are reported separately.
            # .copy() gives calculate_site_metrics an independent frame to add
            # its metric columns to, instead of a filtered slice of
            # site_results (which raises SettingWithCopyWarning).
            df1 = site_results[site_results["query_name"].str.startswith("Her2Pos")].copy()
            df2 = site_results[site_results["query_name"].str.startswith("Her2Neg")].copy()
            # HER2-positive subset
            tmp = calculate_site_metrics(df1)
            tmp.extend(["Her2Pos", model])
            records.append(tmp)
            # HER2-negative subset
            tmp = calculate_site_metrics(df2)
            tmp.extend(["Her2Neg", model])
            records.append(tmp)
        else:
            tmp = calculate_site_metrics(site_results)
            tmp.extend([exp, model])
            records.append(tmp)

columns = ["mMV_at_1", "mMV_at_3", "mMV_at_5", "mAP_at_3", "mAP_at_5", "Exp", "Model"]
df = pd.DataFrame.from_records(records, columns=columns)
df.to_csv("site_results.csv", index=False)

In [None]:
# Mean MV@k / AP@k per model for the UPENN GBM experiment, saved as one
# summary row per (experiment, model) pair.
records = []
for model in ("yottixel", "sish", "hshr"):
    for exp in ("GBM_MICROSCOPE_UPENN",):
        site_results = pd.read_csv(f"data/{model}/{exp}/sub_type.csv")
        # Five metric means followed by the experiment and model labels.
        tmp = calculate_site_metrics(site_results) + [exp, model]
        records.append(tmp)

columns = [
    "mMV_at_1", "mMV_at_3", "mMV_at_5",
    "mAP_at_3", "mAP_at_5",
    "Exp", "Model",
]
df = pd.DataFrame.from_records(records, columns=columns)
df.to_csv("site_results2.csv", index=False)

In [None]:
# Export the per-query (not averaged) MV@k / AP@k scores for every model and
# cohort to "<model>_<exp>.csv".
columns = ["MV_at_1", "MV_at_3", "MV_at_5", "AP_at_3", "AP_at_5"]
for model in ["yottixel", "sish", "retccl", "hshr"]:
    for exp in ["UCLA", "READER_STUDY"]:
        site_results = pd.read_csv(f"data/{model}/{exp}/sub_type.csv")
        for k in [1, 3, 5]:
            site_results[f'MV_at_{k}'] = site_results.apply(lambda row: majority_voting_at_k(row, k), axis=1)

        for k in [3, 5]:
            site_results[f'AP_at_{k}'] = site_results.apply(lambda row: average_precision_at_k(row, k), axis=1)

        # Only the metric columns are written; no summary list is built here,
        # so this cell does not depend on variables left over from other cells.
        site_results[columns].to_csv(f"{model}_{exp}.csv", index=False)

In [None]:
# Ablation runs (yottixel only): mean MV@k / AP@k for the CPTAC and UPENN GBM
# ablation experiments, one summary row per (experiment, model) pair.
records = []
for model in ("yottixel",):
    for exp in ("ABLATION_GBM_MICROSCOPE_CPTAC", "ABLATION_GBM_MICROSCOPE_UPENN"):
        site_results = pd.read_csv(f"data/{model}/{exp}/sub_type.csv")
        # Metric means first, then the labels identifying the row.
        tmp = [*calculate_site_metrics(site_results), exp, model]
        records.append(tmp)

columns = [
    "mMV_at_1", "mMV_at_3", "mMV_at_5",
    "mAP_at_3", "mAP_at_5",
    "Exp", "Model",
]
df = pd.DataFrame.from_records(records, columns=columns)
df.to_csv("ablation_subtype_results.csv", index=False)

In [5]:
# Export the per-query (not averaged) MV@k / AP@k scores of the UCLA ablation
# run to "<model>_<exp>.csv".
columns = ["MV_at_1", "MV_at_3", "MV_at_5", "AP_at_3", "AP_at_5"]
for model in ["yottixel"]:
    for exp in ["ABLATION_UCLA"]:
        site_results = pd.read_csv(f"data/{model}/{exp}/sub_type.csv")
        # Each metric column is computed exactly once; the column means are
        # not needed for this export.
        for k in [1, 3, 5]:
            site_results[f'MV_at_{k}'] = site_results.apply(lambda row: majority_voting_at_k(row, k), axis=1)

        for k in [3, 5]:
            site_results[f'AP_at_{k}'] = site_results.apply(lambda row: average_precision_at_k(row, k), axis=1)

        site_results[columns].to_csv(f"{model}_{exp}.csv", index=False)