# Metrics

The metrics are calculated as follows:

In [None]:
import pandas as pd
from sklearn.metrics import roc_auc_score, roc_curve
import matplotlib.pyplot as plt


In [None]:
#loading the BMA file
# Tab-separated table of scored pairs; the columns 'MGI Entity', 'OMIM Entity'
# and 'BMA Similarity' are read further down.
bma_file = '/content/drive/My Drive/colab_output/bma_file_example.txt'
bma_df = pd.read_csv(bma_file, sep='\t')

#positives loaded here
positive_file = '/content/drive/My Drive/positives_added.txt' #fold-based or overall (depending if graph contains the query or not)
positive_df = pd.read_csv(positive_file, sep='\t')

#cleaning and pre-processing
# Keep only the numeric part of each identifier so both tables share the same join keys.
# expand=False returns a Series; values that do not match the pattern become NaN.
bma_df['MGI'] = bma_df['MGI Entity'].str.extract(r'MGI_(\d+)', expand=False)
bma_df['OMIM'] = bma_df['OMIM Entity'].str.extract(r'OMIM_(\d+)', expand=False)
positive_df['MGI'] = positive_df['MGI_ID'].str.extract(r'MGI_(\d+)', expand=False)
positive_df['OMIM'] = positive_df['OMIM_ID'].str.extract(r'OMIM_(\d+)', expand=False)

#labelling
# Merge against clean, unique positive keys only:
#   - pandas matches NaN join keys with each other, so identifiers that failed to
#     parse on both sides would otherwise be labelled as positives;
#   - a repeated positive pair would duplicate the matching BMA rows in a left
#     merge, which distorts the AUC and every rank computed from merged_df.
positive_keys = positive_df[['MGI', 'OMIM']].dropna().drop_duplicates()
merged_df = pd.merge(bma_df, positive_keys, on=['MGI', 'OMIM'], how='left', indicator=True)
# '_merge' == 'both' marks BMA rows whose (MGI, OMIM) pair is a known positive.
merged_df['y_true'] = (merged_df['_merge'] == 'both').astype(int)
merged_df['y_scores'] = merged_df['BMA Similarity']

#auc computation
# ROC AUC is only defined when y_true contains both classes. If no row was
# labelled positive (or every row was), report that explicitly and keep `auc`
# defined as NaN instead of letting roc_auc_score fail on the degenerate input.
if merged_df['y_true'].nunique() < 2:
    auc = float('nan')
    print("AUC Score: undefined (y_true contains a single class)")
else:
    auc = roc_auc_score(merged_df['y_true'], merged_df['y_scores'])
    print(f"AUC Score: {auc}")

#--calculations needed
#hits@k, MR, and MRR calculation
# For every OMIM group, rows are ordered by descending score and the 1-based
# position of the best-placed positive row is recorded in `ranks`.
ranks = []

grouped = merged_df.groupby('OMIM')

for omim_id, group in grouped:
    # A stable sort keeps the order of tied scores reproducible between runs.
    sorted_group = group.sort_values(by='y_scores', ascending=False, kind='mergesort').reset_index(drop=True)
    positive_indices = sorted_group.index[sorted_group['y_true'] == 1].tolist()

    # Groups without any positive row cannot be ranked and are left out.
    if not positive_indices:
        continue

    # After reset_index the index is the 0-based position in the sorted group.
    ranks.append(positive_indices[0] + 1)

total = len(ranks)

# Number of groups whose best positive lands within the top 1 / 10 / 30.
hits_at_1 = sum(r == 1 for r in ranks)
hits_at_10 = sum(r <= 10 for r in ranks)
hits_at_30 = sum(r <= 30 for r in ranks)

#final metrics
if total:
    mean_rank = sum(ranks) / total
    mrr = sum(1.0 / r for r in ranks) / total
    hits_at_1_score = hits_at_1 / total
    hits_at_10_score = hits_at_10 / total
    hits_at_30_score = hits_at_30 / total
else:
    # No group contained a positive: the metrics are undefined, so report NaN
    # rather than failing with a ZeroDivisionError.
    print("No OMIM group contains a positive pair; ranking metrics are undefined.")
    mean_rank = mrr = float('nan')
    hits_at_1_score = hits_at_10_score = hits_at_30_score = float('nan')

# display results
print(f"Hits@1: {hits_at_1_score:.4f}")
print(f"Hits@10: {hits_at_10_score:.4f}")
print(f"Hits@30: {hits_at_30_score:.4f}")
print(f"Mean Rank: {mean_rank:.2f}")
print(f"MRR: {mrr:.4f}")
