In [None]:
import pandas as pd
import numpy as np
%%writefile func_emb_metrics.py

def func_emb_metrics(xlsxFile):
    """
    FUNC_EMB_METRICS calculates Top-1, Top-10, and Mean Rank from prediction results.

    Each row of the table holds the expected word in its first column and the
    predicted nearest neighbors, best first, in the remaining columns.
    Matching is case-insensitive. Empty / non-string neighbor cells are
    dropped before ranking, so the rank is the 1-based position of the first
    match among the remaining neighbors.

    Input:
        xlsxFile - Excel file (anything accepted by pandas.read_excel)
                   containing the 'Expected' column followed by the nearest
                   neighbors, or an already-loaded pandas DataFrame with the
                   same layout

    Output:
        top1     - proportion of rows whose expected word is ranked at position 1
        top10    - proportion of rows whose expected word is ranked in the top 10
        meanRank - average rank over the rows where the expected word was
                   found; NaN when it was found in no row

    Rows whose expected value is missing or not a string count as "not found":
    they stay in the denominator of top1/top10 and are excluded from meanRank.
    An empty table yields (NaN, NaN, NaN) instead of raising ZeroDivisionError.
    """

    # Accept a pre-loaded table so callers are not forced through Excel I/O.
    if isinstance(xlsxFile, pd.DataFrame):
        T = xlsxFile
    else:
        T = pd.read_excel(xlsxFile)

    # Print columns to debug
    print(f"Columns in file: {T.columns.tolist()}")

    n = len(T)
    if n == 0:
        # No rows: every metric is undefined.
        undefined = float("nan")
        return undefined, undefined, undefined

    # Positional access only: the row index of T may not be 0..n-1.
    expected = T.iloc[:, 0].tolist()  # First column
    myNeighbors = T.iloc[:, 1:].values  # all columns after the first

    top1Count = 0
    top10Count = 0
    foundRanks = []

    for currentExpected, neighborRow in zip(expected, myNeighbors):
        # A blank Expected cell is read back as NaN (a float); it cannot
        # match any neighbor, so treat the row as a miss.
        if not isinstance(currentExpected, str):
            continue
        target = currentExpected.lower()

        # Remove empty entries
        candidates = [c for c in neighborRow if isinstance(c, str) and c != '']

        # Rank of the expected form = 1-based position of its first match.
        rank = next(
            (pos for pos, c in enumerate(candidates, start=1)
             if c.lower() == target),
            None,
        )
        if rank is None:
            continue

        foundRanks.append(rank)
        if rank == 1:
            top1Count += 1
        # Guard explicitly: the sheet may carry more than 10 neighbor columns.
        if rank <= 10:
            top10Count += 1

    # Compute metrics
    top1Acc = top1Count / n
    top10Acc = top10Count / n
    # Average only over found ranks; avoids numpy's empty-slice warning.
    meanRank = float(np.mean(foundRanks)) if foundRanks else float("nan")

    return top1Acc, top10Acc, meanRank