In [1]:
from itertools import combinations
import numpy as np

In [2]:

def read_data(file):
    """Load labelled scores from a whitespace-separated text file.

    Expected layout: the first line holds an integer (it is parsed so that a
    malformed header still fails loudly, but its value is not used -- every
    following line is read). Each later non-blank line holds two floats,
    ``label score``. Blank lines are skipped.

    Parameters
    ----------
    file : str or path-like
        Path of the file to read.

    Returns
    -------
    labels : numpy.ndarray
        Labels ordered by decreasing score (ties broken by decreasing label).
    probas : numpy.ndarray
        Scores in decreasing order, aligned with ``labels``.
    pairs : list of tuple
        Every 2-combination of the distinct labels as ``(smaller, larger)``.
        A list (not a one-shot iterator) so it can be traversed repeatedly.

    Raises
    ------
    ValueError
        If the header is not an integer, or a non-blank data line does not
        contain exactly two float-parsable fields.
    """
    rows = []
    unique_labels = set()
    with open(file, 'r') as fin:
        int(fin.readline().strip())  # header: validated, value otherwise unused
        for line in fin:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (typically a trailing newline at EOF).
                continue
            label, proba = map(float, fields)
            rows.append((proba, label))
            unique_labels.add(label)

    rows.sort(reverse=True)
    # reshape keeps the array 2-D even when there are no rows, so the column
    # slices below yield empty arrays instead of raising IndexError.
    data = np.array(rows, dtype=float).reshape(-1, 2)
    labels = data[:, 1]
    probas = data[:, 0]
    # Sorting the labels makes the pair order (and therefore which label of
    # each pair comes second) independent of set iteration order.
    pairs = list(combinations(sorted(unique_labels), 2))
    return labels, probas, pairs

def trapezoid_area(x1, x2, y1, y2):
    """Area of the trapezoid between x1 and x2 with side heights y1 and y2.

    The width is taken as an absolute value, so the order of x1 and x2 does
    not matter.
    """
    width = np.abs(x2 - x1)
    mean_height = 0.5 * (y1 + y2)
    return width * mean_height

def auc(l, f, p, n, pos_label):
    """
    Accumulate the unnormalised area under the ROC curve.

    Trapezoid sweep as described in
    https://ccrma.stanford.edu/workshops/mir2009/references/ROCintro.pdf

    Samples are visited in the order given and no sorting happens here, so
    the caller is expected to pass them ordered by decreasing f.

    l -- labels, one per sample
    f -- probabilities of pos labels, aligned with l
    p -- number of pos labels
    n -- number of neg labels
    pos_label -- label counted as positive; any other label counts as negative

    Returns the tuple (area, p * n); area / (p * n) is the normalised AUC.
    """
    area = 0.
    true_pos, false_pos = 0., 0.
    last_tp, last_fp = 0., 0.
    last_score = None
    for idx, label in enumerate(l):
        score = f[idx]
        if score != last_score:
            # Score changed: close the trapezoid covering the finished group
            # of equal scores, then remember where the next one starts.
            area += trapezoid_area(false_pos, last_fp, true_pos, last_tp)
            last_score, last_fp, last_tp = score, false_pos, true_pos
        if label == pos_label:
            true_pos += 1
        else:
            false_pos += 1
    # Final segment runs from the last recorded point to the end point (n, p).
    area += trapezoid_area(n, last_fp, p, last_tp)
    return area, p * n

def multiclass_auc(labels, probas, pairs):
    """Pooled pairwise AUC over several label pairs.

    For every ``(l0, l1)`` in ``pairs`` the samples carrying either label are
    selected and passed to ``auc`` with ``l1`` as the positive label. The
    per-pair areas and per-pair ``p * n`` products are summed separately and
    the result is the ratio of the two sums.

    Parameters
    ----------
    labels : array-like
        Sample labels; must be in the order ``auc`` expects.
    probas : array-like
        Scores aligned with ``labels``.
    pairs : iterable of (l0, l1)
        Label pairs to evaluate. Iterated exactly once.

    Returns
    -------
    float
        Sum of pairwise areas divided by sum of pairwise ``p * n``.

    Raises
    ------
    ZeroDivisionError
        If ``pairs`` yields nothing (for example fewer than two distinct
        labels, or an iterator that has already been consumed).
    """
    # Accept plain sequences too: the element-wise comparisons and boolean
    # mask indexing below need NumPy arrays.
    labels = np.asarray(labels)
    probas = np.asarray(probas)

    numerator = 0.
    denominator = 0.
    pair_count = 0

    for l0, l1 in pairs:
        pair_count += 1

        m0 = labels == l0
        m1 = labels == l1
        mask = m0 | m1

        lm = labels[mask].tolist()
        pm = probas[mask].tolist()

        num, den = auc(lm, pm, m1.sum(), m0.sum(), l1)
        numerator += num
        denominator += den

    if pair_count == 0:
        # Same exception type the bare 0. / 0. division would raise, but with
        # a message that points at the actual cause.
        raise ZeroDivisionError(
            "multiclass_auc received no label pairs "
            "(empty or already-consumed `pairs`); "
            "at least two distinct labels are required"
        )

    return numerator / denominator



In [15]:
def main(file='./random_test.txt', output_file='./output.txt'):
    """Compute the multiclass AUC of a score file and write it to disk.

    Parameters
    ----------
    file : str
        Input path handed to ``read_data``. Defaults to the path that was
        previously hard-coded.
    output_file : str
        Path the score is written to, formatted with six decimal places.
        Defaults to the path that was previously hard-coded.
    """
    labels, probas, pairs = read_data(file)
    auc_score = multiclass_auc(labels, probas, pairs)
    # Context manager closes the handle even if formatting or writing fails.
    with open(output_file, "w") as fout:
        fout.write("{:.6f}".format(auc_score))

In [16]:
# Run the whole pipeline: main() reads the input file, computes the score
# and writes the formatted result to the output file.
main()