In [1]:
import os
import numpy as np
import pandas as pd
import pickle as pk
import torch
from sklearn.metrics import roc_auc_score

# Location of the pickled dataset file, given relative to the current working directory.
dat_path = 'datasets/MSCRED/MSCRED.pk'

In [2]:
# SECURITY: pickle.load can execute arbitrary code stored in the file.
# Only point dat_path at a file from a trusted source.
with open(dat_path, 'rb') as pkf:
    dat = pk.load(pkf)

# Show the shape of every test split instead of hard-coding indices 0..4,
# so the cell keeps working if the number of splits changes.
tuple(split.shape for split in dat['x_tst'])

((1720, 30), (950, 30), (3070, 30), (650, 30), (1290, 30))

In [3]:
# Stack all test splits (and their labels) along the sample axis.
# np.concatenate accepts the sequence of arrays directly, so no copy loop is needed.
x_tst = np.concatenate(list(dat['x_tst']), axis=0)
lab_tst = np.concatenate(list(dat['lab_tst']), axis=0)

# Every sample must have exactly one label, otherwise the evaluation below is misaligned.
assert x_tst.shape[0] == lab_tst.shape[0], "x_tst and lab_tst have different lengths"

x_tst.shape, lab_tst.shape

((7680, 30), (7680,))

In [34]:
# Per-sample score: sum of squared feature values across the second axis.
scores = np.square(x_tst).sum(axis=1)

In [6]:
# Fraction of samples whose label is 1; derive the denominator from the data
# rather than hard-coding the sample count.
lab_tst.sum() / len(lab_tst)


0.0234375

In [46]:
# NOTE: get_bestF1 is defined in a later cell of this notebook; that cell must be
# executed before this one (ideally move the definition above this cell).
# Best F1 obtained when each single channel is used as the anomaly score on its own.
for chn in range(x_tst.shape[1]):
    # Use a dedicated name so the notebook-level `scores` from the earlier cell is not overwritten.
    chn_scores = x_tst[:, chn]
    res = get_bestF1(lab=lab_tst, scores=chn_scores, PA=False)
    print(res['F1'])

0.04607115433836703
0.04607221431344636
0.04614387556707713
0.0667779632721202
0.06751954513148543
0.046651617757712566
0.04717484008528784
0.04608294930875576
0.07145692735212386
0.09259259259259259
0.045842353240799695
0.05455672659640421
0.046130189646335217
0.06685981724983286
0.06504065040650406
0.07116564417177915
0.045988758303525806
0.05500982318271119
0.04568074518310578
0.04623683534549191
0.07153356197942186
0.04741298212605833
0.045931758530183726
0.07453416149068323
0.07380607814761214
0.07417582417582418
0.05514864282636795
0.046293888166449935
0.0461155508974191
0.04659166115155526


In [23]:

def _as_numpy(values):
    """Return ``values`` as a NumPy array; torch tensors are detached and moved to CPU first."""
    if torch.is_tensor(values):
        values = values.detach().cpu().numpy()
    return np.asarray(values)


def get_bestF1(lab, scores, PA=False):
    """Sweep the sorted scores as thresholds and report the best-F1 operating point.

    Samples are sorted by ascending score. Entry ``k`` of the sweep treats the
    samples sorted after position ``k`` as predicted positives.

    Parameters
    ----------
    lab : 1-D array-like or torch tensor
        Binary labels (1 = positive/anomalous, 0 = negative), one per sample.
    scores : 1-D array-like or torch tensor
        Scores, same length as ``lab``; larger means more anomalous.
    PA : bool, default False
        If True, true positives are counted per contiguous run of positive
        labels: the whole run counts as detected once the threshold is below
        the run's maximum score (presumably "point adjustment" - confirm).
        The AUC is then the trapezoidal area under the swept ROC points.
        If False, true positives are counted per sample and the AUC comes from
        ``sklearn.metrics.roc_auc_score``.

    Returns
    -------
    dict
        ``AUC``; ``F1``, ``thres``, ``TPR``, ``PPV``, ``FPR`` and ``maxid`` at the
        best-F1 point; ``FPRs`` / ``TPRs`` (swept curve with a final (0, 0) point
        appended); ``FPR_bestF1_TPR1`` (the FPR that would give the same F1 if
        TPR were 1) and ``TPR_bestF1_FPR0`` (the TPR that would give the same F1
        if FPR were 0).

    Raises
    ------
    ValueError
        If the inputs are not 1-D arrays of equal length, or if ``lab`` contains
        no positive sample (F1 is undefined in that case).
    """
    scores = _as_numpy(scores)
    lab = _as_numpy(lab)
    if lab.ndim != 1 or lab.shape != scores.shape:
        raise ValueError("lab and scores must be 1-D arrays of the same length")
    if lab.dtype.kind in 'bu':
        # Unary minus is invalid for bool arrays and wraps around for unsigned ints.
        lab = lab.astype(np.int64)

    ones = lab.sum()
    zeros = len(lab) - ones
    if ones == 0:
        raise ValueError("lab contains no positive sample; F1 is undefined")

    # Ascending by score; among exactly equal scores positives come first.
    # lexsort uses its LAST key as the primary key. This replaces
    # `scores - lab * 1e-16`, which is lost to rounding for larger scores.
    sortid = np.lexsort((-lab, scores))
    new_lab = lab[sortid]
    new_scores = scores[sortid]

    if PA:
        # Locate contiguous runs of positive labels as [start, end) index pairs.
        lab_diff = np.insert(lab, len(lab), 0) - np.insert(lab, 0, 0)
        positions = np.arange(len(lab) + 1)
        a_st = positions[lab_diff == 1]
        a_ed = positions[lab_diff == -1]

        # A run is detected as soon as the threshold drops below its maximum score.
        thres_a = np.array([np.max(scores[a_st[i]:a_ed[i]]) for i in range(len(a_st))])
        sort_a_id = np.flip(np.argsort(thres_a))  # big to small
        cum_a = np.cumsum(a_ed[sort_a_id] - a_st[sort_a_id])

        last_thres = np.inf
        TPs = np.zeros_like(new_lab)
        for i, a_id in enumerate(sort_a_id):
            # Thresholds in [thres_a[a_id], last_thres) detect only the runs ranked before i.
            in_band = (thres_a[a_id] <= new_scores) & (new_scores < last_thres)
            TPs[in_band] = cum_a[i - 1] if i > 0 else 0
            last_thres = thres_a[a_id]
        # Below the smallest run maximum every run is detected.
        TPs[new_scores < last_thres] = cum_a[-1]
    else:
        # Positives remaining after position k in sorted order.
        TPs = np.cumsum(-new_lab) + ones

    # Negatives remaining after position k in sorted order.
    FPs = np.cumsum(new_lab - 1) + zeros

    # Keep only the leading thresholds that still yield a true positive; this avoids
    # 0/0 in precision and F1 for the trailing entries.
    N = len(lab) - np.flip(TPs > 0).argmax()
    TPRs = TPs[:N] / ones
    PPVs = TPs[:N] / (TPs + FPs)[:N]
    FPRs = FPs[:N] / zeros
    F1s = 2 * TPRs * PPVs / (TPRs + PPVs)
    maxid = np.argmax(F1s)

    # Close the curve at (FPR=0, TPR=0). np.insert(arr, -1, 0) would place the 0
    # BEFORE the last element, scrambling the curve and shifting index N-1.
    FPRs = np.append(FPRs, 0)
    TPRs = np.append(TPRs, 0)
    if PA:
        # Trapezoidal rule; FPRs decrease along the sweep, so each width is positive.
        # NOTE(review): the sweep has no "everything flagged" point (FPR=1, TPR=1),
        # so the area starts at the first swept threshold - confirm this is intended.
        AUC = ((TPRs[:-1] + TPRs[1:]) * (FPRs[:-1] - FPRs[1:])).sum() * 0.5
    else:
        AUC = roc_auc_score(lab, scores)

    best_f1 = F1s[maxid]
    anomaly_ratio = ones / len(lab)
    FPR_bestF1_TPR1 = anomaly_ratio / (1 - anomaly_ratio) * (2 / best_f1 - 2)
    TPR_bestF1_FPR0 = best_f1 / (2 - best_f1)
    return {'AUC': AUC, 'F1': best_f1, 'thres': new_scores[maxid], 'TPR': TPRs[maxid], 'PPV': PPVs[maxid],
            'FPR': FPRs[maxid], 'maxid': maxid, 'FPRs': FPRs, 'TPRs': TPRs,
            'FPR_bestF1_TPR1': FPR_bestF1_TPR1, 'TPR_bestF1_FPR0': TPR_bestF1_FPR0}