Compute metrics for the LLAMA part of the chimera model.

In [57]:
import pickle
import numpy as np
import os

In [58]:
ROOT_DIR = "/media/hdd/usr/edo/egoProcel_mistakes/results/7b_0_alpha_0_5_0.60_assembly"
PRED_FILE = os.path.join(ROOT_DIR, "llama_preds.pkl")
GTS_FILE = os.path.join(ROOT_DIR, "llama_gts.pkl")

In [59]:
# open files
preds = pickle.load(open(PRED_FILE, "rb"))
gts = pickle.load(open(GTS_FILE, "rb"))

In [60]:
len(gts.keys())

156

First and second type errors

In [61]:
tp, fp, fn, tn = 0, 0, 0, 0
count, samples = 0, 0
for k in gts.keys():
    gt = gts[k]
    pred = preds[k]
    matches = np.array([g in p for g, p in zip(gt, pred)])
    count += np.sum(matches)
    samples += len(matches)
    # print("count: {}, len: {}".format(count, len))
    # the last one is a mistake, a mismatch is expected
    print(matches)
    # all the actions all correct procedures except the last one
    correct = matches[:-1]
    mistake = matches[-1]

    for i in range(len(gt)):
        m = matches[i]
        g = gt[i]
        p = pred[i]
        print("{}, g: {}, p: {}".format(m, g, p))

    # count
    ## correct
    tp += np.sum(correct)
    fn += np.sum(~correct)
    ## mistake
    tn += int(not mistake)
    fp += int(mistake)

    print("tp: {}, fp: {}, fn: {}, tn: {}".format(tp, fp, fn, tn))

    print("-" * 30)

[ True  True False  True False False False]
True, g: attach-wheel, p: {'attach-interior', 'attach-wheel'}
True, g: attach-interior, p: {'attach-interior', 'attach-door\n', 'attach-base\n'}
False, g: attach-wheel, p: {'attach-rear', 'attach-cab', 'attach-base\n'}
True, g: attach-interior, p: {'attach-cab', 'attach-interior', 'attach-transport', 'attach-door\n'}
False, g: attach-cabin, p: {'attach-cab', 'attach-transport', 'attach-rear', 'attach-door\n'}
False, g: attach-base, p: {'attach-transport', 'attach-rear', 'attach-door\n'}
False, g: attach-roof, p: {'attach-transport', 'attach-rear', 'attach-door\n'}
tp: 3, fp: 0, fn: 3, tn: 1
------------------------------
[ True False False False]
True, g: attach-wheel, p: {'attach-wheel'}
False, g: attach-arm_connector, p: {'attach-bumper', 'attach-dump', 'attach-cab'}
False, g: attach-wheel, p: {'attach-interior', 'attach-chass', 'attach-dump', 'attach-base\n'}
False, g: attach-tilter, p: {'attach-bumper', 'attach-dump', 'attach-cab', 'attac

In [62]:
# accuracy
acc = (tp + tn) / (tp + tn + fp + fn)
precision = tp / (tp + fp)
recall = tp / (tp + fn)
f1 = 2 * precision * recall / (precision + recall)
ratio = count / samples

In [63]:
print("tp: {:d}, fp: {:d}, fn: {:d}, tn: {:d}".format(tp, fp, fn, tn))
print(
    "accuracy: {:.4f}\nprecision: {:.4f}\nrecall: {:.4f}\nf1: {:.4f}".format(
        acc, precision, recall, f1
    )
)
print("ratio: {:.4f}\t({:d}/{:d})".format(ratio, count, samples))

tp: 122, fp: 26, fn: 503, tn: 130
accuracy: 0.3227
precision: 0.8243
recall: 0.1952
f1: 0.3157
ratio: 0.1895	(148/781)
