In [1]:
import pandas as pd

roc_test = pd.read_csv("data/roc_nlu_test.csv")

In [2]:
roc_test.head()

Unnamed: 0.1,Unnamed: 0,context,head,relation,tail,label
0,0,['hank had to wrap a lot of gifts for his fami...,PersonX wraps gifts,xWant,give gifts,1
1,1,['hank had to wrap a lot of gifts for his fami...,PersonX wraps gifts,xWant,to deliver the gifts,0
2,2,['hank had to wrap a lot of gifts for his fami...,gift,AtLocation,wrapped container,1
3,3,['hank had to wrap a lot of gifts for his fami...,gift,AtLocation,gift shop,0
4,4,['hank had to wrap a lot of gifts for his fami...,gift,ObjectUse,bring for the birthday host,0


In [4]:
from kogito.linkers.deberta import DebertaLinker

linker = DebertaLinker()

In [10]:
def text_to_list(text):
    return [t.strip().strip("'") for t in text.strip("]").strip("[").split(",")]

In [11]:
text_to_list(roc_test.iloc[0]["context"])

['hank had to wrap a lot of gifts for his family .',
 'he ran out of wrapping paper with 4 gifts to go .',
 'he went to the kitchen and found shopping bags .',
 'he cut up the bags to make sheets of paper .',
 'he used the paper to wrap the last of the gifts .']

### Context as a list

In [14]:
from kogito.core.knowledge import KnowledgeGraph, Knowledge
from tqdm import tqdm

labels = roc_test.label.to_list()
predictions = []

for _, row in tqdm(roc_test.iterrows(), total=len(roc_test)):
    context = text_to_list(row["context"])
    graph = KnowledgeGraph([Knowledge(head=row["head"], relation=row["relation"], tails=row["tail"])])
    probs = linker.link(context=context, input_graph=graph)
    predictions.append(1 if probs[0] >= 0.5 else 0)

100%|██████████| 10854/10854 [06:35<00:00, 27.42it/s]


In [15]:
len(predictions), len(labels)

(10854, 10854)

In [16]:
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score

print(f"Accuracy={accuracy_score(labels, predictions)}, F1={f1_score(labels, predictions)}, Recall={recall_score(labels, predictions)}, Precision={precision_score(labels, predictions)}")

Accuracy=0.8941404090657822, F1=0.7769365171811298, Recall=0.7968936678614098, Precision=0.7579545454545454


### Context as a text

In [17]:
from kogito.core.knowledge import KnowledgeGraph, Knowledge
from tqdm import tqdm

labels = roc_test.label.to_list()
predictions = []

for _, row in tqdm(roc_test.iterrows(), total=len(roc_test)):
    context = " ".join(text_to_list(row["context"]))
    graph = KnowledgeGraph([Knowledge(head=row["head"], relation=row["relation"], tails=row["tail"])])
    probs = linker.link(context=context, input_graph=graph)
    predictions.append(1 if probs[0][0] >= 0.5 else 0)

  attention_scores = torch.bmm(query_layer, key_layer.transpose(-1, -2)) / torch.tensor(
  score += c2p_att / torch.tensor(scale, dtype=c2p_att.dtype)
  score += p2c_att / torch.tensor(scale, dtype=p2c_att.dtype)
100%|██████████| 10854/10854 [08:15<00:00, 21.89it/s]


In [18]:
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score

print(f"Accuracy={accuracy_score(labels, predictions)}, F1={f1_score(labels, predictions)}, Recall={recall_score(labels, predictions)}, Precision={precision_score(labels, predictions)}")

Accuracy=0.8944168048645661, F1=0.7775621118012422, Recall=0.7976901632815612, Precision=0.7584248390761076
