In [4]:
from dotenv import load_dotenv 
load_dotenv()

from fourlang.text_to_4lang import TextTo4lang
from fourlang.lexicon import Lexicon
from graphviz import Source
from scripts.parse_data import read_sherliic, build_graph
from scripts.similarity import Similarity

from tqdm import tqdm
preds = []
data = read_sherliic("data/dev.csv", ud_path="data/relation_index.tsv", keep_context=True)
data_frame = build_graph(data)

In [7]:
data_frame["examples_A"] = data["examples_A"]
data_frame["examples_B"] = data["examples_B"]

In [9]:
data["prem_end"] = data["prem_end"].fillna('')
data["hypo_end"] = data["hypo_end"].fillna('')
data['premise_text'] = data["prem_argleft"] + " " + data["premise"].apply(lambda x: x.strip()) + " " + data["prem_argright"] + data["prem_end"]
data['hyp_text'] = data["hypo_argleft"] + " " + data["hypothesis"].apply(lambda x: x.strip()) + " " + data["hypo_argright"] + data["hypo_end"]

In [21]:
premise_texts = []
hyp_texts = []

for i, prem_text in enumerate(data["premise_text"]):
    example_A = data.iloc[i]["examples_A"].split("/")[0].strip()
    text = prem_text.replace("A", example_A)
    example_B = data.iloc[i]["examples_B"].split("/")[0].strip()
    text = text.replace("B", example_B)
    premise_texts.append(text)
    
for i, prem_text in enumerate(data["hyp_text"]):
    example_A = data.iloc[i]["examples_A"].split("/")[0].strip()
    text = prem_text.replace("A", example_A)
    example_B = data.iloc[i]["examples_B"].split("/")[0].strip()
    text = text.replace("B", example_B)
    hyp_texts.append(text)

In [24]:
data["premise_text"] = premise_texts
data["hyp_text"] = hyp_texts

In [1]:
import torch
from fairseq.data.data_utils import collate_tokens

roberta = torch.hub.load('pytorch/fairseq', 'roberta.large.mnli')
roberta.eval()

Downloading: "https://github.com/pytorch/fairseq/archive/master.zip" to /home/adaamko/.cache/torch/hub/master.zip
100%|██████████| 751652118/751652118 [01:11<00:00, 10537530.31B/s]
1042301B [00:01, 1029047.76B/s]
456318B [00:00, 594436.53B/s]


RobertaHubInterface(
  (model): RobertaModel(
    (encoder): RobertaEncoder(
      (sentence_encoder): TransformerSentenceEncoder(
        (dropout_module): FairseqDropout()
        (embed_tokens): Embedding(50265, 1024, padding_idx=1)
        (embed_positions): LearnedPositionalEmbedding(514, 1024, padding_idx=1)
        (layers): ModuleList(
          (0): TransformerSentenceEncoderLayer(
            (dropout_module): FairseqDropout()
            (activation_dropout_module): FairseqDropout()
            (self_attn): MultiheadAttention(
              (dropout_module): FairseqDropout()
              (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
            )
            (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwi

In [25]:
batch_of_pairs = []

for i, text in enumerate(premise_texts):
    batch_of_pairs.append([text, hyp_texts[i]])

In [27]:
batch = collate_tokens(
    [roberta.encode(pair[0], pair[1]) for pair in batch_of_pairs], pad_idx=1
)

logprobs = roberta.predict('mnli', batch)
guesses = logprobs.argmax(dim=1)
# tensor([0, 2, 1, 0])

In [28]:
preds = []

for guess in guesses:
    if guess == 2:
        preds.append(1)
    else:
        preds.append(0)

In [30]:
from sklearn.metrics import precision_recall_fscore_support as pr
#bPrecis, bRecall, bFscore, bSupport = pr(data_frame.score.tolist(), [1 if i>=1.0 else 0 for i in guesses])
bPrecis, bRecall, bFscore, bSupport = pr(data_frame.score.tolist(), preds)

print("Precision: " +  str(bPrecis[1]))
print("Recall: " +  str(bRecall[1]))
print("Fscore: " +  str(bFscore[1]))
print(bSupport)

Precision: 0.5588235294117647
Recall: 0.8610271903323263
Fscore: 0.6777645659928656
[665 331]
