In [26]:
from dotenv import load_dotenv 
load_dotenv()

from fourlang.text_to_4lang import TextTo4lang
from fourlang.lexicon import Lexicon
from graphviz import Source
from scripts.parse_data import read_sherliic, build_graph
from scripts.similarity import Similarity

from tqdm import tqdm
preds = []
data = read_sherliic("data/test.csv", ud_path="data/relation_index.tsv", keep_context=True)
data_frame = build_graph(data)

In [27]:
data_frame["examples_A"] = data["examples_A"]
data_frame["examples_B"] = data["examples_B"]

In [28]:
data["prem_end"] = data["prem_end"].fillna('')
data["hypo_end"] = data["hypo_end"].fillna('')
data['premise_text'] = data["prem_argleft"] + " " + data["premise"].apply(lambda x: x.strip()) + " " + data["prem_argright"] + data["prem_end"]
data['hyp_text'] = data["hypo_argleft"] + " " + data["hypothesis"].apply(lambda x: x.strip()) + " " + data["hypo_argright"] + data["hypo_end"]

In [29]:
premise_texts = []
hyp_texts = []

for i, prem_text in enumerate(data["premise_text"]):
    example_A = data.iloc[i]["examples_A"].split("/")[0].strip()
    text = prem_text.replace("A", example_A)
    example_B = data.iloc[i]["examples_B"].split("/")[0].strip()
    text = text.replace("B", example_B)
    premise_texts.append(text)
    
for i, prem_text in enumerate(data["hyp_text"]):
    example_A = data.iloc[i]["examples_A"].split("/")[0].strip()
    text = prem_text.replace("A", example_A)
    example_B = data.iloc[i]["examples_B"].split("/")[0].strip()
    text = text.replace("B", example_B)
    hyp_texts.append(text)

In [30]:
data["premise_text"] = premise_texts
data["hyp_text"] = hyp_texts

In [31]:
import torch
from fairseq.data.data_utils import collate_tokens
roberta = torch.hub.load('pytorch/fairseq', 'roberta.large.mnli')
roberta.eval()

Using cache found in /home/adaamko/.cache/torch/hub/pytorch_fairseq_master


RobertaHubInterface(
  (model): RobertaModel(
    (encoder): RobertaEncoder(
      (sentence_encoder): TransformerSentenceEncoder(
        (dropout_module): FairseqDropout()
        (embed_tokens): Embedding(50265, 1024, padding_idx=1)
        (embed_positions): LearnedPositionalEmbedding(514, 1024, padding_idx=1)
        (layers): ModuleList(
          (0): TransformerSentenceEncoderLayer(
            (dropout_module): FairseqDropout()
            (activation_dropout_module): FairseqDropout()
            (self_attn): MultiheadAttention(
              (dropout_module): FairseqDropout()
              (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
            )
            (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwi

In [7]:
batch_of_pairs = []

for i, text in enumerate(premise_texts):
    batch_of_pairs.append([text, hyp_texts[i]])

In [40]:
batch_of_pairs = [
    ["A is granting to B", "A is giving to B"],
    ["A is supporter of B", "A is backing B"],
    ["A is president of B","A is representing B"],
    ["A is interviewing B", "A is asking B"],
    ["A claims B", "A is wanting B"]
]

In [41]:
with torch.no_grad():

    batch = collate_tokens(
        [roberta.encode(pair[0], pair[1]) for pair in batch_of_pairs], pad_idx=1
    )

    logprobs = roberta.predict('mnli', batch)
    print(logprobs)
    guesses = logprobs.max(dim=1)
    # tensor([0, 2, 1, 0])

tensor([[-5.0970, -4.4486, -0.0180],
        [-5.6310, -4.6256, -0.0135],
        [-6.1340, -2.8906, -0.0594],
        [-3.4675, -3.2651, -0.0719],
        [-4.4199, -0.8968, -0.5446]])


In [42]:
import numpy as np

guesses_probs = logprobs.max(dim=1)
guesses = logprobs.argmax(dim=1)

In [43]:
guesses_probs = np.exp(guesses_probs[0].detach().numpy())

In [46]:
preds = []

for guess, prob in zip(guesses, guesses_probs):
    if guess == 2 and prob >= 0.8:
        preds.append(1)
    else:
        preds.append(0)

In [47]:
preds

[1, 1, 1, 1, 0]

In [12]:
gold = data_frame.score.tolist()

In [13]:
from sklearn.metrics import precision_recall_fscore_support as pr
#bPrecis, bRecall, bFscore, bSupport = pr(data_frame.score.tolist(), [1 if i>=1.0 else 0 for i in guesses])
bPrecis, bRecall, bFscore, bSupport = pr(gold, preds)

print("Precision: " +  str(bPrecis[1]))
print("Recall: " +  str(bRecall[1]))
print("Fscore: " +  str(bFscore[1]))
print(bSupport)

Precision: 0.7001845018450185
Recall: 0.7635814889336016
Fscore: 0.7305101058710299
[1995  994]


In [201]:
from sklearn.metrics import precision_recall_fscore_support as pr
#bPrecis, bRecall, bFscore, bSupport = pr(data_frame.score.tolist(), [1 if i>=1.0 else 0 for i in guesses])
bPrecis, bRecall, bFscore, bSupport = pr(gold, all_rules)

print("Precision: " +  str(bPrecis[1]))
print("Recall: " +  str(bRecall[1]))
print("Fscore: " +  str(bFscore[1]))
print(bSupport)

Precision: 0.3715651135005974
Recall: 0.6257545271629779
Fscore: 0.4662668665667167
[1995  994]


In [25]:
from sklearn.metrics import precision_recall_fscore_support as pr
#bPrecis, bRecall, bFscore, bSupport = pr(data_frame.score.tolist(), [1 if i>=1.0 else 0 for i in guesses])
bPrecis, bRecall, bFscore, bSupport = pr(gold, fourlang_preds)

print("Precision: " +  str(bPrecis[1]))
print("Recall: " +  str(bRecall[1]))
print("Fscore: " +  str(bFscore[1]))
print(bSupport)

Precision: 0.8640776699029126
Recall: 0.08953722334004025
Fscore: 0.16226071103008205
[1995  994]


In [24]:
fourlang_guesses = []

with open("fourlang_guesses_nofilt_nomulti_zero", "r+") as f:
    for line in f:
        fourlang_guesses.append(float(line.strip()))
        
fourlang_preds = [1 if i>=0.8 else 0 for i in fourlang_guesses]

In [199]:
all_rules = []

with open("results/wordnet+all_rules.csv", "r+") as f:
    for line in f:
        line = line.split()
        p = int(line[-1].strip())
        all_rules.append(p)
        
all_rules = all_rules[:2989]

In [203]:
combined_preds = []

for i, pred in enumerate(preds):
    if pred == 1 or fourlang_preds[i] == 1:
        combined_preds.append(1)
    else:
        combined_preds.append(0)

In [None]:
new_positives = []

for i, pred in enumerate(combined_preds):
    if pred == 1 and gold[i] == 1 and preds[i] == 0:
        new_positives.append(i)
        
new_positives

In [None]:
set(fourlang_true_pos) & set(alberta_false_neg)

In [None]:
index = 5
print(premise_texts[false_negatives[index]])
print(hyp_texts[false_negatives[index]])

In [14]:
text_to_4lang = TextTo4lang(lang="en")

Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/master/resources_1.1.0.json: 122kB [00:00, 29.1MB/s]                    
2020-12-18 14:45:09 INFO: Downloading default packages for language: en (English)...
INFO:stanza:Downloading default packages for language: en (English)...
2020-12-18 14:45:09 INFO: File exists: /home/adaamko/stanza_resources/en/default.zip.
INFO:stanza:File exists: /home/adaamko/stanza_resources/en/default.zip.
2020-12-18 14:45:13 INFO: Finished downloading models and saved to /home/adaamko/stanza_resources.
INFO:stanza:Finished downloading models and saved to /home/adaamko/stanza_resources.
2020-12-18 14:45:13 INFO: Loading these models for language: en (English):
| Processor | Package   |
-------------------------
| tokenize  | ewt       |
| pos       | ewt       |
| lemma     | ewt       |
| depparse  | ewt       |
| sentiment | sstplus   |
| ner       | ontonotes |

INFO:stanza:Loading these models for language: en (English):
| Process

In [15]:
import networkx as nx
from networkx import algorithms

import re
def clear_node(node):
    """
    Clears the node from the 4lang id parts
    :param node: the text to clear
    :return: the cleared text
    """
    node = re.sub(r'_[0-9][0-9]*', '', node)
    #if node in stopwords:
    #    node = "STOPWORD"
    return node

def whitelisting(graph, from_node):
    whitelist = []
    zero_graph = nx.MultiDiGraph()
    zero_graph.add_node(from_node)
    delete_list = []
    for edge in graph.G.edges(data=True):
        if not edge[2]["color"]:
            zero_graph.add_edge(edge[0], edge[1])
            
    for node in zero_graph.nodes():
        if algorithms.has_path(zero_graph, from_node, node):
            if node != from_node:
                whitelist.append(node)
    
    """
    for edge in zero_graph.edges(data=True):
        if edge[0] == from_node:
            whitelist.append(edge[1])
    """
    
    return whitelist

In [None]:
graph_test = text_to_4lang.process_text("A is colonizing B", method="expand", depth=1, blacklist=["in", "on", "of"], filt=False, black_or_white="black", multi_definition=False, apply_from_depth=2)
print(graph_test.root)

In [None]:
whitelisting(graph_test, graph_test.root)

In [16]:
from graphviz import Source
from tqdm import tqdm

In [17]:
preds = []

with torch.no_grad():
    for i, (prem, hyp) in enumerate(zip(premise_texts, hyp_texts)):
        #print(i)
        if guesses[i] == 2 and guesses_probs[i] >= 0.8:
            print("True found..")
            preds.append(1)
        else:
            batches = []
            
            text_to_process = data.iloc[i]["premise_text"]
            graph = text_to_4lang.process_text(text_to_process, method="expand", depth=1, blacklist=["in", "on", "of"], filt=False, black_or_white="black", multi_definition=False, apply_from_depth=2)

            if graph.root:
                reducing_nodes = whitelisting(graph, graph.root)
                graph_root = clear_node(graph.root)
                for node in reducing_nodes:
                    node = clear_node(node)
                    if node != "A" and node != "B":
                        
                        prem_reduced = prem
                        for word in prem_reduced.split():
                            if word.startswith(graph_root) and (node not in prem_reduced.split()):
                                prem_reduced = prem.replace(word, node)
                                batches.append([prem_reduced, hyp])
                label = 0
                if batches:
                    batch = collate_tokens(
                        [roberta.encode(pair[0], pair[1]) for pair in batches], pad_idx=1
                    )

                    logprobs_batch = roberta.predict('mnli', batch)
                    reduced_guesses = logprobs_batch.argmax(dim=1)
                    reduced_guesses_probs = logprobs_batch.max(dim=1)
                    reduced_guesses_probs = np.exp(reduced_guesses_probs[0].detach().numpy())

                    for guess, prob in zip(reduced_guesses, reduced_guesses_probs):
                        if guess == 2 and prob >= 0.95:
                            label = 1
                    
                    if label == 1 and gold[i] == 0:
                        print(batches)
                        print(text_to_process)
                        print(f"reduced: {reduced_guesses}")
                preds.append(label)

            else:
                preds.append(0)




[['Indiana Pacers is follow George W. Bush', 'George W. Bush is losing to Indiana Pacers'], ['Indiana Pacers is tail George W. Bush', 'George W. Bush is losing to Indiana Pacers'], ['Indiana Pacers is behind George W. Bush', 'George W. Bush is losing to Indiana Pacers']]
Indiana Pacers is trailing George W. Bush
reduced: tensor([2, 2, 0])
True found..




True found..




True found..
True found..




True found..
True found..




True found..
True found..




True found..
True found..




[['Arthur Greiser is imply to Alexander Hamilton', 'Arthur Greiser is writing to Alexander Hamilton'], ['Arthur Greiser is QUOTEs to Alexander Hamilton', 'Arthur Greiser is writing to Alexander Hamilton']]
Arthur Greiser is suggesting to Alexander Hamilton
reduced: tensor([2, 2])




True found..
True found..




True found..




True found..




True found..




True found..




True found..




True found..
True found..




True found..
True found..
True found..




True found..




True found..




True found..




True found..
True found..




True found..




True found..
True found..




True found..
True found..




[['India is follow in Japan', 'India is wanting Japan'], ['India is letter in Japan', 'India is wanting Japan'], ['India is come in Japan', 'India is wanting Japan'], ['India is take in Japan', 'India is wanting Japan'], ['India is after in Japan', 'India is wanting Japan'], ['India is hence in Japan', 'India is wanting Japan'], ['India is next in Japan', 'India is wanting Japan'], ['India is alphabet in Japan', 'India is wanting Japan'], ['India is I in Japan', 'India is wanting Japan']]
India is succeeding in Japan
reduced: tensor([2, 1, 2, 2, 2, 2, 2, 1, 1])
True found..




True found..
True found..
True found..




True found..
True found..




True found..
True found..




True found..
True found..




True found..




[['Morgan Freeman is act a Wolverine', 'Morgan Freeman is playing Wolverine'], ['Morgan Freeman is c a Wolverine', 'Morgan Freeman is playing Wolverine'], ['Morgan Freeman is select a Wolverine', 'Morgan Freeman is playing Wolverine']]
Morgan Freeman is casting a Wolverine
reduced: tensor([2, 2, 2])




True found..
True found..
True found..




True found..
True found..
True found..




[['United States of America is give United States of America', 'United States of America is giving United States of America'], ['United States of America is State United States of America', 'United States of America is giving United States of America'], ['United States of America is Q30 United States of America', 'United States of America is giving United States of America']]
United States of America is abandoning United States of America
reduced: tensor([2, 2, 2])
True found..




True found..




True found..
True found..
True found..
True found..
True found..




[['Japan is follow in United States of America', 'Japan is coming to United States of America'], ['Japan is q17 in United States of America', 'Japan is coming to United States of America'], ['Japan is come in United States of America', 'Japan is coming to United States of America'], ['Japan is take in United States of America', 'Japan is coming to United States of America'], ['Japan is after in United States of America', 'Japan is coming to United States of America'], ['Japan is hence in United States of America', 'Japan is coming to United States of America'], ['Japan is next in United States of America', 'Japan is coming to United States of America'], ['Japan is locate in United States of America', 'Japan is coming to United States of America']]
Japan is succeeding in United States of America
reduced: tensor([2, 2, 2, 2, 2, 2, 2, 1])
True found..




True found..
True found..




True found..
True found..




True found..
True found..
True found..
True found..




True found..
True found..




True found..
True found..
True found..




True found..
True found..
True found..
True found..
True found..




True found..
True found..
True found..
True found..
True found..
True found..




True found..
True found..




True found..
True found..
True found..
True found..
True found..




True found..




True found..
True found..
True found..




True found..




True found..




[['United Kingdom is person in Spain', 'Spain is taking United Kingdom'], ['United Kingdom is Q29cont in Spain', 'Spain is taking United Kingdom'], ['United Kingdom is realm in Spain', 'Spain is taking United Kingdom'], ['United Kingdom is invest in Spain', 'Spain is taking United Kingdom'], ['United Kingdom is make in Spain', 'Spain is taking United Kingdom'], ['United Kingdom is most in Spain', 'Spain is taking United Kingdom'], ['United Kingdom is peninsula in Spain', 'Spain is taking United Kingdom'], ['United Kingdom is have in Spain', 'Spain is taking United Kingdom']]
United Kingdom is investor in Spain
reduced: tensor([2, 2, 2, 0, 2, 2, 2, 2])




True found..




True found..




True found..




True found..
True found..
True found..




True found..




True found..
True found..




True found..
True found..
True found..
True found..
True found..




True found..




[['Melbourne is bigger of Tamil Nadu', 'Melbourne is capital of state of Tamil Nadu'], ['Melbourne is c of Tamil Nadu', 'Melbourne is capital of state of Tamil Nadu'], ['Melbourne is settlement of Tamil Nadu', 'Melbourne is capital of state of Tamil Nadu'], ['Melbourne is large of Tamil Nadu', 'Melbourne is capital of state of Tamil Nadu']]
Melbourne is city of Tamil Nadu
reduced: tensor([1, 2, 2, 1])




True found..
True found..




True found..




True found..




True found..
True found..




True found..
True found..
True found..
True found..




True found..
True found..




True found..
True found..




True found..




True found..




True found..




True found..




True found..
True found..




True found..




True found..




True found..
True found..




True found..




True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..




True found..




True found..




True found..




[['Carthage is contend in Spain', 'Carthage is remaining in Spain'], ['Carthage is c in Spain', 'Carthage is remaining in Spain']]
Carthage is fighting in Spain
reduced: tensor([2, 2])
True found..




True found..
True found..




True found..




True found..
True found..
True found..
True found..
True found..




True found..
True found..
True found..




True found..




True found..
True found..
True found..
True found..
True found..




True found..
True found..




True found..




True found..
True found..




True found..
True found..
True found..
True found..
True found..




True found..




True found..
True found..
True found..
True found..




True found..




True found..
True found..




True found..
True found..
True found..
True found..




True found..
True found..




True found..




True found..
True found..
True found..
True found..




True found..




True found..




True found..
True found..
True found..
True found..
True found..




True found..




True found..
True found..




True found..




True found..
True found..
True found..




True found..




True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..




True found..
True found..




True found..




True found..




True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..




True found..




True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..




True found..
True found..




True found..
True found..
True found..
True found..




True found..
True found..
True found..




True found..
True found..
True found..
True found..




True found..
True found..




[["New York City is Western Australia's bigger", 'New York City is heart of Western Australia'], ["New York City is Western Australia's center", 'New York City is heart of Western Australia'], ["New York City is Western Australia's settlement", 'New York City is heart of Western Australia'], ["New York City is Western Australia's large", 'New York City is heart of Western Australia'], ["New York City is Western Australia's colloquial", 'New York City is heart of Western Australia']]
New York City is Western Australia's city
reduced: tensor([1, 2, 2, 1, 1])




True found..




True found..




True found..
True found..
True found..
True found..
True found..




True found..




True found..




True found..
True found..
True found..




True found..
True found..




True found..




True found..
True found..
True found..
True found..
True found..
True found..




True found..
True found..
True found..
True found..
True found..




True found..
True found..
True found..




True found..
True found..
[['Winston Churchill is use Vladimir Putin', 'Winston Churchill is meeting with Vladimir Putin'], ['Winston Churchill is male Vladimir Putin', 'Winston Churchill is meeting with Vladimir Putin'], ['Winston Churchill is device Vladimir Putin', 'Winston Churchill is meeting with Vladimir Putin'], ['Winston Churchill is derive Vladimir Putin', 'Winston Churchill is meeting with Vladimir Putin']]
Winston Churchill is pressing Vladimir Putin
reduced: tensor([2, 1, 2, 2])
True found..




True found..
True found..
True found..




[['John McCain is conversation in England', 'John McCain is arriving in England'], ['John McCain is biblical in England', 'John McCain is arriving in England']]
John McCain is calling in England
reduced: tensor([2, 2])




True found..




True found..
True found..
True found..
True found..




True found..
True found..
True found..




[['Democratic Party is party to United Kingdom', 'Democratic Party is supporting United Kingdom'], ['Democratic Party is have to United Kingdom', 'Democratic Party is supporting United Kingdom'], ['Democratic Party is legal to United Kingdom', 'Democratic Party is supporting United Kingdom']]
Democratic Party is similar to United Kingdom
reduced: tensor([2, 2, 2])
True found..




True found..
True found..
True found..




[['Soviet Union is hostile of Russia', 'Soviet Union is having relation with Russia'], ['Soviet Union is q159a of Russia', 'Soviet Union is having relation with Russia'], ['Soviet Union is arrangement of Russia', 'Soviet Union is having relation with Russia'], ['Soviet Union is country of Russia', 'Soviet Union is having relation with Russia'], ['Soviet Union is X0 of Russia', 'Soviet Union is having relation with Russia'], ['Soviet Union is X2 of Russia', 'Soviet Union is having relation with Russia'], ['Soviet Union is X10 of Russia', 'Soviet Union is having relation with Russia']]
Soviet Union is enemy of Russia
reduced: tensor([0, 2, 2, 2, 2, 2, 2])
True found..
True found..




True found..
True found..
True found..
True found..
True found..
True found..




[['Rick Santorum is element in Iowa', 'Rick Santorum is coming in Iowa'], ['Rick Santorum is back in Iowa', 'Rick Santorum is coming in Iowa'], ['Rick Santorum is pliable in Iowa', 'Rick Santorum is coming in Iowa'], ['Rick Santorum is heavy in Iowa', 'Rick Santorum is coming in Iowa']]
Rick Santorum is leading in Iowa
reduced: tensor([2, 2, 2, 2])
True found..
True found..




True found..




True found..
True found..
True found..




[['Fred Thompson is loud in Iowa', 'Fred Thompson is winning in Iowa'], ['Fred Thompson is derogatory in Iowa', 'Fred Thompson is winning in Iowa'], ['Fred Thompson is communicate in Iowa', 'Fred Thompson is winning in Iowa'], ['Fred Thompson is out in Iowa', 'Fred Thompson is winning in Iowa'], ['Fred Thompson is say in Iowa', 'Fred Thompson is winning in Iowa']]
Fred Thompson is speaking in Iowa
reduced: tensor([1, 0, 2, 0, 1])
True found..
True found..




True found..
True found..




[['Rick Santorum is stream George W. Bush', 'Rick Santorum is urging George W. Bush'], ['Rick Santorum is back George W. Bush', 'Rick Santorum is urging George W. Bush'], ['Rick Santorum is forcible George W. Bush', 'Rick Santorum is urging George W. Bush']]
Rick Santorum is blasting George W. Bush
reduced: tensor([2, 2, 2])
True found..
True found..
True found..
True found..
True found..
True found..




True found..
True found..
True found..
True found..
True found..




True found..




[['Turkey is figure into Mexico', 'Turkey is seeking in Mexico'], ['Turkey is country into Mexico', 'Turkey is seeking in Mexico'], ['Turkey is consist into Mexico', 'Turkey is seeking in Mexico'], ['Turkey is geometrical into Mexico', 'Turkey is seeking in Mexico'], ['Turkey is intersection into Mexico', 'Turkey is seeking in Mexico']]
Turkey is crossing into Mexico
reduced: tensor([2, 2, 2, 1, 2])




True found..




True found..
True found..
True found..
True found..
True found..
True found..




True found..
True found..




True found..
True found..
True found..
True found..
True found..
True found..
True found..




True found..
True found..
True found..
True found..
True found..
True found..




True found..
True found..




True found..
[['Winston Churchill is use Vladimir Putin', 'Winston Churchill is supporting Vladimir Putin'], ['Winston Churchill is male Vladimir Putin', 'Winston Churchill is supporting Vladimir Putin'], ['Winston Churchill is device Vladimir Putin', 'Winston Churchill is supporting Vladimir Putin'], ['Winston Churchill is derive Vladimir Putin', 'Winston Churchill is supporting Vladimir Putin']]
Winston Churchill is pressing Vladimir Putin
reduced: tensor([2, 1, 2, 2])
True found..
True found..
True found..
True found..
True found..
True found..
True found..




True found..




True found..
True found..
True found..
True found..




True found..
True found..




[['Mikhail Gorbachev is place a president of Harvard University', 'Mikhail Gorbachev is President of Harvard University'], ['Mikhail Gorbachev is hand a president of Harvard University', 'Mikhail Gorbachev is President of Harvard University']]
Mikhail Gorbachev is resigning a president of Harvard University
reduced: tensor([2, 2])
True found..




True found..
True found..
True found..
True found..
True found..
True found..




True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..




True found..
True found..




True found..
True found..
True found..




True found..
True found..
True found..




True found..




True found..




True found..




True found..
True found..
True found..
True found..
True found..




True found..
True found..
True found..
True found..
True found..
True found..




True found..
True found..
True found..




True found..
True found..
True found..
True found..




True found..




True found..
True found..
True found..




True found..




True found..
True found..




True found..




True found..
True found..




True found..
True found..
True found..
True found..




True found..
True found..




True found..
True found..
True found..




True found..
True found..
True found..
True found..




True found..
True found..




True found..




True found..




True found..
True found..
True found..




True found..
True found..




True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..




True found..




True found..
True found..
True found..




True found..
True found..
True found..




[['Google is employment on Android', 'Google is releasing for Android'], ['Google is trademark on Android', 'Google is releasing for Android']]
Google is working on Android
reduced: tensor([2, 2])
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..




True found..




True found..
True found..




True found..
True found..




True found..
True found..
True found..
True found..




True found..
True found..
True found..
True found..
True found..




True found..
True found..




True found..
True found..
True found..
True found..
True found..
True found..




True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..




True found..




True found..




True found..
True found..
True found..
True found..
[['Joseph Stalin is cause Russia', 'Joseph Stalin is leading Russia'], ['Joseph Stalin is son Russia', 'Joseph Stalin is leading Russia'], ['Joseph Stalin is Jacob Russia', 'Joseph Stalin is leading Russia'], ['Joseph Stalin is wife Russia', 'Joseph Stalin is leading Russia'], ['Joseph Stalin is Rachel Russia', 'Joseph Stalin is leading Russia']]
Joseph Stalin is destroying Russia
reduced: tensor([2, 2, 2, 0, 2])
True found..
True found..
True found..
True found..
True found..




[['Osama bin Laden is put in Iraq', 'Osama bin Laden is going into Iraq'], ['Osama bin Laden is extinguish in Iraq', 'Osama bin Laden is going into Iraq']]
Osama bin Laden is killing in Iraq
reduced: tensor([2, 0])
True found..




True found..
True found..
True found..




True found..




True found..




[["New York City is Western Australia's bigger", 'New York City is centre of Western Australia'], ["New York City is Western Australia's center", 'New York City is centre of Western Australia'], ["New York City is Western Australia's settlement", 'New York City is centre of Western Australia'], ["New York City is Western Australia's large", 'New York City is centre of Western Australia'], ["New York City is Western Australia's colloquial", 'New York City is centre of Western Australia']]
New York City is Western Australia's city
reduced: tensor([1, 2, 2, 1, 1])
True found..
True found..
True found..
True found..
True found..




True found..




True found..




True found..
True found..
True found..




True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..




True found..




True found..
True found..
True found..
True found..




True found..
True found..
True found..
True found..
True found..
True found..




[['Canada is have to Australia', 'Canada is joining with Australia'], ['Canada is informal to Australia', 'Canada is joining with Australia']]
Canada is similar to Australia
reduced: tensor([2, 2])




True found..
True found..
True found..
True found..
True found..
True found..
True found..




True found..
True found..
True found..
True found..




True found..




[['India is acquire on United States of America', 'India is accusing United States of America'], ['India is letter on United States of America', 'India is accusing United States of America'], ['India is alphabet on United States of America', 'India is accusing United States of America'], ['India is I on United States of America', 'India is accusing United States of America']]
India is gaining on United States of America
reduced: tensor([2, 2, 2, 2])
True found..




True found..




True found..




True found..




True found..
True found..
True found..




True found..
True found..
True found..
True found..
True found..




True found..
True found..
True found..




True found..
True found..
True found..
True found..
True found..




True found..
True found..
True found..




True found..




True found..




True found..
True found..
True found..
True found..




True found..
True found..




True found..
[['Russia is make to invade Iraq', 'Russia is attacking Iraq'], ['Russia is q159a to invade Iraq', 'Russia is attacking Iraq'], ['Russia is use to invade Iraq', 'Russia is attacking Iraq'], ['Russia is country to invade Iraq', 'Russia is attacking Iraq']]
Russia is threatening to invade Iraq
reduced: tensor([2, 2, 2, 2])




True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..




True found..




True found..




True found..
True found..
True found..
True found..
True found..
True found..




True found..
True found..




True found..
True found..




True found..
True found..
True found..




True found..
True found..
True found..




True found..
True found..
True found..




True found..




True found..
True found..
True found..




True found..




True found..
True found..
True found..
True found..
True found..
True found..
True found..




[['India is use United States of America', 'India is from United States of America'], ['India is letter United States of America', 'India is from United States of America'], ['India is alphabet United States of America', 'India is from United States of America'], ['India is I United States of America', 'India is from United States of America']]
India is fearing United States of America
reduced: tensor([2, 2, 2, 2])
True found..




True found..
True found..




True found..




True found..
True found..
True found..
True found..




[['Joseph Stalin is cause Russia', 'Joseph Stalin is leading Russia'], ['Joseph Stalin is son Russia', 'Joseph Stalin is leading Russia'], ['Joseph Stalin is Jacob Russia', 'Joseph Stalin is leading Russia'], ['Joseph Stalin is wife Russia', 'Joseph Stalin is leading Russia'], ['Joseph Stalin is Rachel Russia', 'Joseph Stalin is leading Russia']]
Joseph Stalin is destroying Russia
reduced: tensor([2, 2, 2, 0, 2])
True found..
[['United States of America is use France', 'United States of America is backing France'], ['United States of America is State France', 'United States of America is backing France'], ['United States of America is Q30 France', 'United States of America is backing France'], ['United States of America is device France', 'United States of America is backing France']]
United States of America is pressing France
reduced: tensor([2, 2, 2, 2])
True found..




True found..
True found..




True found..
[['Rick Santorum is element in Iowa', 'Rick Santorum is returning to Iowa'], ['Rick Santorum is back in Iowa', 'Rick Santorum is returning to Iowa'], ['Rick Santorum is pliable in Iowa', 'Rick Santorum is returning to Iowa'], ['Rick Santorum is heavy in Iowa', 'Rick Santorum is returning to Iowa']]
Rick Santorum is leading in Iowa
reduced: tensor([2, 2, 1, 1])




True found..




True found..
True found..
True found..




True found..
True found..




True found..




True found..
True found..
True found..
True found..
True found..
True found..




True found..
True found..
True found..




True found..
True found..
True found..
True found..
True found..
True found..




True found..




True found..
True found..
True found..
True found..
True found..
True found..




True found..
True found..




True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..




True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
[['Joseph Stalin is cause Russia', 'Joseph Stalin is ruling Russia'], ['Joseph Stalin is son Russia', 'Joseph Stalin is ruling Russia'], ['Joseph Stalin is Jacob Russia', 'Joseph Stalin is ruling Russia'], ['Joseph Stalin is wife Russia', 'Joseph Stalin is ruling Russia'], ['Joseph Stalin is Rachel Russia', 'Joseph Stalin is ruling Russia']]
Joseph Stalin is destroying Russia
reduced: tensor([2, 2, 2, 2, 2])
True found..
True found..




True found..
True found..
True found..
True found..
True found..
True found..




True found..
True found..
True found..




True found..
True found..




True found..
True found..
True found..
True found..
[['Turkey is figure into Mexico', 'Turkey is sending into Mexico'], ['Turkey is country into Mexico', 'Turkey is sending into Mexico'], ['Turkey is consist into Mexico', 'Turkey is sending into Mexico'], ['Turkey is geometrical into Mexico', 'Turkey is sending into Mexico'], ['Turkey is intersection into Mexico', 'Turkey is sending into Mexico']]
Turkey is crossing into Mexico
reduced: tensor([2, 2, 2, 2, 2])
True found..
True found..




True found..
True found..




True found..
True found..
True found..
[['England national rugby union team is act against Denver England national rugby union teamroncos', 'England national rugby union team is losing to Denver England national rugby union teamroncos'], ['England national rugby union team is set against Denver England national rugby union teamroncos', 'England national rugby union team is losing to Denver England national rugby union teamroncos'], ['England national rugby union team is horse against Denver England national rugby union teamroncos', 'England national rugby union team is losing to Denver England national rugby union teamroncos'], ['England national rugby union team is animal against Denver England national rugby union teamroncos', 'England national rugby union team is losing to Denver England national rugby union teamroncos'], ['England national rugby union team is front against Denver England national rugby union teamroncos', 'England national rugby union team is losing to Denver Englan



True found..
True found..




True found..
True found..
True found..
True found..




True found..




True found..
True found..




True found..
True found..
True found..
True found..
True found..




[['Ted Kennedy is take for John McCain', 'John McCain is picking Ted Kennedy'], ['Ted Kennedy is Edward for John McCain', 'John McCain is picking Ted Kennedy']]
Ted Kennedy is campaigning for John McCain
reduced: tensor([2, 2])




True found..
True found..




True found..
True found..
True found..




True found..
True found..
True found..
True found..
True found..
True found..




True found..




True found..




True found..
True found..
True found..




True found..
True found..
True found..
True found..




True found..
True found..
True found..




True found..
True found..
True found..
True found..
True found..




True found..
True found..




True found..
True found..
True found..
True found..
True found..




True found..




True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..




True found..
True found..
True found..
True found..




True found..
True found..
True found..




True found..
True found..




True found..
True found..




True found..
True found..
True found..
True found..
True found..
True found..




[['Nokia is collision Apple Inc.', 'Apple Inc. is getting Nokia'], ['Nokia is c Apple Inc.', 'Apple Inc. is getting Nokia'], ['Nokia is crash Apple Inc.', 'Apple Inc. is getting Nokia'], ['Nokia is violent Apple Inc.', 'Apple Inc. is getting Nokia'], ['Nokia is destruction Apple Inc.', 'Apple Inc. is getting Nokia'], ['Nokia is Tampere Apple Inc.', 'Apple Inc. is getting Nokia']]
Nokia is crushing Apple Inc.
reduced: tensor([2, 2, 2, 0, 0, 2])
True found..
True found..
True found..
True found..




True found..
True found..
True found..
True found..




True found..
True found..
True found..
True found..




True found..




True found..
True found..
True found..




True found..
True found..
True found..
[['Mikhail Gorbachev is place a president of Harvard University', 'Mikhail Gorbachev is president of Harvard University'], ['Mikhail Gorbachev is hand a president of Harvard University', 'Mikhail Gorbachev is president of Harvard University']]
Mikhail Gorbachev is resigning a president of Harvard University
reduced: tensor([2, 2])
True found..
True found..
True found..
True found..
[['Bill Clinton is take for John McCain', 'John McCain is wanting Bill Clinton'], ['Bill Clinton is nickname for John McCain', 'John McCain is wanting Bill Clinton'], ['Bill Clinton is slang for John McCain', 'John McCain is wanting Bill Clinton'], ['Bill Clinton is constabulary for John McCain', 'John McCain is wanting Bill Clinton'], ['Bill Clinton is british for John McCain', 'John McCain is wanting Bill Clinton']]
Bill Clinton is campaigning for John McCain
reduced: tensor([2, 1, 1, 2, 2])
True found..
True found..
True found..
True found..
True found..
True found..



True found..




True found..




True found..
True found..
True found..
True found..
True found..




True found..
True found..
True found..
True found..
[['India is acquire on United States of America', 'India is siding with United States of America'], ['India is letter on United States of America', 'India is siding with United States of America'], ['India is alphabet on United States of America', 'India is siding with United States of America'], ['India is I on United States of America', 'India is siding with United States of America']]
India is gaining on United States of America
reduced: tensor([2, 2, 2, 2])
True found..
True found..
True found..
True found..
True found..




True found..
True found..
True found..




True found..




True found..
True found..
True found..
True found..




True found..




True found..




True found..
True found..
True found..




True found..
True found..
True found..




True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..




[['Ronald Reagan is remain in Washington, D.C.', 'Ronald Reagan is arriving in Washington, D.C.'], ['Ronald Reagan is borrow in Washington, D.C.', 'Ronald Reagan is arriving in Washington, D.C.'], ['Ronald Reagan is rest in Washington, D.C.', 'Ronald Reagan is arriving in Washington, D.C.'], ['Ronald Reagan is abide in Washington, D.C.', 'Ronald Reagan is arriving in Washington, D.C.'], ['Ronald Reagan is rest in Washington, D.C.', 'Ronald Reagan is arriving in Washington, D.C.'], ['Ronald Reagan is male in Washington, D.C.', 'Ronald Reagan is arriving in Washington, D.C.']]
Ronald Reagan is sitting in Washington, D.C.
reduced: tensor([0, 2, 0, 2, 0, 1])
True found..




True found..
True found..
True found..




[['India is perception United Nations', 'India is feeling about United Nations'], ['India is letter United Nations', 'India is feeling about United Nations'], ['India is visual United Nations', 'India is feeling about United Nations'], ['India is physical United Nations', 'India is feeling about United Nations'], ['India is alphabet United Nations', 'India is feeling about United Nations'], ['India is I United Nations', 'India is feeling about United Nations']]
India is viewing United Nations
reduced: tensor([2, 2, 2, 1, 2, 2])
True found..
True found..
True found..
True found..
True found..




True found..
True found..
True found..




True found..
True found..
True found..
True found..
True found..
True found..
True found..
[['Cuba is being move by Japan', 'Japan is controlling Cuba']]
Cuba is being invaded by Japan
reduced: tensor([2])




[["Shenzhen is indefinite of United Kingdom's city", 'Shenzhen is centre in United Kingdom'], ["Shenzhen is c of United Kingdom's city", 'Shenzhen is centre in United Kingdom'], ["Shenzhen is bigger of United Kingdom's city", 'Shenzhen is centre in United Kingdom'], ["Shenzhen is province of United Kingdom's city", 'Shenzhen is centre in United Kingdom'], ["Shenzhen is Guangdong of United Kingdom's city", 'Shenzhen is centre in United Kingdom'], ["Shenzhen is largest of United Kingdom's city", 'Shenzhen is centre in United Kingdom'], ["Shenzhen is third of United Kingdom's city", 'Shenzhen is centre in United Kingdom'], ["Shenzhen is settlement of United Kingdom's city", 'Shenzhen is centre in United Kingdom'], ["Shenzhen is large of United Kingdom's city", 'Shenzhen is centre in United Kingdom']]
Shenzhen is one of United Kingdom's city
reduced: tensor([2, 2, 1, 2, 2, 2, 0, 2, 2])
True found..
True found..




True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..




True found..
True found..




True found..
True found..
True found..
True found..




True found..




True found..




True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..
True found..




True found..
True found..




True found..
True found..




True found..




[['San Antonio Spurs is act at TD San Antonio Spursank Garden', 'San Antonio Spurs is losing at TD San Antonio Spursank Garden'], ['San Antonio Spurs is group at TD San Antonio Spursank Garden', 'San Antonio Spurs is losing at TD San Antonio Spursank Garden'], ['San Antonio Spurs is ethnic at TD San Antonio Spursank Garden', 'San Antonio Spurs is losing at TD San Antonio Spursank Garden'], ['San Antonio Spurs is Africa at TD San Antonio Spursank Garden', 'San Antonio Spurs is losing at TD San Antonio Spursank Garden'], ['San Antonio Spurs is nonHYPHENbantu at TD San Antonio Spursank Garden', 'San Antonio Spurs is losing at TD San Antonio Spursank Garden'], ['San Antonio Spurs is forage at TD San Antonio Spursank Garden', 'San Antonio Spurs is losing at TD San Antonio Spursank Garden'], ['San Antonio Spurs is southwestern at TD San Antonio Spursank Garden', 'San Antonio Spurs is losing at TD San Antonio Spursank Garden']]
San Antonio Spurs is playing at TD San Antonio Spursank Garden
re



True found..




True found..




[['FOX is cross Futurama', 'Futurama is airing on FOX'], ['FOX is carnivore Futurama', 'Futurama is airing on FOX'], ['FOX is vulpe Futurama', 'Futurama is airing on FOX']]
FOX is cancelling Futurama
reduced: tensor([2, 2, 2])
True found..
True found..




True found..
True found..
[['Google is move against AFC North', 'Google is losing to AFC North'], ['Google is trademark against AFC North', 'Google is losing to AFC North']]
Google is going against AFC North
reduced: tensor([0, 2])
True found..
True found..
True found..
True found..
True found..


KeyboardInterrupt: 

## Semeval

In [215]:
from scripts.parse_data import read
import os
import pandas as pd

In [216]:
def read(lang1, lang2=None, graded=True):
    filename = "SemEval2020-Task2-Dev"
    language_name = ""
    if lang2 is None:
        filename = os.path.join(filename, "monolingual")
        language_name = lang1
    else:
        filename = os.path.join(filename, "cross-lingual")
        language_name = "-".join([lang1, lang2]) if lang1[0] < lang2[0] else "-".join([lang2, lang1])
    grad_or_bin = "graded" if graded else "binary"
    filename = os.path.join(filename, grad_or_bin, ".".join([language_name, grad_or_bin, "dev.data.txt"]))
    df = pd.read_csv(filename, delimiter=" ", header=None, names=["premise", "hypothesis", "score"])
    # if lang2 is not None:
    #     df.premise = df.premise.str.replace(lang1 + "_", "")
    #     df.premise = df.premise.str.replace(lang2 + "_", "")
    #     df.hypothesis = df.hypothesis.str.replace(lang1 + "_", "")
    #     df.hypothesis = df.hypothesis.str.replace(lang2 + "_", "")
    return df

In [217]:
data_frame = read("en", graded=False)

In [218]:
data_frame

Unnamed: 0,premise,hypothesis,score
0,custard,building,0
1,flashlight,agency,0
2,sandwich,food,1
3,enemy,friend,0
4,incense,aroma,1
...,...,...,...
311,selection,choice,1
312,material,rubber,0
313,beat,defeat,1
314,ape,beast,1


In [219]:
batch_of_pairs = []
premise_texts = data_frame["premise"].tolist()
hyp_texts = data_frame["hypothesis"].tolist()

for i, text in enumerate(premise_texts):
    batch_of_pairs.append([text, hyp_texts[i]])

In [None]:
with torch.no_grad():

    batch = collate_tokens(
        [roberta.encode(pair[0], pair[1]) for pair in batch_of_pairs], pad_idx=1
    )

    logprobs = roberta.predict('mnli', batch)
    print(logprobs)
    guesses = logprobs.max(dim=1)

In [221]:
import numpy as np

guesses_probs = logprobs.max(dim=1)
guesses = logprobs.argmax(dim=1)

In [222]:
guesses_probs = np.exp(guesses_probs[0].detach().numpy())

In [229]:
preds = []

for guess, prob in zip(guesses, guesses_probs):
    if guess == 2:# and prob >= 0.8:
        preds.append(1)
    else:
        preds.append(0)

In [230]:
gold = data_frame.score.tolist()

In [231]:
from sklearn.metrics import precision_recall_fscore_support as pr
#bPrecis, bRecall, bFscore, bSupport = pr(data_frame.score.tolist(), [1 if i>=1.0 else 0 for i in guesses])
bPrecis, bRecall, bFscore, bSupport = pr(gold, preds)

print("Precision: " +  str(bPrecis[1]))
print("Recall: " +  str(bRecall[1]))
print("Fscore: " +  str(bFscore[1]))
print(bSupport)

Precision: 0.8325123152709359
Recall: 0.949438202247191
Fscore: 0.8871391076115485
[138 178]
