In [1]:
%load_ext autoreload
%autoreload 2

import checklist
import spacy
import itertools

import checklist.editor
import checklist.text_generation
from checklist.test_types import MFT, INV, DIR
from checklist.expect import Expect
from checklist.test_suite import TestSuite
import numpy as np
import spacy
from checklist.perturb import Perturb
from checklist.pred_wrapper import PredictorWrapper
import numpy as np
import random
import torch
from transformers import pipeline
import json
import requests
torch.manual_seed(0)
random.seed(0)
np.random.seed(0)
from scipy import spatial
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
import re
import string
from collections import Counter

In [2]:
# Sentence-embedding model shared by sbert_similarity(); created once at module
# level so every similarity call reuses the same instance.
model = SentenceTransformer('all-MiniLM-L6-v2')

In [3]:
# Per-metric cut-offs, consumed positionally by answer_or_not(): a metric
# "votes" for a match when its score is strictly greater than its entry here.
thresholds = [
    0.25,  # Jaccard overlap of the token sets
    0.25,  # TF-IDF cosine similarity
    0.7,   # sentence-embedding (SBERT) cosine similarity
    0.25,  # token-level F1
]

In [4]:

def jaccard_similarity(x, y):
    """Return the Jaccard similarity (intersection over union) of two collections.

    Args:
        x: Iterable of hashable items, e.g. a list of tokens.
        y: Iterable of hashable items.

    Returns:
        A float in [0, 1]. When both inputs are empty the ratio is undefined;
        0.0 is returned in that case instead of raising ZeroDivisionError.
    """
    first, second = set(x), set(y)
    union = first | second
    if not union:
        # Both inputs are empty: treat "nothing vs. nothing" as no evidence of overlap.
        return 0.0
    return len(first & second) / float(len(union))

def tfidf_similarity(corpus):
    """Cosine similarity between the TF-IDF vectors of the first two documents.

    Args:
        corpus: Sequence of at least two strings. The vectorizer is fitted on
            every document, but only documents 0 and 1 are compared.

    Returns:
        The cosine similarity of documents 0 and 1, or 0.0 when the vectorizer
        cannot build a vocabulary from the corpus.
    """
    tfidf_vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = tfidf_vectorizer.fit_transform(corpus)
    except ValueError as err:
        # With its default token pattern TfidfVectorizer only keeps tokens of
        # two or more word characters, so a corpus such as ["5", "5"] or
        # ["", ""] produces an "empty vocabulary" error. There is nothing to
        # compare in that case, so report zero similarity; any other
        # ValueError is a genuine problem and is re-raised.
        if "empty vocabulary" not in str(err):
            raise
        return 0.0
    cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
    return cosine_sim[0][1]

def sbert_similarity(sentence, changed_sentence):
    """Cosine similarity of the two sentences' embeddings.

    Both sentences are encoded in a single call to the module-level ``model``;
    the result is one minus the cosine distance between the two embeddings.
    """
    original_vec, changed_vec = model.encode([sentence, changed_sentence])
    return 1 - spatial.distance.cosine(original_vec, changed_vec)

def normalize_answer(s):
    """Lower-case the text, drop ASCII punctuation and collapse whitespace.

    Articles ("a", "an", "the") are kept: the article-removal step that this
    kind of normalizer often includes is intentionally disabled here.

    Args:
        s: Text to normalize.

    Returns:
        The normalized string, with words separated by single spaces and no
        leading or trailing whitespace.
    """
    # Deleting every character of string.punctuation via a translation table
    # is equivalent to filtering the text character by character.
    drop_punctuation = str.maketrans('', '', string.punctuation)
    without_punctuation = s.lower().translate(drop_punctuation)
    # split() with no argument discards all runs of whitespace.
    return ' '.join(without_punctuation.split())


def f1_score(ground_truth, prediction):
    """Token-level F1 between a prediction and its reference answer.

    Both strings are normalized with ``normalize_answer`` and split on
    whitespace; the overlap is the multiset intersection of their tokens.

    Returns:
        0 when the two token multisets share nothing, otherwise the harmonic
        mean of token precision and recall as a float.
    """
    predicted = normalize_answer(prediction).split()
    reference = normalize_answer(ground_truth).split()
    overlap = sum((Counter(predicted) & Counter(reference)).values())
    if not overlap:
        return 0
    precision = float(overlap) / len(predicted)
    recall = float(overlap) / len(reference)
    return (2 * precision * recall) / (precision + recall)


def answer_or_not(sentence1, sentence2, thresholds):
    """Decide by majority vote whether two texts express the same answer.

    Both texts are normalized, then scored with four metrics, in this order:
    Jaccard token overlap, TF-IDF cosine similarity, sentence-embedding cosine
    similarity and token-level F1. A metric votes for a match when its score
    is strictly greater than the threshold at the same position.

    Args:
        sentence1: First text (e.g. the model prediction).
        sentence2: Second text (e.g. the reference answer).
        thresholds: Sequence with at least four numeric cut-offs, one per
            metric in the order listed above.

    Returns:
        1 if at least three of the four metrics vote for a match, else 0.
        Texts that are empty after normalization never match.
    """
    sentence1 = normalize_answer(sentence1)
    sentence2 = normalize_answer(sentence2)
    if not sentence1 or not sentence2:
        # An empty text cannot match anything. Without this guard two empty
        # texts would reach the TF-IDF vectorizer and fail with an
        # "empty vocabulary" error instead of yielding a verdict.
        return 0
    scores = [
        jaccard_similarity(sentence1.split(" "), sentence2.split(" ")),
        tfidf_similarity([sentence1, sentence2]),
        sbert_similarity(sentence1, sentence2),
        f1_score(sentence1, sentence2),
    ]
    votes = sum(1 for i, score in enumerate(scores) if score > thresholds[i])
    return 1 if votes >= 3 else 0

In [5]:
def pred_answer(question, context, selected_skill_index, timeout=60):
    """Query one of the selected skills and return its answer text.

    Args:
        question: Question text sent as the ``query`` field.
        context: Context passage sent in ``skill_args``.
        selected_skill_index: Index into the module-level ``selected_skills``.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled server blocks the caller indefinitely.
            Pass ``None`` to wait forever.

    Returns:
        The ``output`` field of the first prediction in the response, or the
        sentinel string "failed" if anything goes wrong (unknown skill index,
        network error, timeout, unexpected response shape).
    """
    try:
        skill = selected_skills[selected_skill_index]
        skill_id = skill['id']
        skill_args = skill['default_skill_args']
        query = {
            "query": question,
            "skill_args": {
                "context": context,
                "base_model": skill_args['base_model'],
                "adapter": skill_args['adapter']
            },
        }

        response = requests.post(
            "https://square.ukp-lab.de/api/skill-manager/skill/" + str(skill_id) + "/query",
            data=json.dumps(query),
            headers={'Content-type': 'application/json'},
            timeout=timeout,
        )
        predictions = response.json()
        return predictions['predictions'][0]['prediction_output']['output']
    except Exception:
        # Deliberate best effort: callers treat "failed" as an ordinary
        # (non-matching) prediction, so no error is propagated from here.
        return "failed"

In [6]:
# Fetch the list of available skills. The timeout keeps a stalled server from
# hanging the notebook, and raise_for_status() turns an HTTP error into a clear
# exception instead of a confusing failure while iterating the error payload.
response = requests.get("https://square.ukp-lab.de/api/skill-manager/skill", timeout=60)
response.raise_for_status()
skills = response.json()

# Keep only the skills whose name mentions "narrative".
selected_skills = []
for skill in skills:
    # Alternative name filters, left disabled:
    #if "open" in skill['name'].lower() and "bio" not in skill['name'].lower():
    #if  "bio" in skill['name'].lower():
    if "narrative" in skill['name'].lower():
        selected_skills.append(skill)
length_of_skills = len(selected_skills)
print(length_of_skills)

1


In [7]:
# Input file read by the selection loop.
data_file = "narrative_qa.txt"

# One result container and one output file name per selected skill; the two
# lists are index-aligned with selected_skills.
json_dicts, json_file_names = [], []
for i in range(length_of_skills):
    json_dicts.append({"tests": []})
    file_name = selected_skills[i]['id'] + '.json'
    json_file_names.append(file_name)

In [8]:
# Stop once this many examples have been kept.
MAX_SELECTED = 500

questions = []
answers = []
contexts = []

# Keep only the examples that the first selected skill answers correctly
# according to answer_or_not(), and write them out in the same
# "question|||||context|||||answer" layout as the input file.
#
# The context managers guarantee that both files are flushed and closed even
# when the loop is interrupted. file_to_write stays bound afterwards, and
# calling close() on it again later is a harmless no-op.
with open(data_file, 'r') as open_data, open('narrativeqa_selected.txt', 'w') as file_to_write:
    for line in open_data:
        try:
            elements = line.strip().split("|||||")
            context = normalize_answer(elements[1])
            ans = normalize_answer(elements[2])
            question = normalize_answer(elements[0])
            print("Question")
            prediction = pred_answer(question, context, 0).lower()
            feedback = answer_or_not(prediction, ans, thresholds)
            print()
            if feedback == 1:
                questions.append(question)
                answers.append(ans)
                contexts.append(context)
                # All three fields are already normalized, so they can be
                # written as they are.
                file_to_write.write(question + "|||||" + context + "|||||" + ans + "\n")
            if len(questions) == MAX_SELECTED:
                break
        except Exception as ex:
            # Best effort: report the problem (e.g. a line with fewer than
            # three fields) and move on to the next line.
            print(ex)
            continue

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question

Question



KeyboardInterrupt: 

In [None]:
# NOTE(review): the string literal below is disabled code kept for reference
# only; it has no effect when the cell runs. It would not work as written
# against the cells visible here: it indexes answers[i][j] as if every answer
# were a list, whereas the selection loop appends one string per question, and
# it reads a module-level `predictions` that none of the visible cells define.
'''
#file_to_write = open('nqopen_selected.txt','w')
for i in range(len(questions)):
    sentence = questions[i]
    for j in range(len(answers[i])):
        answers[i][j] = normalize_answer(answers[i][j])
        sentence = sentence + "|||||"+answers[i][j].strip()
    file_to_write.write(sentence+"\n")
    print("Question:")
    print(questions[i])
    print("Answers:")
    print(answers[i])
    print("Prediction:")
    print(normalize_answer(predictions[i])+'\n')
    print()
'''   

In [9]:
# Release the handle for 'narrativeqa_selected.txt' so buffered lines reach the
# disk; close() is safe to call even if the file has already been closed.
file_to_write.close()