In [1]:
import json
import time
import requests
import re
from tqdm import tqdm_notebook as tqdm

In [2]:
# HTTP headers declaring a JSON request body and asking for a JSON reply;
# passed as `headers=` to the classifier POST request further down.
headers = {
    "Content-Type": "application/json",
    'accept': 'application/json'
}

In [3]:
def json_load(name):
    """Read the UTF-8 encoded JSON file at ``name`` and return the decoded object."""
    path = f"{name}"
    with open(path, mode='r', encoding='utf-8') as handle:
        parsed = json.load(handle)
    return parsed
    
def json_save(name, item):
    """Write ``item`` to the file ``name`` as UTF-8 JSON, indented by two spaces.

    Non-ASCII characters are written verbatim rather than as ``\\uXXXX`` escapes.
    """
    path = f"{name}"
    with open(path, mode='w', encoding='utf-8') as handle:
        json.dump(item, handle, indent=2, ensure_ascii=False)

## Ask QAnswer QALD-test questions

In [50]:
# Load the QALD test questions from disk; each record carries the question text
# and its reference SPARQL queries (see the sample printed in the next cell).
qald_test = json_load("../processed_data/QALD/qald_test_wdt.json")

In [51]:
# Quick sanity check: number of test questions and the first record.
len(qald_test), qald_test[0]

(111,
 {'uid': '201',
  'question_text': 'What is the founding year of the brewery that produces Pilsner Urquell?',
  'query_wikidata': 'PREFIX wd: <http://www.wikidata.org/entity/> PREFIX wdt: <http://www.wikidata.org/prop/direct/> SELECT ?date WHERE { wd:Q948831 wdt:P571 ?date . } ',
  'query_dbpedia': 'PREFIX dbo: <http://dbpedia.org/ontology/> SELECT DISTINCT xsd:date(?num) WHERE { <http://dbpedia.org/resource/Pilsner_Urquell> <http://dbpedia.org/property/brewery> ?uri . ?uri dbo:foundingYear ?num }',
  'answers': '',
  'question_text_ru': ['В каком году была основана пивоварня, производящая Pilsner Urquell?',
   'Год основаная пивоварни, производящей Pilsner Urquell?',
   'Год основания завода, который производит Pilsner Urquell?'],
  'question_text_de': ['Wann wurde die Brauerei, die Pilner Urquell braut, gegründet?']})

In [19]:
# Ask QAnswer for SPARQL query candidates for every QALD test question and
# store the (query, confidence) pairs per question uid.
QANSWER_URL = "https://qanswer-core1.univ-st-etienne.fr/api/qa/full"

qanswer_test_responses = list()
for q in tqdm(qald_test):
    # Pass the question through `params` so that requests URL-encodes it;
    # formatting it into the URL by hand breaks on characters such as '&', '#' or '+'.
    http_response = requests.get(
        QANSWER_URL,
        params={'question': q['question_text'], 'lang': 'en', 'kb': 'wikidata'},
        timeout=60,  # do not hang forever on an unresponsive server
    )
    # Fail loudly on HTTP errors instead of with an opaque JSON/KeyError below.
    http_response.raise_for_status()
    response = http_response.json()['queries']

    qanswer_test_responses.append({
        'uid': q['uid'],
        'response': [{'query': r['query'], 'confidence': r['confidence']} for r in response]
    })
    time.sleep(1)  # throttle: at most one request per second

json_save("../processed_data/QALD/qanswer_test_responses.json", qanswer_test_responses)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


HBox(children=(FloatProgress(value=0.0, max=111.0), HTML(value='')))




## Execute QAnswer responses

`python scripts/qald_run_sparql_candidates_on_wikidata.py`

## Get true answers

In [52]:
from SPARQLWrapper import SPARQLWrapper, JSON

# Single client bound to the Wikidata SPARQL endpoint; the loop in the next cell
# sets a new query on it for every test question.
sparql = SPARQLWrapper("https://query.wikidata.org/bigdata/namespace/wdq/sparql")

In [53]:
def _run_wikidata_query(query, retries=1, pause=1):
    """Run ``query`` on the module-level ``sparql`` client and return its answers.

    Returns the list of result bindings for SELECT-style responses, the boolean
    for ASK-style responses, an empty list when the response has neither key,
    and ``None`` when every attempt failed (the query is printed in that case).
    The query is tried ``retries + 1`` times with ``pause`` seconds in between.
    """
    for attempt in range(retries + 1):
        try:
            sparql.setQuery(query)
            sparql.setReturnFormat(JSON)
            results = sparql.query().convert()

            if "results" in results:
                return list(results["results"]["bindings"])
            if "boolean" in results:
                return results["boolean"]
            return list()
        except Exception:
            # `Exception` (not a bare except) so that Ctrl-C still stops the loop.
            if attempt < retries:
                time.sleep(pause)
    print(query)  # report the query that could not be executed
    return None


test_new = list()
for q in tqdm(qald_test):
    time.sleep(1)  # throttle requests to the public endpoint
    answers = _run_wikidata_query(q['query_wikidata'])

    # Empty or failed results are stored as None, but a legitimate `False`
    # from an ASK query must be kept: a plain truthiness test would erase it.
    q['results_wikidata'] = answers if (answers or answers is False) else None
    test_new.append(q)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


HBox(children=(FloatProgress(value=0.0, max=111.0), HTML(value='')))

 SELECT ?s1 WHERE { ?s1 <http://www.wikidata.org/prop/direct/P31> <http://www.wikidata.org/entity/Q6256> . ?s2  <http://www.wikidata.org/prop/direct/P17>  ?s1 . ?s2  <http://www.wikidata.org/prop/direct/P31>  <http://www.wikidata.org/entity/Q35509> . } GROUP BY ?s1 HAVING ( COUNT(?s2) > 2 )




NO ANSWER
SELECT ?o1 WHERE { <http://www.wikidata.org/entity/Q567 <http://www.wikidata.org/prop/direct/P1477>  ?o1 .  }
NO ANSWER
NO ANSWER
# What languages are spoken in Pakistan? PREFIX wdt: <http://www.wikidata.org/prop/direct/> PREFIX wd: <http://www.wikidata.org/entity/> SELECT * { wd:Q843 wdt:P2936 ?uri . }
PREFIX wdt: <http://www.wikidata.org/prop/direct/> PREFIX wd: <http://www.wikidata.org/entity/> SELECT * { wd:Q1124 wdt:P40 ?child . ?child wdt:P26 ?uri 
NO ANSWER
NO ANSWER
PREFIX p: <http://www.wikidata.org/prop/> PREFIX ps: <https://w3id.org/payswarm#> PREFIX pq: <http://www.wikidata.org/prop/qualifier/> PREFIX wdt: <http://www.wikidata.org/prop/direct/> PREFIX wd: <http://www.wikidata.org/entity/> PREFIX bd: <http://www.bigdata.com/rdf#> PREFIX wikibase: <http://wikiba.se/ontology#> SELECT DISTINCT ?uri WHERE { { ?uri ?allowed wd:Q9134365 . # british monarch VALUES ?allowed { wdt:P39 wdt:P97 } # postion held, noble title } UNION { ?uri wdt:P39 ?position . VALUES ?position 

In [54]:
# Persist the test questions together with their 'results_wikidata' field.
json_save("../processed_data/QALD/qald_test_wdt_answers.json", test_new)

## Get Labels from QALD

`python ../scripts/get_qald_labels_wikidata.py`

In [4]:
# Labels of the answer candidates, indexed per question.
# NOTE(review): presumably written by get_qald_labels_wikidata.py — confirm.
test_labels = json_load("../processed_data/QALD/qanswer_test_responses_labels.json")

## Mark if the question was correctly answered by QAnswer

In [38]:
def precision_recall(true: list, pred: list):
    """
    Compute set-based precision and recall of predicted answers.

    Precision is the share of distinct predictions that are correct;
    recall is the share of distinct true answers that were predicted.
    Duplicates in either list are counted once.

    :param true: the reference (gold) answers; items must be hashable
    :param pred: the predicted answers; items must be hashable
    :return: tuple ``(precision, recall)``; ``(1, 1)`` when both inputs are
        empty, and 0 for a measure whose denominator would be empty
    """
    true_set, pred_set = set(true), set(pred)
    if not true_set and not pred_set:
        return 1, 1

    # Count matches and denominators over the same (deduplicated) sets, so a
    # repeated answer cannot deflate either measure.
    intersect = len(pred_set & true_set)
    precision = intersect / len(pred_set) if pred_set else 0
    recall = intersect / len(true_set) if true_set else 0

    return precision, recall

In [55]:
# Reload the questions with their stored Wikidata results.
test_new = json_load("../processed_data/QALD/qald_test_wdt_answers.json")
# QAnswer candidates; the marking loop below reads a 'result' entry from each
# candidate. NOTE(review): presumably produced by the SPARQL execution script — confirm.
qanswer_results_new = json_load("../processed_data/QALD/qanswer_test_responses_extended.json")

In [56]:
def _binding_values(bindings):
    """Return the distinct 'value' fields found in SPARQL bindings, in first-seen order."""
    values = list()
    for binding in bindings:
        for term in binding.values():
            if term['value'] not in values:
                values.append(term['value'])
    return values


# Mark every QAnswer candidate with 'is_true' by comparing its result with the
# gold Wikidata result of the question at the same index.
for i in range(len(qanswer_results_new)):
    gold = test_new[i]['results_wikidata']

    # get true answers
    if type(gold) == list:
        true = _binding_values(gold)
    elif type(gold) == bool:
        true = gold
    else:
        true = list()

    # for each query candidate get predicted answers and mark if it was true
    for candidate in qanswer_results_new[i]['response']:
        result = candidate['result']
        if type(gold) != type(result):
            candidate['is_true'] = False
        elif type(result) == list:
            predicted = _binding_values(result)
            prec, rec = precision_recall(true, predicted)
            # a candidate counts as correct when both measures reach 0.5
            candidate['is_true'] = prec >= 0.5 and rec >= 0.5
        elif type(result) == bool:
            candidate['is_true'] = result == true
        else:
            # Same type but neither list nor bool (e.g. both None): always set
            # the flag so later evaluation never hits a missing 'is_true' key.
            candidate['is_true'] = False

In [138]:
# Write the candidates, now carrying their 'is_true' flags, back to the same
# file they were loaded from.
json_save("../processed_data/QALD/qanswer_test_responses_extended.json", qanswer_results_new)

## Evaluate the classifier

In [139]:
def precision_at_k(data, k=1):
    """
    Mean precision@k: the share of relevant items among the top-k responses,
    averaged over all questions.

    :param data: iterable of dicts, each with a 'response' list whose items
        carry a truthy/falsy 'is_true' flag
    :param k: number of top responses to inspect per question; must be > 0
    :return: mean precision@k as a float; 0.0 when ``data`` is empty

    Questions with fewer than ``k`` responses are still divided by ``k``.
    """
    assert k > 0
    prec = [
        sum(1 for response in q['response'][:k] if response['is_true']) / k
        for q in data
    ]
    if not prec:
        # nothing to evaluate; avoid dividing by zero
        return 0.0

    return sum(prec) / len(prec)

In [153]:
# Precision@1 and precision@5 of the QAnswer candidates before classifier filtering.
precision_at_k(qanswer_results_new, 1), precision_at_k(qanswer_results_new, 5) # for initial set

(0.2882882882882883, 0.08918918918918917)

In [156]:
# Filter answer candidates with the remote classifier and create a new dataset
# for evaluation: one {'response': [{'is_true': prediction}, ...]} per question.
CLASSIFIER_URL = 'http://webengineering.ins.hs-anhalt.de:41003/predict'

qanswer_results_filtered = list()

for i in tqdm(range(len(qanswer_results_new))):
    question_text = qald_test[i]['question_text']
    # one [question, answer-label text] pair per answer candidate
    batch = [[question_text, ' '.join(labels)] for labels in test_labels[i]['responses']]

    # NOTE(review): encoding with 'ignore' silently drops every non-ASCII
    # character from the payload — confirm that the classifier expects this.
    data = json.dumps(batch, ensure_ascii=False).encode('ascii', 'ignore').strip()

    json_response = requests.post(CLASSIFIER_URL,
                                  data=data,
                                  headers=headers,
                                  timeout=120)  # do not hang forever on a stuck server
    # Fail loudly on HTTP errors instead of with an opaque JSON/KeyError below.
    json_response.raise_for_status()

    preds = json_response.json()['predictions'] # get predictions for the q-a tuples set
    qanswer_results_filtered.append({'response': [{'is_true': result} for result in preds]})

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=111.0), HTML(value='')))




In [157]:
# Precision@1 and precision@5 computed on the classifier's predictions.
precision_at_k(qanswer_results_filtered, k=1), precision_at_k(qanswer_results_filtered, k=5)

(0.2972972972972973, 0.2594594594594594)

In [158]:
# Display the classifier predictions for every question (full dump of the list).
qanswer_results_filtered

[{'response': [{'is_true': 1},
   {'is_true': 1},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 1},
   {'is_true': 1},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 1},
   {'is_true': 1},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 1},
   {'is_true': 1},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 1},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 1},
   {'is_true': 1},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 1},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 0},
   {'is_true': 1},
   {'is_true': 1},
