In [161]:
import torch

import pandas as pd
import numpy as np
import json
from SPARQLWrapper import SPARQLWrapper, JSON
import requests 
import itertools
import spotlight
import tagme
import inflect
p = inflect.engine()
import re
import sys

from nltk.stem.porter import *
stemmer = PorterStemmer()

tagme.GCUBE_TOKEN = ""

In [209]:
# SPARQL templates; later cells look them up as templates[str(template_id)].
with open('templates.json') as f:
    templates = json.loads(f.read())

# LC-QuAD test split: one id per line ...
with open('data/lc-quad/test/id.txt') as f:
    ids = f.read().strip().split('\n')

# ... and one natural-language question per line.
with open('data/lc-quad/test/input.txt') as f:
    questions = f.read().strip().split('\n')
    
def preprocess_relations(file, prop=False):
    """Build a lookup from stemmed, lower-cased labels to lists of URIs.

    Each line of `file` is split on whitespace: the first token is the URI
    (wrapped in angle brackets) and everything from the third token onwards is
    the label, with one leading and three trailing characters stripped
    (presumably the opening quote and the closing `" .` -- verify against the
    actual .ttl files).

    Args:
        file: path of the label file to read.
        prop: when exactly True, every URI is stored together with a copy in
            which '/ontology/' is replaced by '/property/'.

    Returns:
        dict mapping each stemmed label to the list of URIs carrying it.
    """
    relations = {}
    with open(file) as f:
        for line in f:
            split_line = line.split()

            # Blank or truncated lines have no URI/label pair; previously they
            # inserted an empty key and then crashed on split_line[0].
            if len(split_line) < 3:
                continue

            key = ' '.join(split_line[2:])[1:-3].lower()
            key = ' '.join([stemmer.stem(word) for word in key.split()])

            uri = split_line[0].replace('<', '').replace('>', '')
            uris = relations.setdefault(key, [])

            if prop is True:
                uri_property = uri.replace('/ontology/', '/property/')
                uris.extend([uri, uri_property])
            else:
                uris.append(uri)
    return relations

# Label -> URI tables used by get_nliwod_entities; properties also get their
# '/property/' namespace twin (prop=True).
properties = preprocess_relations('dbpedia_3Eng_property.ttl', prop=True)
classes = preprocess_relations('dbpedia_3Eng_class.ttl', prop=False)

In [5]:
# Prediction k-templates
# Load the trained template classifier and the pre-built test dataset.
# NOTE(review): torch.load unpickles the stored objects (here a whole `trainer`
# instance), so only load checkpoints / .pth files from a trusted source.
saved_model = torch.load('checkpoints/Even more grammar changes,epoch=6,test_acc=0.8246887966804979.pt')
# The checkpoint is indexed like a dict; the 'trainer' entry is what the prediction cell uses.
trainer = saved_model['trainer']
test_dataset = torch.load('data/lc-quad/pth/lc_quad_test.pth')

In [4]:
# Collect, for every test sample, the gold template label and the two
# highest-scoring predicted template labels.
y_true = []
y_pred = []
output_vocab = trainer.vocabs['output']

# torch.no_grad() only disables gradient tracking while it is active as a
# context manager; calling it as a bare statement (as before) has no effect.
with torch.no_grad():
    for index in range(len(test_dataset)):
        _, toks_sent, _, _, _ = test_dataset[index]
        tree, emb, target = trainer.get_data(test_dataset[index], test_dataset.num_classes)
        output = trainer.model.forward(tree, emb, training=False)
        # Indices of the top-2 scores.
        _, pred = torch.topk(output[0].squeeze(0), 2)

        pred = pred.numpy()
        target = target.numpy()

        # Map class indices back to template labels.
        pred_0 = output_vocab.idxToLabel[pred[0]]
        pred_1 = output_vocab.idxToLabel[pred[1]]
        target = output_vocab.idxToLabel[target[0]]

        y_true.append(target)
        y_pred.append([pred_0, pred_1])

KeyboardInterrupt: 

In [6]:
def sort_dict_by_values(dictionary):
    """Return the keys of `dictionary` ordered by descending value.

    Ties on the value are broken by the key itself, also in descending order.
    """
    return sorted(dictionary, key=lambda name: (dictionary[name], name), reverse=True)

def get_earl_entities(query):
    """Ask the EARL service for entity/relation candidates for `query`.

    Every candidate in the response's 'rerankedlists' is read as
    (score, uri, ...). Candidates scoring below THRESHOLD are dropped; the rest
    are bucketed by URI:
      * 'http://dbpedia.org/resource/...'            -> resources ('r')
      * other 'http://dbpedia.org/...' URIs whose last path segment starts
        with an upper-case letter                    -> classes ('c')
      * remaining 'http://dbpedia.org/...' URIs      -> predicates ('p')

    Returns:
        dict with keys 'r', 'p', 'c', each a list of URIs sorted by descending
        score and truncated to MAX_RESOURCES / MAX_PREDICATES / MAX_CLASSES.
    """
    THRESHOLD = 0.0001
    MAX_RESOURCES = 3
    MAX_PREDICATES = 5
    MAX_CLASSES = 3

    response = requests.post('http://sda.tech/earl/api/processQuery',
                             json={"nlquery": query, "pagerankflag": False})
    payload = json.loads(response.text)

    scores = {'r': {}, 'p': {}, 'c': {}}
    for candidates in payload['rerankedlists'].values():
        for candidate in candidates:
            score, uri = candidate[0], candidate[1]
            if score < THRESHOLD:
                continue

            if uri.startswith('http://dbpedia.org/resource/'):
                bucket = 'r'
            elif uri.startswith('http://dbpedia.org/'):
                # Upper-case first letter of the local name -> class, else predicate.
                bucket = 'c' if uri.split('/')[-1][0].isupper() else 'p'
            else:
                continue
            scores[bucket][uri] = score

    return {
        'r': sort_dict_by_values(scores['r'])[:MAX_RESOURCES],
        'p': sort_dict_by_values(scores['p'])[:MAX_PREDICATES],
        'c': sort_dict_by_values(scores['c'])[:MAX_CLASSES]
    }

In [7]:
def get_tag_me_entities(query):
    """Annotate `query` with TagMe and return DBpedia resource URIs.

    Each annotation title is turned into 'http://dbpedia.org/resource/<Title>'
    (spaces replaced by underscores) and scored with the annotation's 'rho'.

    Returns:
        Up to MAX_ENTITIES resource URIs, ordered by descending rho.
    """
    MAX_ENTITIES = 5

    # NOTE(review): this API token is hard-coded in the notebook; it should be
    # moved to configuration / an environment variable. A token set on
    # tagme.GCUBE_TOKEN takes precedence when it is non-empty.
    token = getattr(tagme, 'GCUBE_TOKEN', None) or '1b4eb12e-d434-4b30-8c7f-91b3395b96e8-843339462'

    # Pass the query through `params` so requests URL-encodes it; formatting it
    # straight into the URL breaks on characters such as '&', '#' or '+'.
    response = requests.get('https://tagme.d4science.org/tagme/tag',
                            params={'lang': 'en', 'gcube-token': token, 'text': query})

    annotations = {}
    for annotation in json.loads(response.text)['annotations']:
        annotations['http://dbpedia.org/resource/' + annotation['title'].replace(' ', '_')] = annotation['rho']
    return sort_dict_by_values(annotations)[:MAX_ENTITIES]

In [8]:
def get_nliwod_entities(query, hashmap, include_properties = False):
    """Look up URIs whose (stemmed) label occurs inside the normalised query.

    The query is lower-cased and split on single spaces; each word is replaced
    by the stem of its singular form when `p.singular_noun` yields one, else by
    its own stem. A hashmap key matches when it is longer than two characters
    and is a substring of the re-joined words.

    Args:
        query: natural-language question.
        hashmap: dict of stemmed label -> list of URIs (see preprocess_relations).
        include_properties: currently unused; kept for caller compatibility.

    Returns:
        De-duplicated list of URIs of all matching keys (order not defined).
    """
#     ignore_list = ['name', 'list']
    ignore_list = []

    stemmed_words = []
    for word in query.lower().split(' '):
        # Call singular_noun once per word; the code treats False as
        # "no singular form available".
        singular = p.singular_noun(word)
        stemmed_words.append(stemmer.stem(word) if singular == False else stemmer.stem(singular))

    # Loop-invariant: build the normalised query once instead of once per key.
    normalized_query = ' '.join(stemmed_words)

    entities = []
    for key, uris in hashmap.items():
        if len(key) > 2 and key not in ignore_list and key in normalized_query:
            entities += uris
    return list(set(entities))

In [9]:
def get_entities(query):
    """Detect resources, predicates and classes for `query`.

    Resources come from TagMe; predicates and classes come from label matching
    against the `properties` and `classes` tables.

    Returns:
        dict with keys 'r' (resources), 'p' (predicates) and 'c' (classes).
    """
    resource_uris = get_tag_me_entities(query)
    predicate_uris = get_nliwod_entities(query, properties, True)
    class_uris = get_nliwod_entities(query, classes)
    return {'r': resource_uris, 'p': predicate_uris, 'c': class_uris}

In [10]:
def get_spotlight_entities(query):
    """Annotate `query` with DBpedia Spotlight (confidence 0.4).

    This is a best-effort lookup: any error raised while annotating results in
    the URIs collected so far (usually an empty list) being returned.

    Returns:
        List of the 'URI' fields of the returned annotations.
    """
    entities = []
    try:
        annotations = spotlight.annotate('http://model.dbpedia-spotlight.org/en/annotate', query, confidence=0.4)
        for annotation in annotations:
            entities.append(annotation['URI'])
    except Exception:
        # Deliberately best-effort, but catch Exception rather than using a
        # bare `except:` so KeyboardInterrupt / SystemExit can still stop a
        # long-running loop.
        pass
    return entities

In [11]:
def make_sparql_query(query, return_var):
    """Run `query` against the AKSW DBpedia endpoint and extract one variable.

    Args:
        query: SPARQL query text.
        return_var: name of the variable to read from each binding, or the
            special value 'boolean' for ASK-style results.

    Returns:
        For 'boolean': the response's 'boolean' field, or None when absent.
        Otherwise: list with the 'value' of `return_var` for every binding that
        contains it.
    """
    endpoint = SPARQLWrapper("http://akswnc9.aksw.uni-leipzig.de/dbpedia/sparql")
    endpoint.setQuery(query)
    endpoint.setReturnFormat(JSON)
    response = endpoint.query().convert()

    if return_var == 'boolean':
        return response['boolean'] if 'boolean' in response else None

    return [
        binding[return_var]['value']
        for binding in response["results"]["bindings"]
        if return_var in binding
    ]


def get_rdfs_label(prop):
    """Fetch the English rdfs:label of the URI `prop` from dbpedia.org.

    Returns:
        The label of the first binding, or None when the query yields none.
    """
    label_query = """
    SELECT ?label WHERE { <""" + prop + """> rdfs:label ?label . FILTER(lang(?label) = 'en') }
    """

    endpoint = SPARQLWrapper("http://dbpedia.org/sparql")
    endpoint.setQuery(label_query)
    endpoint.setReturnFormat(JSON)
    bindings = endpoint.query().convert()["results"]["bindings"]

    if not bindings:
        return None
    return bindings[0]['label']['value']

In [43]:
df = pd.read_csv('data/lc-quad/dataset.csv')
def get_entities_from_answer(index):
    """Extract the gold resources, predicates and classes of question `index`.

    Reads the gold SPARQL query from the notebook-level `qald` list (which must
    already be loaded), collects every `<...>` token except rdf:type and
    classifies it:
      * 'http://dbpedia.org/resource/...'                   -> 'r'
      * last path segment starts with an upper-case letter  -> 'c'
      * anything else                                       -> 'p'

    Returns:
        dict with keys 'r', 'p', 'c', each a set of URIs.
    """
    # LC-QuAD variant, looked up by id in `df`:
#     string = df[df['_id'] == int(id)]['sparql_query'].tolist()[0]
    string = qald[index]['sparql_query']
    matches = re.findall('<[^>]*>', string)
    matches = [uri.replace('<', '').replace('>', '') for uri in matches if uri != '<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>']

    results = {'r': set(), 'p': set(), 'c': set()}
    for match in matches:
        if not match:
            # '<>' carries no URI; previously this raised IndexError below.
            continue
        if match.startswith('http://dbpedia.org/resource/'):
            results['r'].add(match)
        # Slice instead of indexing so a URI ending in '/' (empty last segment)
        # is treated as a predicate rather than raising IndexError.
        elif match.split('/')[-1][:1].isupper():
            results['c'].add(match)
        else:
            results['p'].add(match)

    return results

In [44]:
# # QALD Slot Filling
# hashmap = {}
# with open('data/qald/qald-7-train-multilingual.json', 'r') as f:
#     qald_train = json.load(f)

# with open('data/qald/qald.json', 'r') as f:
#     qald = json.load(f)

# for question in qald_train['questions']:
#     hashmap[question['question'][0]['string']] = question['query']['sparql']

# for index in range(len(qald)):
#     qald[index]['sparql_query'] = hashmap[qald[index]['question']]

In [45]:
def detect_entities(question, index):
    """Combine all entity detectors for `question` and fetch its gold entities.

    Args:
        question: natural-language question text.
        index: position of the question, forwarded to get_entities_from_answer.

    Returns:
        (detected_entities, entities_list, unique_p) where
          * detected_entities is a dict with keys 'r', 'p', 'c' holding sets of
            detected resource / predicate / class URIs,
          * entities_list is the result of get_entities_from_answer(index),
          * unique_p is the set of singularised local names of those detected
            predicates for which p.singular_noun did not return False.
    """
    entities_list = get_entities_from_answer(index)
    detected_entities = get_earl_entities(question)
    # EARL's resources are discarded in favour of TagMe + Spotlight.
    detected_entities['r'] = get_tag_me_entities(question) + get_spotlight_entities(question)

    # range(len(...)) is evaluated once, so only the entries present before the
    # loop are visited even though the list grows while iterating.
    for e_index in range(len(detected_entities['p'])): # Adding property namespace to Earl predicates
        entity = detected_entities['p'][e_index]
        detected_entities['p'].append(entity.replace('/ontology/', '/property/'))

    # Add predicates found by label matching against the `properties` table.
    detected_entities['p'] += get_nliwod_entities(question, properties)

    for e_index in range(len(detected_entities['p'])): # Adding plural properties to all detected predicates
        entity = detected_entities['p'][e_index]
        if entity.startswith('http://dbpedia.org/property/'):
            pred = entity.split('/')[-1].split('_')
            # Only local names without an underscore get a plural variant.
            if len(pred) == 1:
                detected_entities['p'].append('http://dbpedia.org/property/' + p.plural_noun(pred[0]))

    # De-duplicate resources and predicates.
    detected_entities['r'] = set(detected_entities['r'])
    detected_entities['p'] = set(detected_entities['p'])

    # Collect the singular form of every predicate local name for which
    # singular_noun yields one (it returns False otherwise).
    unique_p = set()
    for entity in detected_entities['p']:
        e = p.singular_noun(entity.split('/')[-1])
        if e is not False:
            unique_p.add(e)

    # All-lower-case singular names are also proposed as ontology classes,
    # with the first letter capitalised.
    for entity in unique_p:
        if entity == entity.lower():
            detected_entities['c'].append('http://dbpedia.org/ontology/' + entity[0].upper() + entity[1:])
    detected_entities['c'] = set(detected_entities['c'])
    return detected_entities, entities_list, unique_p

In [27]:
# Slot-detection evaluation: compare detected entities with the gold entities
# of each question and print one summary row per question.
start = 0
exists = 0
count = 1
results = []

# res_df = pd.read_csv('slot_results.csv')

# NOTE(review): detect_entities reads the gold query via qald[index]; confirm
# that `questions` and `qald` are index-aligned when this cell is run.
for index in range(start, len(questions)):
#     df_row = res_df.loc[index,:]
#     if df_row['Correct'] == True:
#         continue

#     if df_row['C_R'] != df_row['N_R'] and df_row['C_P'] == df_row['N_P']:
    question = questions[index]

    print('\n' * 2)
    print(index, question)

    detected_entities, entities_list, unique_p = detect_entities(question, index)
    # Row layout (cf. the commented-out DataFrame columns further down):
    # N_R, N_P, N_C  = number of gold resources / predicates / classes
    # D_R, D_P, D_C  = number detected (D_P counts unique_p, not detected_entities['p'])
    # C_R, C_P, C_C  = number of gold entities that were detected
    row = [len(entities_list['r']), len(entities_list['p']), len(entities_list['c']),
           len(detected_entities['r']), len(unique_p), len(detected_entities['c']),

           len(entities_list['r'].intersection(detected_entities['r'])),
           len(entities_list['p'].intersection(detected_entities['p'])),
           len(entities_list['c'].intersection(detected_entities['c']))
          ]
    row.append(row[0] == row[-3] and row[1] == row[-2] and row[2] == row[-1]) # Check if number and correct are the same

#     results.append(row)
#     res_df.loc[index,:] = row
    # Show the gold resources / predicates that were missed.
    print('\n', entities_list['r'] - detected_entities['r'], '\n', entities_list['p'] - detected_entities['p'])
    print(index, row)
    # NOTE(review): unconditional break -- only the first question is processed;
    # looks like a debugging leftover, confirm before a full run.
    break




0 Philadelphia City Council is the governing body of which city?

 set() 
 set()
0 [1, 1, 0, 3, 9, 8, 1, 1, 0, True]


In [22]:
# res_df = pd.DataFrame(results, columns=['N_R', 'N_P', 'N_C', 'D_R', 'D_P', 'D_C', 'C_R', 'C_P', 'C_C', 'Correct'])
# res_df.to_csv('slot_results.csv', index=False)

In [72]:
# with open('props.csv') as f:
#     props = f.read().split('\n')
# props = props[1:]

# string = ''
# for prop in props:
#     label = get_rdfs_label(prop)
#     if label is not None:
#         print(prop, label)
#         string += '<' + prop + '>' + '<http://www.w3.org/2000/01/rdf-schema#label> "' + label + '" .\n'
        
# with open('props.txt', 'w') as f:
#     f.write(string)

http://dbpedia.org/property/abbreviation abbreviation
http://dbpedia.org/property/absMagnitude abs magnitude
http://dbpedia.org/property/academicAdvisors academic advisors
http://dbpedia.org/property/access access
http://dbpedia.org/property/acreage acreage
http://dbpedia.org/property/act act
http://dbpedia.org/property/ada ADA
http://dbpedia.org/property/added added
http://dbpedia.org/property/address address
http://dbpedia.org/property/addresses addresses
http://dbpedia.org/property/adjacentcomuni adjacentcomuni
http://dbpedia.org/property/administrator administrator
http://dbpedia.org/property/aEngineer a engineer
http://dbpedia.org/property/affiliates affiliates
http://dbpedia.org/property/affiliation affiliation
http://dbpedia.org/property/affiliations affiliations
http://dbpedia.org/property/age age
http://dbpedia.org/property/agency agency
http://dbpedia.org/property/agencyName agency name
http://dbpedia.org/property/agencyType agency type
http://dbpedia.org/property/ageRange ag

http://dbpedia.org/property/beds Beds
http://dbpedia.org/property/beganOperation began operation
http://dbpedia.org/property/begin begin
http://dbpedia.org/property/belfastDistance belfast distance
http://dbpedia.org/property/belfastDistanceKm belfast distance km
http://dbpedia.org/property/belfastDistanceMi belfast distance mi
http://dbpedia.org/property/beltwayCity beltway city
http://dbpedia.org/property/bestFinish Best Finish
http://dbpedia.org/property/bezirk Bezirk
http://dbpedia.org/property/bicycle bicycle
http://dbpedia.org/property/billed billed
http://dbpedia.org/property/binomial binomial
http://dbpedia.org/property/binomialAuthority binomial authority
http://dbpedia.org/property/bioavailability bioavailability
http://dbpedia.org/property/biome biome
http://dbpedia.org/property/birth birth
http://dbpedia.org/property/birthdate birthdate
http://dbpedia.org/property/birthDate birth date
http://dbpedia.org/property/birthname birthname
http://dbpedia.org/property/birthName birt

http://dbpedia.org/property/cityType city type
http://dbpedia.org/property/class class
http://dbpedia.org/property/classes classes
http://dbpedia.org/property/classesOffered classes offered
http://dbpedia.org/property/classis classis
http://dbpedia.org/property/classrooms classrooms
http://dbpedia.org/property/close close
http://dbpedia.org/property/closed closed
http://dbpedia.org/property/closedDate closed date
http://dbpedia.org/property/closing closing
http://dbpedia.org/property/closingDate closing date
http://dbpedia.org/property/clposition clposition
http://dbpedia.org/property/club club
http://dbpedia.org/property/clubname clubname
http://dbpedia.org/property/clubnumber clubnumber
http://dbpedia.org/property/clubs clubs
http://dbpedia.org/property/clyears clyears
http://dbpedia.org/property/coach coach
http://dbpedia.org/property/coachingRecords coaching records
http://dbpedia.org/property/coachTeams coach teams
http://dbpedia.org/property/coachyear1end coachyear1end
http://dbp

http://dbpedia.org/property/denomination denomination
http://dbpedia.org/property/density density
http://dbpedia.org/property/densityKm Density km
http://dbpedia.org/property/densitySqMi density sq mi
http://dbpedia.org/property/department department
http://dbpedia.org/property/departments departments
http://dbpedia.org/property/deployed deployed
http://dbpedia.org/property/depth depth
http://dbpedia.org/property/depthM depth m
http://dbpedia.org/property/deputy deputy
http://dbpedia.org/property/deputyHead deputy head
http://dbpedia.org/property/derivatives derivatives
http://dbpedia.org/property/description Description
http://dbpedia.org/property/design design
http://dbpedia.org/property/designatedOther1Name designated other1 name
http://dbpedia.org/property/designatedOther2Name designated other2 name
http://dbpedia.org/property/designer designer
http://dbpedia.org/property/destinations destinations
http://dbpedia.org/property/destructionDate destruction date
http://dbpedia.org/prope

http://dbpedia.org/property/fail fail
http://dbpedia.org/property/failed failed
http://dbpedia.org/property/fam fam
http://dbpedia.org/property/fame fame
http://dbpedia.org/property/familia familia
http://dbpedia.org/property/family family
http://dbpedia.org/property/fansgroup fansgroup
http://dbpedia.org/property/fareZone fare zone
http://dbpedia.org/property/fastCountry Fast Country
http://dbpedia.org/property/fastDriver Fast Driver
http://dbpedia.org/property/fastestLaps Fastest laps
http://dbpedia.org/property/fastTeam Fast Team
http://dbpedia.org/property/fat fat
http://dbpedia.org/property/fate fate
http://dbpedia.org/property/father father
http://dbpedia.org/property/feastDay feast day
http://dbpedia.org/property/fees fees
http://dbpedia.org/property/field field
http://dbpedia.org/property/fields fields
http://dbpedia.org/property/filename filename
http://dbpedia.org/property/films films
http://dbpedia.org/property/finalChampion final champion
http://dbpedia.org/property/finalda

http://dbpedia.org/property/hkfcsawards hkfcsawards
http://dbpedia.org/property/holidayName holiday name
http://dbpedia.org/property/homecountry homecountry
http://dbpedia.org/property/homepage homepage
http://dbpedia.org/property/homeStadium Home Stadium
http://dbpedia.org/property/hometown hometown
http://dbpedia.org/property/homeTown home town
http://dbpedia.org/property/hongkongfilmwards hongkongfilmwards
http://dbpedia.org/property/honours honours
http://dbpedia.org/property/horsename horsename
http://dbpedia.org/property/horseRace horse race
http://dbpedia.org/property/horses horses
http://dbpedia.org/property/hostCity Host city
http://dbpedia.org/property/hotelName hotel name
http://dbpedia.org/property/house house
http://dbpedia.org/property/houses houses
http://dbpedia.org/property/houseType house-type
http://dbpedia.org/property/hqCity hq city
http://dbpedia.org/property/hsEnsembl Hs Ensembl
http://dbpedia.org/property/hsEntrezgene Hs EntrezGene
http://dbpedia.org/property/hs

http://dbpedia.org/property/licensee licensee
http://dbpedia.org/property/licensor Licensor
http://dbpedia.org/property/lid LID
http://dbpedia.org/property/lieutenancyEngland lieutenancy england
http://dbpedia.org/property/lieutenancyNorthernIreland lieutenancy northern ireland
http://dbpedia.org/property/lieutenancyScotland lieutenancy scotland
http://dbpedia.org/property/lieutenancyWales lieutenancy wales
http://dbpedia.org/property/lieutenant lieutenant
http://dbpedia.org/property/line line
http://dbpedia.org/property/linelength linelength
http://dbpedia.org/property/linelengthKm linelength km
http://dbpedia.org/property/linelengthMi linelength mi
http://dbpedia.org/property/lines lines
http://dbpedia.org/property/link link
http://dbpedia.org/property/load load
http://dbpedia.org/property/local local
http://dbpedia.org/property/localAuthority local authority
http://dbpedia.org/property/locale locale
http://dbpedia.org/property/localName local name
http://dbpedia.org/property/localna

http://dbpedia.org/property/narrator narrator
http://dbpedia.org/property/national national
http://dbpedia.org/property/nationalAnthem national anthem
http://dbpedia.org/property/nationalcaps nationalcaps
http://dbpedia.org/property/nationalCuisine national cuisine
http://dbpedia.org/property/nationalgoals nationalgoals
http://dbpedia.org/property/nationality nationality
http://dbpedia.org/property/nationalMotto national motto
http://dbpedia.org/property/nationalOrigin national origin
http://dbpedia.org/property/nationalRanking national ranking
http://dbpedia.org/property/nationalteam nationalteam
http://dbpedia.org/property/nationalyears nationalyears
http://dbpedia.org/property/nationsParticipating Nations participating
http://dbpedia.org/property/nativeBuildingName native building name
http://dbpedia.org/property/nativename nativename
http://dbpedia.org/property/nativeName native name
http://dbpedia.org/property/nativenameA nativename a
http://dbpedia.org/property/nativeNameLang nat

http://dbpedia.org/property/party party
http://dbpedia.org/property/partyElection party election
http://dbpedia.org/property/parvordo parvordo
http://dbpedia.org/property/passengers passengers
http://dbpedia.org/property/passPercent pass percent
http://dbpedia.org/property/passSystem pass system
http://dbpedia.org/property/passYear pass year
http://dbpedia.org/property/pastmembers pastmembers
http://dbpedia.org/property/pastMembers past members
http://dbpedia.org/property/pastor pastor
http://dbpedia.org/property/pastschools pastschools
http://dbpedia.org/property/pastteams pastteams
http://dbpedia.org/property/patron patron
http://dbpedia.org/property/pdb PDB
http://dbpedia.org/property/peerReviewed peer-reviewed
http://dbpedia.org/property/penname penname
http://dbpedia.org/property/percentWater percent water
http://dbpedia.org/property/periapsis periapsis
http://dbpedia.org/property/periastron periastron
http://dbpedia.org/property/perihelion perihelion
http://dbpedia.org/property/p

http://dbpedia.org/property/published published
http://dbpedia.org/property/publisher publisher
http://dbpedia.org/property/publisherEn publisher en
http://dbpedia.org/property/pupils pupils
http://dbpedia.org/property/purpose purpose
http://dbpedia.org/property/r R
http://dbpedia.org/property/r1LengthF r1-length-f
http://dbpedia.org/property/r1LengthM r1-length-m
http://dbpedia.org/property/r1Number r1-number
http://dbpedia.org/property/r1Surface r1-surface
http://dbpedia.org/property/r2LengthF r2-length-f
http://dbpedia.org/property/r2LengthM r2-length-m
http://dbpedia.org/property/r2Number r2-number
http://dbpedia.org/property/r2Surface r2-surface
http://dbpedia.org/property/r3LengthF r3-length-f
http://dbpedia.org/property/r3LengthM r3-length-m
http://dbpedia.org/property/r3Number r3-number
http://dbpedia.org/property/r3Surface r3-surface
http://dbpedia.org/property/r4LengthF r4-length-f
http://dbpedia.org/property/r4LengthM r4-length-m
http://dbpedia.org/property/r4Number r4-numbe

http://dbpedia.org/property/shipChristened Ship christened
http://dbpedia.org/property/shipClass Ship class
http://dbpedia.org/property/shipCommissioned Ship commissioned
http://dbpedia.org/property/shipCompleted Ship completed
http://dbpedia.org/property/shipCountry Ship country
http://dbpedia.org/property/shipDecommissioned Ship decommissioned
http://dbpedia.org/property/shipDisplacement Ship displacement
http://dbpedia.org/property/shipDraft Ship draft
http://dbpedia.org/property/shipFate Ship fate
http://dbpedia.org/property/shipHeight Ship height
http://dbpedia.org/property/shipHomeport Ship homeport
http://dbpedia.org/property/shipInService Ship in service
http://dbpedia.org/property/shipLaidDown Ship laid down
http://dbpedia.org/property/shipLaunched Ship launched
http://dbpedia.org/property/shipLength Ship length
http://dbpedia.org/property/shipMaidenVoyage Ship maiden voyage
http://dbpedia.org/property/shipMotto Ship motto
http://dbpedia.org/property/shipName Ship name
http://

http://dbpedia.org/property/subRegions sub regions
http://dbpedia.org/property/subregnum subregnum
http://dbpedia.org/property/subsid subsid
http://dbpedia.org/property/subsidiaries subsidiaries
http://dbpedia.org/property/subspecies subspecies
http://dbpedia.org/property/subtitle subtitle
http://dbpedia.org/property/subtribus subtribus
http://dbpedia.org/property/succeeded succeeded
http://dbpedia.org/property/succeeding succeeding
http://dbpedia.org/property/success success
http://dbpedia.org/property/successor successor
http://dbpedia.org/property/sudoc SUDOC
http://dbpedia.org/property/summerappearances summerappearances
http://dbpedia.org/property/superclassis superclassis
http://dbpedia.org/property/superdivisio superdivisio
http://dbpedia.org/property/superdomain superdomain
http://dbpedia.org/property/superfamilia superfamilia
http://dbpedia.org/property/superintendent superintendent
http://dbpedia.org/property/superordo superordo
http://dbpedia.org/property/superphylum superph

http://dbpedia.org/property/verbandsgemeinde Verbandsgemeinde
http://dbpedia.org/property/verwaltungsgemeinschaft Verwaltungsgemeinschaft
http://dbpedia.org/property/verwaltungsverband Verwaltungsverband
http://dbpedia.org/property/viaf viaf
http://dbpedia.org/property/viceChancellor Vice-Chancellor
http://dbpedia.org/property/vicepresident vicepresident
http://dbpedia.org/property/vicePresident Vice President
http://dbpedia.org/property/viceprimeminister viceprimeminister
http://dbpedia.org/property/viceprincipal viceprincipal
http://dbpedia.org/property/viceprincipalLabel viceprincipal label
http://dbpedia.org/property/vineyards vineyards
http://dbpedia.org/property/virtual virtual
http://dbpedia.org/property/visitation visitation
http://dbpedia.org/property/visitationNum visitation num
http://dbpedia.org/property/visitationYear visitation year
http://dbpedia.org/property/visitors visitors
http://dbpedia.org/property/voice voice
http://dbpedia.org/property/voicedBy voiced by
http://d

In [27]:
# End-to-end accuracy: a question counts only when all of its slots were
# detected (res_df['Correct']) and the gold template is the top-1 / within the
# top-2 predicted templates.
correct = res_df['Correct']
accuracy = 0
top_2_accuracy = 0

for index in range(len(questions)):
    if not (correct[index] == True):
        continue
    if y_true[index] == y_pred[index][0]:
        accuracy += 1
    if y_true[index] in y_pred[index][:2]:
        top_2_accuracy += 1

(
    accuracy / len(correct),
    top_2_accuracy / len(correct),
    correct.sum() / len(correct),
)

(0.4087136929460581, 0.4616182572614108, 0.48962655601659749)

In [30]:
# Per-slot detection rates: resources and predicates over all questions,
# classes only over the questions that actually have a gold class.
# `&` binds tighter than `>`, so the second operand of the class filter needs
# its own parentheses; without them the mask was ((N_C == C_C) & N_C) > 0.
(
    len(res_df[res_df['N_R'] == res_df['C_R']]) / len(questions),
    len(res_df[res_df['N_P'] == res_df['C_P']]) / len(questions),
    len(res_df[(res_df['N_C'] == res_df['C_C']) & (res_df['N_C'] > 0)]) / len(res_df[res_df['N_C'] > 0]),
)

(0.8132780082987552, 0.6182572614107884, 0.837465564738292)

In [49]:
def get_answer(entities, template_id):
    """Fill the slots of a SPARQL template and return the first useful answer.

    Every combination of candidate entities is substituted into the template
    and executed until a query yields a non-empty result (for 'count'
    templates: a non-zero count).

    Args:
        entities: dict with keys 'r', 'p', 'c' mapping to indexable sequences
            (e.g. lists) of candidate URIs.
        template_id: id of the template; looked up as templates[str(template_id)].

    Returns:
        (output, sparql_query): the result of the last executed query and that
        query's text. Both are empty ([] and '') when no resource or predicate
        candidate exists or no combination could be tried.
    """
    output = []
    sparql_query = ''

    template = templates[str(template_id)]
    # A slot's first character selects the entity kind: 'p2' -> entities['p'].
    slots = {slot: entities[slot[0]] for slot in template['slots']}

    # This means something probably went wrong and no predicates or resources were detected for the query
    if len(slots['p']) == 0 or len(slots['r']) == 0:
        return output, sparql_query

    slot_keys = list(slots.keys())
    ranges = [range(len(slots[slot])) for slot in slot_keys]

    for i in itertools.product(*ranges):
        # Skip combinations that would use the same URI for both predicate
        # slots or for both resource slots.
        if slot_keys[0] == 'p' and slot_keys[1] == 'p2' and slots['p'][i[0]] == slots['p2'][i[1]]:
            continue
        if slot_keys[-2] == 'r' and slot_keys[-1] == 'r2' and slots['r'][i[-2]] == slots['r2'][i[-1]]:
            continue

        sparql_query = template['sparql']
        for slot, choice in zip(slot_keys, i):
            sparql_query = sparql_query.replace('<' + slot + '>', '<' + slots[slot][choice] + '>')

        # Progress marker: one dot per executed query.
        print('.', end='')

        output = make_sparql_query(sparql_query, template['return'])
        if template['return'] == 'boolean':
            # Normalise the boolean result into the list shape used elsewhere.
            output = [] if output is None else [output]

        # A count query may come back without a binding; guard the index access
        # (it used to raise IndexError) and simply try the next combination.
        if template['return'] == 'count' and len(output) > 0 and int(output[0]) == 0:
            continue

        elif len(output) > 0:
            break

    return output, sparql_query

In [50]:
def filter_out_entities(resources, predicates):
    """Keep only the predicates that occur within two hops of `resources`.

    Args:
        resources: sequence of resource URIs; an empty one short-circuits to [].
        predicates: set of candidate predicate URIs.

    Returns:
        List of the predicates that the endpoint reports on a one- or two-hop
        path (in either direction) around any of the resources.
    """
    if len(resources) == 0:
        return []

    query = """
    SELECT DISTINCT ?p WHERE {
        VALUES ?r {
            """ + '<' + '> <'.join(resources) + '>' + """
        }

        { ?r ?p ?x }
        UNION {?r ?p2 ?x . ?x ?p ?x2}
        UNION {?x ?p ?r}
        UNION {?x ?p2 ?r . ?x ?p ?x2}
    }
    """

    endpoint = SPARQLWrapper("http://akswnc9.aksw.uni-leipzig.de/dbpedia/sparql")
    endpoint.setQuery(query)
    endpoint.setReturnFormat(JSON)
    bindings = endpoint.query().convert()['results']['bindings']

    reachable = {binding['p']['value'] for binding in bindings}
    return list(predicates.intersection(reachable))

def filter_out_classes(classes):
    """Return, as a list, the members of ``classes`` for which ``get_rdfs_label`` gives something other than None."""
    return [candidate for candidate in list(classes) if get_rdfs_label(candidate) != None]

In [172]:
# Load the QALD questions and display one record to check which fields each entry carries.
with open('data/qald/qald.json', 'r') as f:
    qald = json.load(f)
qald[45]

{'actual': '2',
 'predictions': '2,1',
 'question': 'Who is the mayor of Tel Aviv?',
 'sparql_query': 'SELECT DISTINCT ?uri WHERE { <http://dbpedia.org/resource/Tel_Aviv> <http://dbpedia.org/ontology/leaderName> ?uri .}'}

In [53]:
# Answer every QALD question: pick a template, fill its slots with the detected
# entities, run the resulting SPARQL query and keep the raw result set.
with open('data/qald/qald.json', 'r') as f:
    qald = json.load(f)

answers = {
    "dataset": {
        "id": "LC-QuAD-Custom",
        "name": "LC-QuAD Custom Test Datset"
    },
    "questions": [
        
    ]
}

start = 0
# The result list used to be created only in a commented-out line, so a fresh
# kernel failed with NameError on the first append. It is reset only when the
# run starts at the first question, so a resumed run (start > 0) keeps what
# was already collected.
if start == 0:
    answers_to_questions = []

sparql = SPARQLWrapper("http://akswnc9.aksw.uni-leipzig.de/dbpedia/sparql")
questions = qald

for index in range(start, len(questions)):
    print("\n" * 3)
    # NOTE(review): y_true / y_pred are filled by the prediction cell from
    # `test_dataset`; confirm they were computed for these QALD questions --
    # every QALD record also carries its own 'actual' and 'predictions' fields.
    print(index, questions[index], y_true[index])

    # The gold template id decides the answer type: 151 maps to Boolean,
    # ids below 100 to resource lists, everything else to numbers.
    template = int(questions[index]['actual'])
    if template == 151:
        answertype = "Boolean"
    elif template < 100:
        answertype = "ListOfResource"
    else:
        answertype = "Number"

    detected_entities, entities_list, unique_p = detect_entities(questions[index]['question'], index)

    # Keep only the candidates that appear in both returned collections.
    detected_entities['r'] = list(detected_entities['r'].intersection(entities_list['r']))
    detected_entities['p'] = list(detected_entities['p'].intersection(entities_list['p']))
    # Deliberately not called `classes`: that name is the module-level lookup
    # table built by preprocess_relations and must not be overwritten here.
    matched_classes = list(detected_entities['c'].intersection(entities_list['c']))

    if len(matched_classes) == 0:
        detected_entities['c'] = filter_out_classes(detected_entities['c'])
    else:
        detected_entities['c'] = matched_classes

    # Try the best-ranked template first; fall back to the second one when it yields nothing.
    output, sparql_query = get_answer(detected_entities, y_pred[index][0])
    if len(output) == 0:
        output, sparql_query = get_answer(detected_entities, y_pred[index][1])

    # Re-run the chosen query to store the endpoint's full JSON result set.
    if len(sparql_query) > 0:
        sparql.setQuery(sparql_query)
        sparql.setReturnFormat(JSON)
        result_set = sparql.query().convert()
    else:
        result_set = []

    question = {
        "id": index,
        "metadata": {
            "answertype": answertype
        },
        "question": [{
            "language": "en",
            # The whole QALD record is stored here; later cells read its
            # 'question' and 'sparql_query' fields through this key.
            "string": questions[index]
        }],
        "query": {
            "sparql": sparql_query
        },
        "answers": result_set
    }

    answers_to_questions.append(question)





0 {'question': 'List all the musicals with music by Elton John.', 'predictions': '1,8', 'actual': '1', 'sparql_query': 'SELECT DISTINCT ?uri\nWHERE { \n        ?uri <http://dbpedia.org/ontology/musicBy> <http://dbpedia.org/resource/Elton_John> .\n        ?uri <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Musical> .\n\n}'} 1
..



1 {'question': 'How high is the lighthouse in Colombo?', 'predictions': '101,151', 'actual': '2', 'sparql_query': 'SELECT DISTINCT ?num WHERE {  <http://dbpedia.org/resource/Colombo_Lighthouse> <http://dbpedia.org/ontology/height> ?num . } '} 3




2 {'question': 'Who was the wife of U.S. president Lincoln?', 'predictions': '2,1', 'actual': '2', 'sparql_query': 'SELECT DISTINCT ?uri \nWHERE {\n\t<http://dbpedia.org/resource/Abraham_Lincoln? <http://dbpedia.org/ontology/spouse> ?uri.\n}'} 8




3 {'question': 'Who is the host of the BBC Wildlife Specials?', 'predictions': '2,1', 'actual': '2', 'sparql_query': 'SELECT DISTINC





31 {'question': 'Is Christian Bale starring in Batman Begins?', 'predictions': '151,2', 'actual': '151', 'sparql_query': 'ASK WHERE {\n\t<http://dbpedia.org/resource/Batman_Begins> <http://dbpedia.org/ontology/starring> <http://dbpedia.org/resource/Christian_Bale> .\n}'} 111




32 {'question': 'In which country is the Limerick Lake?', 'predictions': '5,2', 'actual': '2', 'sparql_query': 'SELECT DISTINCT ?uri \nWHERE { \n\t<http://dbpedia.org/resource/Limerick_Lake> <http://dbpedia.org/ontology/country> ?uri .\n}'} 16




33 {'question': 'Who created Family Guy?', 'predictions': '2,1', 'actual': '2', 'sparql_query': 'SELECT DISTINCT ?uri WHERE { <http://dbpedia.org/resource/Family_Guy> <http://dbpedia.org/ontology/creator> ?uri . }  '} 2
.



34 {'question': 'What is the official language of Suriname?', 'predictions': '2,1', 'actual': '2', 'sparql_query': 'SELECT DISTINCT ?uri\nWHERE { \n        <http://dbpedia.org/resource/Suriname> <http://dbpedia.org/ontology/officialLanguage> ?

.



60 {'question': "When did Dracula's creator die?", 'predictions': '2,1', 'actual': '3', 'sparql_query': 'SELECT DISTINCT ?date WHERE {  <http://dbpedia.org/resource/Count_Dracula> <http://dbpedia.org/ontology/creator> ?x .  ?x <http://dbpedia.org/ontology/deathDate> ?date . } '} 3




61 {'question': 'Which country does the creator of Miffy come from?', 'predictions': '3,5', 'actual': '3', 'sparql_query': 'SELECT DISTINCT ?uri \nWHERE {\n\t<http://dbpedia.org/resource/Miffy> <http://dbpedia.org/ontology/creator> ?x .\n\t?x <http://dbpedia.org/ontology/nationality> ?uri .\n}'} 5




62 {'question': "What is Batman's real name?", 'predictions': '1,2', 'actual': '2', 'sparql_query': 'SELECT DISTINCT ?s WHERE { <http://dbpedia.org/resource/Batman> <http://dbpedia.org/property/alterEgo> ?s . }'} 3




63 {'question': 'What form of government does Russia have?', 'predictions': '2,1', 'actual': '2', 'sparql_query': 'SELECT DISTINCT ?uri WHERE { <http://dbpedia.org/resource/Russia> <http:





91 {'question': 'Give me all actors starring in Last Action Hero.', 'predictions': '6,1', 'actual': '2', 'sparql_query': 'SELECT DISTINCT ?uri\nWHERE {\n        <http://dbpedia.org/resource/Last_Action_Hero> <http://dbpedia.org/ontology/starring> ?uri . \n}'} 151
.



92 {'question': 'What are the nicknames of San Francisco?', 'predictions': '2,1', 'actual': '2', 'sparql_query': 'SELECT DISTINCT ?string WHERE {  <http://dbpedia.org/resource/San_Francisco> <http://dbpedia.org/property/nickname> ?string . } '} 151
.



93 {'question': 'Which television shows were created by Walt Disney?', 'predictions': '1,6', 'actual': '1', 'sparql_query': 'SELECT DISTINCT ?uri\nWHERE {\n\t?uri <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/TelevisionShow> .\n        ?uri <http://dbpedia.org/ontology/creator> <http://dbpedia.org/resource/Walt_Disney> .\n}'} 101




94 {'question': 'What country is Sitecore from?', 'predictions': '2,1', 'actual': '2', 'sparql_query': '





122 {'question': 'How high is the Yokohama Marine Tower?', 'predictions': '101,151', 'actual': '2', 'sparql_query': 'SELECT DISTINCT ?num\nWHERE {\n        <http://dbpedia.org/resource/Yokohama_Marine_Tower> <http://dbpedia.org/ontology/height> ?num .\n}'} 105
.



123 {'question': 'Who developed the video game World of Warcraft?', 'predictions': '2,1', 'actual': '2', 'sparql_query': 'SELECT DISTINCT ?uri \nWHERE {\n\t<http://dbpedia.org/resource/World_of_Warcraft> <http://dbpedia.org/ontology/developer> ?uri . \n}'} 1
..



124 {'question': 'Who was the pope that founded the Vatican Television?', 'predictions': '2,1', 'actual': '2', 'sparql_query': 'SELECT ?uri \nWHERE { \n         ?uri <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Pope> . \n         <http://dbpedia.org/resource/Vatican_Television_Center> <http://dbpedia.org/ontology/foundedBy> ?uri .\n}'} 151




125 {'question': 'Who composed the music for Harold and Maude?', 'predictions': '7,15

In [227]:
# Manually re-run one question whose stored answer is empty, using the gold
# template and a hand-extended resource list, and patch the stored result.
with open('qald_results.json', 'r') as f:
    answers_to_questions = json.load(f)

with open('data/qald/qald.json', 'r') as f:
    qald = json.load(f)

# Own client, so the cell does not depend on a `sparql` object left behind by another cell.
sparql = SPARQLWrapper("http://akswnc9.aksw.uni-leipzig.de/dbpedia/sparql")

start = 80
# Resources appended by hand to whatever entity detection returns.
# NOTE(review): this URI is added for whichever question `start` selects --
# confirm it is meant for question 80 and not left over from another fix.
extra_resources = ['http://dbpedia.org/resource/The_Pillars_of_the_Earth']

questions = qald
for index in range(start, start + 1):
    answer = answers_to_questions[index]
    # Only touch resource-list questions that still have no answer.
    if answer['answers'] != []:
        continue

    if answer['metadata']['answertype'] != 'ListOfResource':
        continue

    print(index, answer['question'][0]['string']['question'])

    detected_entities, entities_list, unique_p = detect_entities(questions[index]['question'], index)
    detected_entities['r'] = list(detected_entities['r'].intersection(entities_list['r'])) + extra_resources
    detected_entities['p'] = list(detected_entities['p'].intersection(entities_list['p']))
    # Deliberately not called `classes`: that name is the module-level lookup
    # table built by preprocess_relations.
    matched_classes = list(entities_list['c'])

    if len(matched_classes) == 0:
        # Take one detected class when there is any; the slice yields [] instead
        # of raising IndexError when nothing was detected.
        detected_entities['c'] = list(detected_entities['c'])[:1]
    else:
        detected_entities['c'] = matched_classes

    template = int(questions[index]['actual'])
    output, sparql_query = get_answer(detected_entities, template)

    if len(sparql_query) > 0:
        sparql.setQuery(sparql_query)
        sparql.setReturnFormat(JSON)
        result_set = sparql.query().convert()
    else:
        result_set = []

    answers_to_questions[index]['answers'] = result_set
    answers_to_questions[index]['query']['sparql'] = sparql_query

    print(index)
    print('\n' * 2)
    print(entities_list, '\n' * 3, detected_entities)
    print(result_set)
    print('\n' * 5)

80 Which movies did Kurosawa direct?
.80



{'r': {'http://dbpedia.org/resource/Akira_Kurosawa'}, 'p': {'http://dbpedia.org/ontology/director'}, 'c': {'http://dbpedia.org/ontology/Film'}} 


 {'r': ['http://dbpedia.org/resource/Akira_Kurosawa', 'http://dbpedia.org/resource/The_Pillars_of_the_Earth'], 'p': ['http://dbpedia.org/ontology/director'], 'c': ['http://dbpedia.org/ontology/Film']}
{'head': {'link': [], 'vars': ['uri']}, 'results': {'distinct': False, 'ordered': True, 'bindings': [{'uri': {'type': 'uri', 'value': 'http://dbpedia.org/resource/Rhapsody_in_August'}}, {'uri': {'type': 'uri', 'value': 'http://dbpedia.org/resource/The_Lower_Depths_(1957_film)'}}, {'uri': {'type': 'uri', 'value': "http://dbpedia.org/resource/The_Men_Who_Tread_on_the_Tiger's_Tail"}}, {'uri': {'type': 'uri', 'value': 'http://dbpedia.org/resource/Horse_(1941_film)'}}, {'uri': {'type': 'uri', 'value': 'http://dbpedia.org/resource/The_Idiot_(1951_film)'}}, {'uri': {'type': 'uri', 'value': 'http://dbpedia.or

In [228]:
# with open('qald_results.json', 'w') as f:
#     json.dump(answers_to_questions, f)

In [230]:
# Reload the stored results and start four empty per-question tallies:
# gold-answer count, true positives, false positives, false negatives.
with open('qald_results.json', 'r') as f:
    answers_to_questions = json.load(f)

micro_p, micro_tp, micro_fp, micro_fn = [], [], [], []

def get_uris(answers):
    """Collect the distinct values bound to the first result variable.

    ``answers`` is either ``[]`` (nothing stored) or a result dict exposing
    ``head.vars`` and ``results.bindings``; the variable name is taken from
    ``head.vars[0]``. Returns a set, empty when there are no bindings.
    """
    if answers == []:
        return set()
    return {
        binding[answers['head']['vars'][0]]['value']
        for binding in answers['results']['bindings']
    }

def safe_div(x, y):
    """Divide ``x`` by ``y``, giving 0 instead of raising when ``y`` is zero."""
    return 0 if y == 0 else x / y

def _answer_values(result_set, answertype):
    """Reduce one stored result set to the set of values used for scoring.

    Boolean questions compare the single 'boolean' field of the result; every
    other answer type compares the values bound to the first variable of a
    result with 'results'. A result of the other shape, or the empty list
    stored when no query was produced, contributes no values.
    """
    if answertype == 'Boolean':
        return {result_set['boolean']} if 'boolean' in result_set else set()
    return get_uris(result_set) if 'results' in result_set else set()


# Per-question counts: gold answers, true positives, false positives, false negatives.
for index in range(len(answers_to_questions)):
    answertype = answers_to_questions[index]['metadata']['answertype']

    answers = answers_to_questions[index]['answers']
    golden_answers = answers_to_questions[index]['golden_answers']

    # "Number" questions go through the same path as resource lists: their
    # gold result has no 'boolean' field, which used to raise KeyError.
    answers_uris = _answer_values(answers, answertype)
    golden_answers_uris = _answer_values(golden_answers, answertype)

    micro_p.append(len(golden_answers_uris))
    micro_tp.append(len(golden_answers_uris.intersection(answers_uris)))
    micro_fp.append(len(answers_uris - golden_answers_uris))
    micro_fn.append(len(golden_answers_uris - answers_uris))

# Per-question precision, recall and F-score; safe_div turns 0/0 into 0.
pr = []
r = []
f = []
for index in range(len(answers_to_questions)):
    pr.append(safe_div(micro_tp[index], micro_tp[index] + micro_fp[index]))
    r.append(safe_div(micro_tp[index], micro_tp[index] + micro_fn[index]))
    f.append(safe_div(2 * pr[index] * r[index], pr[index] + r[index]))

In [231]:
# True-positive count recorded for question 80.
micro_tp[80]

32

In [232]:
# Mean of the per-question precision, recall and F-score lists (every question weighs the same).
pd.Series(pr).mean(), pd.Series(r).mean(), pd.Series(f).mean()

(0.4164335664335664, 0.4230769230769231, 0.41723076923076924)

In [233]:
# Pool the counts over all questions, then derive precision, recall and
# F-score from the totals.
# NOTE(review): scores computed from pooled counts are usually called the
# micro average; the `_macro` names here and the `micro_*` list names look
# swapped -- confirm before reporting.
tp, fp, fn = (sum(counts) for counts in (micro_tp, micro_fp, micro_fn))

p_macro = safe_div(tp, tp + fp)
r_macro = safe_div(tp, tp + fn)
(p_macro, r_macro, safe_div(2 * p_macro * r_macro, p_macro + r_macro))

(0.7575757575757576, 0.4666666666666667, 0.5775577557755777)

In [241]:
# F-score (harmonic mean) of hand-entered precision and recall values.
# Separate names on purpose: `p` is the inflect engine created in the import
# cell and `r` is the per-question recall list; rebinding either one breaks
# any cell that is run afterwards and still expects the original object.
precision = 0.612
recall = 0.466
precision, recall, 2 * precision * recall / (precision + recall)

(0.612, 0.466, 0.529113172541744)

In [207]:
# List the questions that miss more than ten gold answers.
for i, missed in enumerate(micro_fn):
    if missed > 10:
        print(i, missed)

36 12
46 13
74 16
80 32
101 34


In [157]:
# Number of gold answers per question, in question order.
micro_p

[4,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 7,
 1,
 2,
 3,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 3,
 1,
 1,
 2,
 1,
 1,
 1,
 4,
 1,
 4,
 1,
 1,
 1,
 1,
 3,
 12,
 10,
 6,
 1,
 1,
 1,
 2,
 1,
 7,
 7,
 13,
 2,
 1,
 2,
 2,
 1,
 1,
 2,
 2,
 1,
 1,
 32,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 4,
 1,
 1,
 7,
 3,
 1,
 2,
 1,
 16,
 1,
 1,
 1,
 3,
 1,
 32,
 1,
 1,
 3,
 1,
 10,
 2,
 1,
 1,
 1,
 1,
 9,
 1,
 3,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 34,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 1,
 2,
 1,
 1,
 1,
 1,
 2,
 11,
 1,
 1,
 1,
 1,
 3,
 1,
 5,
 1]

In [114]:
# Run every question's reference query against the endpoint and store the
# result next to the model's answer under 'golden_answers'.
with open('qald_results.json', 'r') as f:
    answers_to_questions = json.load(f)

for index, answer in enumerate(answers_to_questions):
    # Flatten the stored query onto one line: newlines and tabs become spaces.
    gold_query = answer['question'][0]['string']['sparql_query'].replace('\n', ' ').replace('\t', ' ')
    print(gold_query)

    sparql.setQuery(gold_query)
    sparql.setReturnFormat(JSON)
    result_set = sparql.query().convert()
    answer['golden_answers'] = result_set

SELECT DISTINCT ?uri WHERE {          ?uri <http://dbpedia.org/ontology/musicBy> <http://dbpedia.org/resource/Elton_John> .         ?uri <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Musical> .  }
SELECT DISTINCT ?num WHERE {  <http://dbpedia.org/resource/Colombo_Lighthouse> <http://dbpedia.org/ontology/height> ?num . } 
SELECT DISTINCT ?uri  WHERE {  <http://dbpedia.org/resource/Abraham_Lincoln> <http://dbpedia.org/ontology/spouse> ?uri. }
SELECT DISTINCT ?uri WHERE { <http://dbpedia.org/resource/BBC_Wildlife_Specials> <http://dbpedia.org/ontology/presenter> ?uri . }
SELECT DISTINCT ?n WHERE {  <http://dbpedia.org/resource/Pulp_Fiction> <http://dbpedia.org/ontology/budget> ?n . } 
SELECT DISTINCT ?uri WHERE { <http://dbpedia.org/resource/Heineken> <http://dbpedia.org/ontology/manufacturer> ?x . ?x <http://dbpedia.org/ontology/locationCity> ?uri . }
SELECT DISTINCT ?uri WHERE {  <http://dbpedia.org/resource/Chile_Route_68> <http://dbpedia.org/ontology/r

SELECT DISTINCT ?number WHERE {  <http://dbpedia.org/resource/Aldi> <http://dbpedia.org/ontology/numberOfLocations> ?number . } 
SELECT DISTINCT ?uri WHERE { <http://dbpedia.org/resource/Cameroon> <http://dbpedia.org/ontology/capital> ?uri. } 
SELECT DISTINCT ?uri WHERE {  ?uri <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Film> .  ?uri <http://dbpedia.org/ontology/director> <http://dbpedia.org/resource/Francis_Ford_Coppola> . } 
SELECT DISTINCT ?uri WHERE { <http://dbpedia.org/resource/Elvis_Presley> <http://dbpedia.org/ontology/child> ?child . ?child <http://dbpedia.org/ontology/child> ?uri . }
SELECT DISTINCT ?uri WHERE { ?uri <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Film> . ?uri <http://dbpedia.org/ontology/starring> <http://dbpedia.org/resource/Mickey_Rourke> . ?uri <http://dbpedia.org/ontology/director> <http://dbpedia.org/resource/Guy_Ritchie> . } 
SELECT DISTINCT ?date WHERE {  <http://dbpedia.org/resource/C

In [102]:
# Inspect the current value of the notebook-level `index` variable.
index

68

In [301]:
# Print the reference query of the first test id next to the query stored for the first answer.
# NOTE(review): `df` is not defined in this part of the notebook; presumably a
# DataFrame with `_id` and `sparql_query` columns -- confirm.
# The bare `detected_entities` expression is not the last statement of the
# cell, so it displays nothing.
detected_entities
q = df[df['_id'] == int(ids[0])]
print(q['sparql_query'].tolist()[0].strip())
print(answers_to_questions[0]['query']['sparql'])
print(index)

SELECT DISTINCT ?uri WHERE {?uri <http://dbpedia.org/property/governingBody> <http://dbpedia.org/resource/Philadelphia_City_Council> }
SELECT DISTINCT ?uri WHERE { ?uri <http://dbpedia.org/property/governingBody> <http://dbpedia.org/resource/Philadelphia_City_Council> . OPTIONAL { ?uri rdf:type <http://dbpedia.org/ontology/Company> } }
963


In [421]:
# Load the model's stored answers and the reference answers (the value under the 'questions' key).
with open('answers_model.json', 'r') as f:
    answers_model = json.load(f)

with open('golden_answers.json', 'r') as f:
    golden_answers = json.load(f)['questions']

In [424]:
# Go through the questions marked as not correct and show, for each one,
# either the gold answers the model missed or the entities that were detected
# when the model stored no usable result at all.
no_records = []
for index in range(len(data)):
    if res_df.loc[index, 'Correct'] == True:
        continue

    gold = golden_answers[index]['answers']
    # Yes/no questions are not compared here.
    if 'boolean' in gold:
        continue

    var = gold['head']['vars'][0]
    if var not in ['uri', 'count']:
        continue

    g_answers = {binding[var]['value'] for binding in gold['results']['bindings']}

    model = answers_model[index]['answers']
    try:
        m_answers = {binding[var]['value'] for binding in model['results']['bindings']}
    except (TypeError, KeyError):
        # The stored answer is the empty list, or its bindings use another
        # variable. Only these two errors are expected; anything else
        # (including a keyboard interrupt) is allowed to propagate.
        print(index, 'NO RECORDS IT SEEMS', model)

        detected_entities, entities_list, unique_p = detect_entities(questions[index], index)

        detected_entities['r'] = list(detected_entities['r'].intersection(entities_list['r']))
        detected_entities['p'] = list(detected_entities['p'].intersection(entities_list['p']))
        detected_entities['c'] = list(detected_entities['c'].intersection(entities_list['c']))

        print(detected_entities)

        print(entities_list)
        # Remember the question, as the sibling inspection cell does. The
        # retry with the gold template that used to follow was unreachable
        # (it sat behind a `continue`) and has been removed.
        no_records.append(index)
        continue

    if g_answers - m_answers != set():
        print('\n\n', index)
        print(len(gold['results']['bindings']))
        print(golden_answers[index]['query']['sparql'])

        print(len(model['results']['bindings']))
        print(answers_model[index]['query']['sparql'])
        print('\n' * 5)

#     print(golden_answers[index]['answers'])
#     print(golden_answers[index]['query']['sparql'])

#     print('\n\n')
    
#     print(answers_model[index]['answers'])
#     print(answers_model[index]['query']['sparql'])    

2 NO RECORDS IT SEEMS []
{'r': [], 'p': ['http://dbpedia.org/ontology/portrayer'], 'c': ['http://dbpedia.org/ontology/FictionalCharacter']}
{'r': {'http://dbpedia.org/resource/Terminator_(franchise)', 'http://dbpedia.org/resource/Terminator_2:_Judgment_Day'}, 'p': {'http://dbpedia.org/ontology/portrayer', 'http://dbpedia.org/ontology/series'}, 'c': {'http://dbpedia.org/ontology/FictionalCharacter'}}
3 NO RECORDS IT SEEMS []
{'r': ['http://dbpedia.org/resource/Mariveles,_Bataan'], 'p': ['http://dbpedia.org/ontology/sport'], 'c': []}
{'r': {'http://dbpedia.org/resource/Mariveles,_Bataan'}, 'p': {'http://dbpedia.org/ontology/sport', 'http://dbpedia.org/property/city'}, 'c': set()}
6 NO RECORDS IT SEEMS []
{'r': ['http://dbpedia.org/resource/Ganz_UV'], 'p': ['http://dbpedia.org/ontology/nationality', 'http://dbpedia.org/ontology/operator'], 'c': []}
{'r': {'http://dbpedia.org/resource/Géza_Horváth', 'http://dbpedia.org/resource/Ganz_UV'}, 'p': {'http://dbpedia.org/ontology/nationality', 'h

{'r': ['http://dbpedia.org/resource/Joey_McIntyre'], 'p': [], 'c': []}
{'r': {'http://dbpedia.org/resource/Joey_McIntyre'}, 'p': {'http://dbpedia.org/property/starring'}, 'c': set()}
53 NO RECORDS IT SEEMS []
{'r': ['http://dbpedia.org/resource/Tonlé_Sap'], 'p': [], 'c': []}
{'r': {'http://dbpedia.org/resource/Tonlé_Sap'}, 'p': {'http://dbpedia.org/ontology/rightTributary'}, 'c': set()}
60 NO RECORDS IT SEEMS []
{'r': ['http://dbpedia.org/resource/Pittsburgh_Line'], 'p': [], 'c': []}
{'r': {'http://dbpedia.org/resource/Pittsburgh_Line'}, 'p': {'http://dbpedia.org/ontology/founder', 'http://dbpedia.org/ontology/routeEnd'}, 'c': set()}
62 NO RECORDS IT SEEMS []
{'r': ['http://dbpedia.org/resource/Henri_Putz'], 'p': ['http://dbpedia.org/ontology/battle'], 'c': []}
{'r': {'http://dbpedia.org/resource/Henri_Putz'}, 'p': {'http://dbpedia.org/ontology/battle', 'http://dbpedia.org/ontology/territory'}, 'c': set()}
64 NO RECORDS IT SEEMS []
{'r': [], 'p': [], 'c': ['http://dbpedia.org/ontology/

{'r': [], 'p': ['http://dbpedia.org/ontology/sport'], 'c': ['http://dbpedia.org/ontology/Sport']}
{'r': {'http://dbpedia.org/resource/Maharashtra'}, 'p': {'http://dbpedia.org/property/state', 'http://dbpedia.org/ontology/sport'}, 'c': {'http://dbpedia.org/ontology/Sport'}}


 142
1
SELECT DISTINCT ?uri WHERE { <http://dbpedia.org/resource/Manendragarh> <http://dbpedia.org/ontology/leaderName> ?uri. <http://dbpedia.org/resource/Rishang_Keishing> <http://dbpedia.org/property/party> ?uri}
0
SELECT DISTINCT ?uri WHERE { <http://dbpedia.org/resource/Rishang_Keishing> <http://dbpedia.org/property/party> ?uri. <http://dbpedia.org/resource/Manendragarh> <http://dbpedia.org/property/party> ?uri . }






143 NO RECORDS IT SEEMS []
{'r': [], 'p': ['http://dbpedia.org/property/country', 'http://dbpedia.org/property/capital'], 'c': []}
{'r': {'http://dbpedia.org/resource/Bocas_Town,_Bocas_del_Toro'}, 'p': {'http://dbpedia.org/property/country', 'http://dbpedia.org/property/capital'}, 'c': set()}
1

{'r': [], 'p': ['http://dbpedia.org/property/artist'], 'c': ['http://dbpedia.org/ontology/Artist']}
{'r': {"http://dbpedia.org/resource/Musée_d'Orsay"}, 'p': {'http://dbpedia.org/property/museum', 'http://dbpedia.org/property/artist'}, 'c': {'http://dbpedia.org/ontology/Artist'}}
192 NO RECORDS IT SEEMS []
{'r': ['http://dbpedia.org/resource/Willard_Fiske'], 'p': [], 'c': ['http://dbpedia.org/ontology/AcademicJournal']}
{'r': {'http://dbpedia.org/resource/Willard_Fiske'}, 'p': {'http://dbpedia.org/property/discipline', 'http://dbpedia.org/property/editor'}, 'c': {'http://dbpedia.org/ontology/AcademicJournal'}}
193 NO RECORDS IT SEEMS []
{'r': ['http://dbpedia.org/resource/Pope_Gregory_I', 'http://dbpedia.org/resource/Edwin_of_Northumbria'], 'p': [], 'c': []}
{'r': {'http://dbpedia.org/resource/Pope_Gregory_I', 'http://dbpedia.org/resource/Edwin_of_Northumbria'}, 'p': {'http://dbpedia.org/property/veneratedIn'}, 'c': set()}
194 NO RECORDS IT SEEMS []
{'r': ['http://dbpedia.org/resource/

KeyboardInterrupt: 

In [408]:
# Go through the questions marked as correct and report those where the
# model's result still misses gold answers, or where no usable result is stored.
# NOTE(review): `no_records` must already exist when this cell runs; it is
# created in another cell.
for index in range(len(data)):
    if res_df.loc[index, 'Correct'] == False:
        continue

    gold = golden_answers[index]['answers']
    # Yes/no questions are not compared here.
    if 'boolean' in gold:
        continue

    var = gold['head']['vars'][0]
    if var not in ['uri', 'count']:
        continue

    g_answers = {binding[var]['value'] for binding in gold['results']['bindings']}

    model = answers_model[index]['answers']
    try:
        m_answers = {binding[var]['value'] for binding in model['results']['bindings']}
    except (TypeError, KeyError):
        # The stored answer is the empty list, or its bindings use another
        # variable. Other errors are no longer swallowed.
        print(index, 'NO RECORDS IT SEEMS', model)
        no_records.append(index)
        continue

    if g_answers - m_answers != set():
        print('\n\n', index)
        print(len(gold['results']['bindings']))
        print(golden_answers[index]['query']['sparql'])

        print(len(model['results']['bindings']))
        print(answers_model[index]['query']['sparql'])
        print('\n' * 5)


31 NO RECORDS IT SEEMS []
34 NO RECORDS IT SEEMS []


 45
1
SELECT DISTINCT (COUNT(?uri) as ?count) WHERE { ?x <http://dbpedia.org/ontology/kingdom> <http://dbpedia.org/resource/Animal> . ?x <http://dbpedia.org/ontology/species> ?uri  . ?uri <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Species>}
1
SELECT DISTINCT (COUNT(?uri) as ?count) WHERE { ?x <http://dbpedia.org/ontology/kingdom> <http://dbpedia.org/resource/Animal> . ?x <http://dbpedia.org/ontology/species> ?uri . OPTIONAL { ?uri rdf:type <http://dbpedia.org/ontology/Species> } }








 61
5
SELECT DISTINCT ?uri WHERE { ?x <http://dbpedia.org/property/children> <http://dbpedia.org/resource/Levi_Lincoln,_Jr.> . ?x <http://dbpedia.org/property/successor> ?uri  . ?x <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Governor>}
2
SELECT DISTINCT ?uri WHERE { ?x <http://dbpedia.org/property/successor> <http://dbpedia.org/resource/Levi_Lincoln,_Jr.> . ?x <http://dbpedia.or

In [409]:
# Print the reference query and the stored model query for one question,
# then y_true and the first two y_pred entries for the same index.
index = 222
print(golden_answers[index]['query']['sparql'])
print('\n' * 3)
print(answers_model[index]['query']['sparql'])
print(y_true[index], y_pred[index][0], y_pred[index][1])

SELECT DISTINCT (COUNT(?uri) as ?count) WHERE {?uri <http://dbpedia.org/property/training> <http://dbpedia.org/resource/San_Francisco_Art_Institute>  . ?uri <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Artist>}





101 105 106


In [412]:
# Re-run entity detection on question 222; the notebook-level `index` is passed as the second argument.
detected_entities, entities_list, unique_p = detect_entities(questions[222], index)

In [414]:
# Show the first two values returned by detect_entities.
detected_entities, entities_list

({'c': {'http://dbpedia.org/ontology/Artist',
   'http://dbpedia.org/ontology/Institution',
   'http://dbpedia.org/ontology/Institutions',
   'http://dbpedia.org/ontology/Number',
   'http://dbpedia.org/ontology/Occupation',
   'http://dbpedia.org/ontology/Place',
   'http://dbpedia.org/ontology/Relative',
   'http://dbpedia.org/ontology/Training',
   'http://dbpedia.org/ontology/Work'},
  'p': {'http://dbpedia.org/ontology/artist',
   'http://dbpedia.org/ontology/institution',
   'http://dbpedia.org/ontology/number',
   'http://dbpedia.org/ontology/occupation',
   'http://dbpedia.org/ontology/relative',
   'http://dbpedia.org/ontology/training',
   'http://dbpedia.org/property/artist',
   'http://dbpedia.org/property/artists',
   'http://dbpedia.org/property/institution',
   'http://dbpedia.org/property/institutions',
   'http://dbpedia.org/property/institutionss',
   'http://dbpedia.org/property/number',
   'http://dbpedia.org/property/numbers',
   'http://dbpedia.org/property/occupa

In [392]:
# Re-run the stored model query of a single question and overwrite its stored answers with the fresh result.
sparql = SPARQLWrapper("http://akswnc9.aksw.uni-leipzig.de/dbpedia/sparql")
index = 73
# Disabled one-off pass that rewrote 'SELECT ?uri' to 'SELECT DISTINCT ?uri' in every stored query:
# for index in range(len(data)):
#     if answers_model[index]['query']['sparql'].startswith('SELECT ?uri'):
#         answers_model[index]['query']['sparql'] = answers_model[index]['query']['sparql'].replace('SELECT ?uri', 'SELECT DISTINCT ?uri')
# print(answers_model[index]['query']['sparql'])

sparql.setQuery(answers_model[index]['query']['sparql'])
sparql.setReturnFormat(JSON)    
result_set = sparql.query().convert()

# Only the in-memory `answers_model` is updated; nothing is written to disk in this cell.
answers_model[index]['answers'] = result_set
print('\n' * 5)







