In [22]:
import pandas as pd
import numpy as np
from SPARQLWrapper import SPARQLWrapper, JSON
import nltk, inflect, re, string

In [23]:
# Shared inflect engine (https://pypi.python.org/pypi/inflect). Later cells use it
# for indefinite articles (p.a) and singular/plural handling (p.singular_noun, p.plural).
p = inflect.engine()

In [62]:
# SPARQL endpoint that get_top_k_triples sends its ranked-triple query to.
EC2_URI = 'http://ec2-18-188-197-36.us-east-2.compute.amazonaws.com:3030/dbkwik/query'
# Resource to summarise; passed to get_basic_info and get_top_k_triples below.
# Swap in one of the commented alternatives to summarise a different resource.
# NOTE(review): the saved cell outputs in this notebook show queries for the Jon_Snow
# resource, so they appear to come from a run with a different URI - re-run all cells
# to refresh them.
URI = 'http://dbkwik.webdatacommons.org/HarryPotter/resource/Harry_Potter'
# URI = 'http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Daenerys_Targaryen'
# URI = 'http://dbkwik.webdatacommons.org/GTA_Wik/resource/Russia'
# URI = 'http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Jon_Snow'

In [63]:
def replace_hyphen_with_space(string):
    """Return ``string`` with every underscore turned into a single space.

    Note: despite the function's name, the character that gets replaced is
    ``_`` (underscore), not ``-`` (hyphen); hyphens are left untouched.
    """
    pieces = string.split('_')
    return ' '.join(pieces)

def combine_conjunctive_sentences(sents):
    """Join phrases into one English enumeration.

    Examples: ``['a']`` -> ``'a'``, ``['a', 'b']`` -> ``'a and b'``,
    ``['a', 'b', 'c']`` -> ``'a, b and c'``.

    Args:
        sents: List of strings to combine.

    Returns:
        The combined string, or an empty string when ``sents`` is empty.
    """
    # Guard the empty case explicitly: indexing sents[0] would raise IndexError.
    if not sents:
        return ''
    if len(sents) == 1:
        return sents[0]
    # Everything but the last item is comma-separated; the last one is joined with "and".
    return ', '.join(sents[:-1]) + ' and ' + sents[-1]

def get_resource_name(URI):
    """Look up an English display name for a DBkwik resource.

    Sends a SPARQL query to http://dbkwik.webdatacommons.org/sparql asking for
    the wiki-specific ``property/name``, ``skos:prefLabel`` or ``rdfs:label``
    of ``URI``, each restricted to the language tag ``en``. The query text is
    printed before it is sent.

    Args:
        URI: Resource URI. Its fourth '/'-separated segment is used as the
            wiki name when building the property namespace.

    Returns:
        The first non-empty name found in the results, or None when the
        endpoint returns no bound name.
    """
    wiki = URI.split('/')[3]
    property_namespace = "http://dbkwik.webdatacommons.org/" + wiki + "/property"

    sparql = SPARQLWrapper("http://dbkwik.webdatacommons.org/sparql")
    query = ("""SELECT ?name WHERE {
        # Get English label of URI
        OPTIONAL { <""" + URI + """> <""" + property_namespace + """/name> ?name . FILTER(lang(?name)='en') . }
        OPTIONAL { <""" + URI + """> <http://www.w3.org/2004/02/skos/core#prefLabel> ?name . FILTER(lang(?name)='en') . }
        OPTIONAL { <""" + URI + """> <http://www.w3.org/2000/01/rdf-schema#label> ?name . FILTER(lang(?name)='en') . }
    }
    """)
    print(query)

    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    for result in results["results"]["bindings"]:
        # Every pattern in the query is OPTIONAL, so a solution row may carry no
        # ?name at all; the SPARQL JSON results format omits unbound variables,
        # which made result['name'] raise KeyError.
        name = result.get('name', {}).get('value')
        if name:
            return name
    return None

In [64]:
def get_ontology_label(ontology):
    """Return a human-readable English label for a DBpedia ontology class.

    Sends a SPARQL query to http://dbpedia.org/sparql for the ``rdfs:label``
    (language tag ``en``) of ``http://dbpedia.org/ontology/<ontology>``. The
    query text is printed before it is sent.

    Args:
        ontology: Local name of the ontology class, e.g. ``'FictionalCharacter'``.

    Returns:
        The label from the first result row. When the endpoint returns no
        rows, the lower-cased ``ontology`` with underscores replaced by spaces.
    """
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    # The label predicate is written as a full IRI (as get_resource_name does)
    # so the query does not depend on the endpoint pre-declaring the rdfs: prefix.
    query = ("""SELECT ?label WHERE {
        <http://dbpedia.org/ontology/""" + ontology + """> <http://www.w3.org/2000/01/rdf-schema#label> ?label .
        FILTER(lang(?label)='en')
    }
    """)
    print(query)

    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    bindings = results["results"]["bindings"]
    if bindings:
        # ?label is a required pattern, so it is bound in every returned row.
        return bindings[0]['label']['value']

    return replace_hyphen_with_space(ontology.lower())

In [65]:
def get_basic_info(URI):
    """Fetch types, English name, gender and DBpedia link for a DBkwik resource.

    Sends one SPARQL query to http://dbkwik.webdatacommons.org/sparql. The
    query text and the first result row are printed.

    Args:
        URI: Resource URI. Its fourth '/'-separated segment is used as the
            wiki name when building the ontology/property namespaces.

    Returns:
        A dict built from the first result row, holding whichever of these
        keys the endpoint bound:
            'types'  - '|'-separated type URIs from the wiki's ontology namespace
            'name'   - English name / prefLabel / label
            'gender' - value of the wiki's ``property/gender``
            'dbr'    - ``owl:sameAs`` target
        An empty dict when the query returns no rows.
    """
    wiki = URI.split('/')[3]
    ontology_namespace = "http://dbkwik.webdatacommons.org/" + wiki + "/ontology"
    property_namespace = "http://dbkwik.webdatacommons.org/" + wiki + "/property"

    sparql = SPARQLWrapper("http://dbkwik.webdatacommons.org/sparql")
    # NOTE(review): the SELECT mixes an aggregate with plain variables and has no
    # GROUP BY; this presumably relies on the endpoint being lenient - confirm.
    query = ("""SELECT (group_concat(?type;separator='|') as ?types) ?name ?gender ?dbr WHERE {
        # Get Types of URI
        <""" + URI + """> rdf:type ?type .
        FILTER(contains(str(?type), '""" + ontology_namespace + """')) .

        # Get English label of URI
        OPTIONAL { <""" + URI + """> <""" + property_namespace + """/name> ?name . FILTER(lang(?name)='en') . }
        OPTIONAL { <""" + URI + """> <http://www.w3.org/2004/02/skos/core#prefLabel> ?name . FILTER(lang(?name)='en') . }
        OPTIONAL { <""" + URI + """> <http://www.w3.org/2000/01/rdf-schema#label> ?name . FILTER(lang(?name)='en') . }

        # Try to get gender
        OPTIONAL { <""" + URI + """> <""" + property_namespace + """/gender> ?gender . }

        # Try to get corresponding DBpedia Resource
        OPTIONAL { <""" + URI + """> owl:sameAs ?dbr . }
    }
    """)
    print(query)

    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    output = {}
    bindings = results["results"]["bindings"]
    if bindings:
        # Only the first row is used.
        result = bindings[0]
        print(result)
        # ?name, ?gender and ?dbr come from OPTIONAL patterns and are simply absent
        # from the row when unbound; reading result['dbr'] unconditionally raised
        # KeyError for resources without an owl:sameAs link.
        for key in ('types', 'dbr', 'name', 'gender'):
            if key in result:
                output[key] = result[key]['value']

    return output

# Query the DBkwik endpoint for the configured URI; later cells read the
# 'types', 'name' and 'gender' entries of this dict to start the summary.
basic_info = get_basic_info(URI)

SELECT (group_concat(?type;separator='|') as ?types) ?name ?gender ?dbr WHERE {        
        # Get Types of URI
        <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Jon_Snow> rdf:type ?type .
        FILTER(contains(str(?type), 'http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/ontology')) .
        
        # Get English label of URI
        OPTIONAL { <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Jon_Snow> <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/property/name> ?name . FILTER(lang(?name)='en') . }
        OPTIONAL { <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Jon_Snow> <http://www.w3.org/2004/02/skos/core#prefLabel> ?name . FILTER(lang(?name)='en') . }
        OPTIONAL { <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Jon_Snow> <http://www.w3.org/2000/01/rdf-schema#label> ?name . FILTER(lang(?name)='en') . }        
                
        # Try to get gender
        OPTIONAL { <http://dbkwik.web

In [66]:
def get_top_k_triples(URI, k):
    """Fetch ranked triples around a resource, grouped by predicate.

    Sends one SPARQL query to the endpoint in ``EC2_URI``. It selects triples
    where ``URI`` is the subject (``reverse`` false) or the object (``reverse``
    true), drops a fixed list of bookkeeping predicates, requires an English
    ``rdfs:label`` on the predicate, and orders rows by a rank computed in the
    query from the predicate's and the object's rank values. The query text is
    printed before it is sent.

    Args:
        URI: Resource URI. Its fourth '/'-separated segment is used as the
            wiki name when building the ontology/property namespaces.
        k: Number of distinct predicates to collect. Reading of the result
            rows stops as soon as the k-th distinct predicate has been seen.

    Returns:
        A dict keyed by predicate URI, in order of first appearance. Each value
        is ``{'label': <predicate label>, 'resources': [...]}`` where every
        resource entry is a dict with:
            'resource' - the other end of the triple (URI or literal text)
            'rank'     - rank value as returned by the endpoint, or None when
                         the row has no rank
            'reverse'  - the endpoint's string value for ?reverse (callers
                         compare it against 'true')
    """
    wiki = URI.split('/')[3]
    ontology_namespace = "http://dbkwik.webdatacommons.org/" + wiki + "/ontology"
    property_namespace = "http://dbkwik.webdatacommons.org/" + wiki + "/property"

    sparql = SPARQLWrapper(EC2_URI)
    query = """
    SELECT ?p ?p_label ?o ?rank ?reverse {

        {<""" + URI + """> ?p ?o . BIND(false as ?reverse)} UNION {?o ?p <""" + URI + """> . BIND(true as ?reverse)}

        FILTER (?p NOT IN (
            <http://purl.org/dc/terms/subject>,
            <http://xmlns.com/foaf/0.1/depiction>,
            <http://www.w3.org/2002/07/owl#sameAs>,
            <""" + ontology_namespace + """/thumbnail>,
            <""" + property_namespace + """/predecessor>,
            <""" + property_namespace + """/successor>,
            <""" + property_namespace + """/name>,
            <""" + property_namespace + """/gender>,
            <http://xmlns.com/foaf/0.1/isPrimaryTopicOf>,
            <http://xmlns.com/foaf/0.1/primaryTopic>
        )) .

        ?p <http://www.w3.org/2000/01/rdf-schema#label> ?p_label .
        FILTER(lang(?p_label)='en') .

        OPTIONAL { <""" + URI + """> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ?class . ?p ?class ?prop_rank }
        OPTIONAL { ?p <http://purl.org/voc/vrank#proprank> ?prop_rank }
        OPTIONAL { ?o <http://purl.org/voc/vrank#pagerank> ?obj_rank }
        OPTIONAL {FILTER ISLITERAL(?o) . BIND(0.15 as ?obj_rank) }

        #BIND(?obj_rank * ?prop_rank as ?rank) # PROD RANK
        BIND(?obj_rank * ?prop_rank / (?obj_rank + ?prop_rank) as ?rank) # HARMONIC RANK
    } GROUP BY ?o ?p ?p_label ?rank ?reverse ORDER BY DESC(?rank)
    """

    print(query)

    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    # A dict keeps insertion order, so it doubles as the list of predicates seen.
    output = {}
    for result in results["results"]["bindings"]:
        predicate = result['p']['value']

        if predicate not in output:
            output[predicate] = {
                'resources': [],
                'label': result['p_label']['value']
            }

        # ?rank is BIND-computed from ?obj_rank and ?prop_rank, which are both
        # OPTIONAL; when either is missing the row carries no ?rank binding,
        # so it must not be indexed unconditionally.
        rank_binding = result.get('rank')

        output[predicate]['resources'].append({
            'resource': result['o']['value'],
            'rank': rank_binding['value'] if rank_binding else None,
            'reverse': result['reverse']['value']
        })

        if len(output) == k:
            break

    return output

# Collect ranked triples for the configured URI; collection stops once
# 10 distinct predicates have been seen.
top_triples = get_top_k_triples(URI, 10)


    SELECT ?p ?p_label ?o ?rank ?reverse {

        {<http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Jon_Snow> ?p ?o . BIND(false as ?reverse)} UNION {?o ?p <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Jon_Snow> . BIND(true as ?reverse)}
                
        FILTER (?p NOT IN (
            <http://purl.org/dc/terms/subject>, 
            <http://xmlns.com/foaf/0.1/depiction>, 
            <http://www.w3.org/2002/07/owl#sameAs>, 
            <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/ontology/thumbnail>, 
            <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/property/predecessor>,
            <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/property/successor>, 
            <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/property/name>, 
            <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/property/gender>, 
            <http://xmlns.com/foaf/0.1/isPrimaryTopicOf>, 
            <http://xmlns.com/foaf/0.

In [69]:
# Start the textual summary: "<name> is a <type>, a <type> and a <type>. " followed by
# an optional gender sentence. Also picks the pronouns used by the later sentences.
# Maybe handle plural for pronoun and possessive pronoun
summary = ''
pronoun = 'It'
possessive_pronoun = 'Its'

if 'types' in basic_info:
    # 'name' is only present when the lookup bound one; fall back to the last
    # path segment of the resource URI instead of failing with KeyError.
    subject_name = basic_info.get('name') or replace_hyphen_with_space(URI.split('/')[-1])

    types_sents = []
    for type_uri in basic_info['types'].split('|'):
        ontology_type = type_uri.split('/')[-1]
        print(ontology_type)
        if not ontology_type: # Empty segment (e.g. empty 'types' string): nothing to look up
            continue
        if ontology_type.lower() == 'agent': # Ignore Agent
            continue

        types_sents.append(p.a(get_ontology_label(ontology_type)))

    # Only emit the sentence when at least one type survived the filtering;
    # otherwise there is nothing to put after "<name> is".
    if types_sents:
        summary += subject_name + ' is ' + combine_conjunctive_sentences(types_sents) + '. '

if 'gender' in basic_info:
    gender = basic_info['gender'].strip().lower()
    if gender == 'male':
        pronoun = 'He'
        possessive_pronoun = 'His'
        summary += 'His gender is male. '
    elif gender == 'female':
        pronoun = 'She'
        possessive_pronoun = 'Her'
        summary += 'Her gender is female. '

Agent
FictionalCharacter
SELECT ?label WHERE {        
        <http://dbpedia.org/ontology/FictionalCharacter> rdfs:label ?label .
        FILTER(lang(?label)='en')
    }
    
Person
SELECT ?label WHERE {        
        <http://dbpedia.org/ontology/Person> rdfs:label ?label .
        FILTER(lang(?label)='en')
    }
    


In [70]:
# Append one sentence per predicate in top_triples to the summary.
# Get predicate name from SPARQL
for predicate in top_triples:
    predicate_name = top_triples[predicate]['label'] # Check if this works properly
    resources = []    # values where the summarised resource is the subject
    r_resources = []  # values where the summarised resource is the object

    for resource in top_triples[predicate]['resources']:
        value = resource['resource']
        # 'reverse' arrives as the endpoint's string value, hence the string comparison
        target = r_resources if resource['reverse'] == 'true' else resources

        if value.startswith('http://'): # URI
            resource_name = get_resource_name(value)
            if resource_name is not None: # Skip resources without a usable name
                target.append(resource_name)
        elif value.startswith('*'): # Possibly bullet list which was not properly parsed by DBkwik
            # Text before the first '*' is empty, so drop it; clean each remaining item
            # and discard items that end up empty (e.g. from consecutive '*').
            for item in value.split('*')[1:]:
                cleaned = re.sub(r'[^a-zA-Z0-9 \n\.]', '', item).strip()
                if cleaned:
                    target.append(cleaned)
        elif value.strip(): # Plain literal; empty literals are skipped (value[0] would fail on '')
            target.append(value)

    # Restrict to 3 items
    resources = resources[:3]
    r_resources = r_resources[:3]

    if predicate_name == 'born':
        # Only write the sentence when there is something to list
        if resources:
            summary += pronoun + ' was born in ' + combine_conjunctive_sentences(resources) + '. '
    else:
        if len(r_resources) > 0:
            print('need to handle reverse scenario')
        elif len(resources) == 1:
            # The code treats a False result from singular_noun as "already singular"
            singular_form = p.singular_noun(predicate_name)
            if singular_form is False or singular_form == predicate_name: # If singular predicate or plural and singular forms are the same (eg: species)
                summary += possessive_pronoun + ' ' + predicate_name + ' is ' + resources[0] + '. '
            else:
                summary += possessive_pronoun + ' ' + predicate_name + ' are ' + resources[0] + '. '
        elif len(resources) > 1:
            if p.singular_noun(predicate_name) is False: # Convert to plural form
                predicate_name = p.plural(predicate_name)
            summary += possessive_pronoun + ' ' + predicate_name + ' are ' + combine_conjunctive_sentences(resources) + '. '

print(summary)

SELECT ?name WHERE {        
        # Get English label of URI
        OPTIONAL { <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/House_Stark> <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/property/name> ?name . FILTER(lang(?name)='en') . }
        OPTIONAL { <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/House_Stark> <http://www.w3.org/2004/02/skos/core#prefLabel> ?name . FILTER(lang(?name)='en') . }       
        OPTIONAL { <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/House_Stark> <http://www.w3.org/2000/01/rdf-schema#label> ?name . FILTER(lang(?name)='en') . }        
    }
    
SELECT ?name WHERE {        
        # Get English label of URI
        OPTIONAL { <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/House_Stark> <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/property/name> ?name . FILTER(lang(?name)='en') . }
        OPTIONAL { <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/House

SELECT ?name WHERE {        
        # Get English label of URI
        OPTIONAL { <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/King_in_the_North> <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/property/name> ?name . FILTER(lang(?name)='en') . }
        OPTIONAL { <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/King_in_the_North> <http://www.w3.org/2004/02/skos/core#prefLabel> ?name . FILTER(lang(?name)='en') . }       
        OPTIONAL { <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/King_in_the_North> <http://www.w3.org/2000/01/rdf-schema#label> ?name . FILTER(lang(?name)='en') . }        
    }
    
SELECT ?name WHERE {        
        # Get English label of URI
        OPTIONAL { <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Old_Gods_of_the_Forest> <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/property/name> ?name . FILTER(lang(?name)='en') . }
        OPTIONAL { <http://dbkwik.webdatacommons.org/Game_