In [12]:
import pandas as pd
import numpy as np
from SPARQLWrapper import SPARQLWrapper, JSON
import nltk, inflect

In [17]:
# inflect documentation: https://pypi.python.org/pypi/inflect
# Shared inflect engine; used further down as p.a(...) to prefix "a"/"an".
p = inflect.engine()
# Resource to summarise. The fourth '/'-separated segment ("HarryPotter") is
# what the query helpers below extract as the wiki identifier.
URI = 'http://dbkwik.webdatacommons.org/HarryPotter/resource/Harry_Potter'
# Alternative resource kept for manual experiments:
# URI = 'http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Daenerys_Targaryen'

In [31]:
def replace_hyphen_with_space(string):
    """Return ``string`` with every underscore turned into a single space.

    Note that, despite the function's name, the character being replaced is
    the underscore (``_``); hyphens (``-``) are left untouched.

    Args:
        string: Text to convert, e.g. the last path segment of a resource URI.

    Returns:
        str: A copy of ``string`` in which each ``_`` is a space.
    """
    # Splitting on '_' and re-joining with ' ' swaps each underscore one-for-one.
    return ' '.join(string.split('_'))

def combine_conjunctive_sentences(sents):
    """Join phrases into one English enumeration such as ``a, b and c``.

    All phrases but the last are separated by ``', '``; the last one is
    attached with ``' and '`` (no comma before "and").

    Args:
        sents: Sequence of strings to combine.

    Returns:
        str: The combined phrase. A single-element input is returned as is,
        and an empty input yields ``''`` instead of raising ``IndexError``.
    """
    if not sents:
        # Nothing to enumerate (e.g. every candidate was filtered out upstream).
        return ''
    if len(sents) == 1:
        return sents[0]
    return ', '.join(sents[:-1]) + ' and ' + sents[-1]

In [10]:
def get_basic_info(URI):
    """Fetch types, English name, gender and DBpedia link of a DBkWik resource.

    Builds one SELECT query, sends it to the DBkWik SPARQL endpoint, prints
    the query and the first result row, and reads only that first row.

    Args:
        URI: Full resource URI. Its fourth '/'-separated segment is taken as
            the wiki identifier and used to build that wiki's ontology and
            property namespaces. The URI is spliced into the query text
            without escaping, so it must be a trusted value.

    Returns:
        dict: Holds whichever of the keys ``'types'`` ('|'-separated type
        URIs), ``'dbr'``, ``'name'`` and ``'gender'`` are bound in the first
        result row. Empty when the endpoint returns no rows.
    """
    wiki = URI.split('/')[3]
    ontology_namespace = "http://dbkwik.webdatacommons.org/" + wiki + "/ontology"
    property_namespace = "http://dbkwik.webdatacommons.org/" + wiki + "/property"

    sparql = SPARQLWrapper("http://dbkwik.webdatacommons.org/sparql")
    # NOTE(review): the query declares no PREFIX for rdf:/owl: and has no
    # GROUP BY next to group_concat, so it relies on the endpoint accepting
    # both -- confirm before pointing this at a different SPARQL server.
    query = ("""SELECT (group_concat(?type;separator='|') as ?types) ?name ?gender ?dbr WHERE {        
        # Get Types of URI
        <""" + URI + """> rdf:type ?type .
        FILTER(contains(str(?type), '""" + ontology_namespace + """')) .
        
        # Get English label of URI
        OPTIONAL { <""" + URI + """> <""" + property_namespace + """/name> ?name . FILTER(lang(?name)='en') . }
        OPTIONAL { <""" + URI + """> <http://www.w3.org/2004/02/skos/core#prefLabel> ?name . FILTER(lang(?name)='en') . }
                
        # Try to get gender
        OPTIONAL { <""" + URI + """> <""" + property_namespace + """/gender> ?gender . }
        
        # Try to get corresponding DBpedia Resource
        OPTIONAL { <""" + URI + """> owl:sameAs ?dbr . }
    }
    """)
    print(query)

    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    bindings = results["results"]["bindings"]
    if not bindings:
        return {}

    # Only the first row is used; further rows are ignored.
    first_row = bindings[0]
    print(first_row)

    # Variables bound inside OPTIONAL (name, gender, dbr) are simply absent
    # from the row when unmatched, so copy only the keys that are present
    # instead of indexing unconditionally (which raised KeyError for 'dbr').
    return {
        key: first_row[key]['value']
        for key in ('types', 'dbr', 'name', 'gender')
        if key in first_row
    }

# Query the endpoint once for the configured URI; the resulting dict feeds the
# summary-building cell further down.
basic_info = get_basic_info(URI)

SELECT (group_concat(?type;separator='|') as ?types) ?name ?gender ?dbr WHERE {        
        # Get Types of URI
        <http://dbkwik.webdatacommons.org/HarryPotter/resource/Harry_Potter> rdf:type ?type .
        FILTER(contains(str(?type), 'http://dbkwik.webdatacommons.org/HarryPotter/ontology')) .
        
        # Get English label of URI
        OPTIONAL { <http://dbkwik.webdatacommons.org/HarryPotter/resource/Harry_Potter> <http://dbkwik.webdatacommons.org/HarryPotter/property/name> ?name . FILTER(lang(?name)='en') . }
        OPTIONAL { <http://dbkwik.webdatacommons.org/HarryPotter/resource/Harry_Potter> <http://www.w3.org/2004/02/skos/core#prefLabel> ?name . FILTER(lang(?name)='en') . }
                
        # Try to get gender
        OPTIONAL { <http://dbkwik.webdatacommons.org/HarryPotter/resource/Harry_Potter> <http://dbkwik.webdatacommons.org/HarryPotter/property/gender> ?gender . }
        
        # Try to get corresponding DBpedia Resource
        OPTIONAL { <htt

In [11]:
def get_top_k_triples(URI, k):
    """Return the ``k`` highest-ranked predicates connected to ``URI``.

    The query collects triples in both directions (``URI`` as subject and
    ``URI`` as object), keeps only neighbours that carry a
    ``vrank#pagerank`` value, drops a fixed list of bookkeeping predicates
    (rdf:type, dcterms:subject, owl:sameAs, FOAF links, thumbnail,
    predecessor/successor), groups by predicate and orders by the average
    pagerank, descending. The generated query is printed before it is sent.

    Args:
        URI: Full resource URI; its fourth '/'-separated segment is used as
            the wiki identifier for the ontology/property namespaces.
        k: Row limit, inserted into the query's LIMIT clause via ``str(k)``.

    Returns:
        list[dict]: One dict per result row with the keys ``'predicate'``,
        ``'resources'`` ('|'-separated neighbours) and ``'ranks'``, each
        holding the ``'value'`` field of the corresponding JSON binding.
    """
    wiki_base = "http://dbkwik.webdatacommons.org/" + URI.split('/')[3]
    ontology_namespace = wiki_base + "/ontology"
    property_namespace = wiki_base + "/property"

    endpoint = SPARQLWrapper("http://ec2-18-219-186-206.us-east-2.compute.amazonaws.com:3030/dbkwik/query")
    query = ("""SELECT ?predicate (group_concat(distinct ?resource; separator="|") as ?resources) (AVG(?rank) as ?ranks)
        WHERE {
          {
            select ?predicate ?resource ?rank {
            <""" + URI + """> ?predicate ?resource .
            ?resource <http://purl.org/voc/vrank#pagerank> ?rank .
            }
          }
          UNION
          {
            select ?predicate ?resource ?rank {
            ?resource ?predicate <""" + URI + """> .
            ?resource <http://purl.org/voc/vrank#pagerank> ?rank .
            }
          }

          FILTER (?predicate NOT IN (<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>, 
                <http://purl.org/dc/terms/subject>, 
                <http://xmlns.com/foaf/0.1/depiction>, 
                <http://www.w3.org/2002/07/owl#sameAs>, 
                <""" + ontology_namespace + """/thumbnail>, 
                <""" + property_namespace + """/predecessor>,
                <""" + property_namespace + """/successor>, 
                <http://xmlns.com/foaf/0.1/isPrimaryTopicOf>, 
                <http://xmlns.com/foaf/0.1/primaryTopic>)).
        } GROUP BY ?predicate ?ranks ORDER BY DESC(?ranks)
        LIMIT """ + str(k) + """
    """)

    print(query)

    endpoint.setQuery(query)
    endpoint.setReturnFormat(JSON)
    rows = endpoint.query().convert()["results"]["bindings"]

    # Flatten each JSON binding to {variable: value} for the selected columns.
    columns = ('predicate', 'resources', 'ranks')
    return [{column: row[column]['value'] for column in columns} for row in rows]

# Retrieve the 10 top-ranked predicates (with their neighbours) for the configured URI.
top_triples = get_top_k_triples(URI, 10)

SELECT ?predicate (group_concat(distinct ?resource; separator="|") as ?resources) (AVG(?rank) as ?ranks)
        WHERE {
          {
            select ?predicate ?resource ?rank {
            <http://dbkwik.webdatacommons.org/HarryPotter/resource/Harry_Potter> ?predicate ?resource .
            ?resource <http://purl.org/voc/vrank#pagerank> ?rank .
            }
          }
          UNION
          {
            select ?predicate ?resource ?rank {
            ?resource ?predicate <http://dbkwik.webdatacommons.org/HarryPotter/resource/Harry_Potter> .
            ?resource <http://purl.org/voc/vrank#pagerank> ?rank .
            }
          }

          FILTER (?predicate NOT IN (<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>, 
                <http://purl.org/dc/terms/subject>, 
                <http://xmlns.com/foaf/0.1/depiction>, 
                <http://www.w3.org/2002/07/owl#sameAs>, 
                <http://dbkwik.webdatacommons.org/HarryPotter/ontology/thumbnail>, 
         

In [35]:
# Build the opening sentences of a textual summary from ``basic_info``:
# "<name> is a <type>, ... and a <type>. " followed by an optional gender sentence.
summary = ''
# Subject pronoun; stays 'It' unless a male/female gender is found below.
# NOTE(review): not read in this cell -- presumably consumed by later sentences.
pronoun = 'It'

if 'types' in basic_info:
    types = basic_info['types'].split('|')

    # Reduce every type URI to its last path segment and prefix "a"/"an".
    types_sents = []
    for type_uri in types:
        # Named type_name (not ``type``) so the builtin is not shadowed for
        # the rest of the notebook.
        type_name = type_uri.split('/')[-1].lower()
        if not type_name or type_name == 'agent':  # Ignore Agent and empty fragments
            continue

        types_sents.append(p.a(type_name))

    # Emit the sentence only when at least one usable type remains; otherwise
    # there is nothing to say and the join helper would be fed an empty list.
    if types_sents:
        # 'name' is optional in basic_info, so fall back to a readable label
        # derived from the last segment of the URI.
        subject = basic_info.get('name') or replace_hyphen_with_space(URI.split('/')[-1])
        summary += subject + ' is ' + combine_conjunctive_sentences(types_sents) + '. '

if 'gender' in basic_info:
    gender = basic_info['gender'].lower()
    if gender == 'male':
        pronoun = 'He'
        summary += 'His gender is male. '
    elif gender == 'female':
        pronoun = 'She'
        summary += 'Her gender is female. '

In [38]:
# Spot check of inflect's a(): for a multi-word phrase the article is simply
# prefixed to the whole phrase (see the output below).
p.a('Russian Federation')

'a Russian Federation'