In [13]:
import pandas as pd
import numpy as np
from SPARQLWrapper import SPARQLWrapper, JSON
import nltk, inflect, re, string, os

In [113]:
# https://pypi.python.org/pypi/inflect
p = inflect.engine()
os.environ['EC2_URI'] = 'http://ec2-18-188-157-163.us-east-2.compute.amazonaws.com:3030/dbkwik/query'
%run helpers.py

In [114]:
# URI = 'http://dbkwik.webdatacommons.org/HarryPotter/resource/Harry_Potter'
# URI = 'http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Daenerys_Targaryen'
# URI = 'http://dbkwik.webdatacommons.org/GTA_Wik/resource/Russia'
URI = 'http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Jon_Snow'

In [115]:
basic_info = get_basic_info(URI)
top_triples = get_top_k_triples(URI, 10)

SELECT (group_concat(?type;separator='|') as ?types) ?name ?gender ?dbr WHERE {        
        # Get Types of URI
        <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Jon_Snow> rdf:type ?type .
        FILTER(contains(str(?type), 'http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/ontology')) .
        
        # Get English label of URI
        OPTIONAL { <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Jon_Snow> <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/property/name> ?name . FILTER(lang(?name)='en') . }
        OPTIONAL { <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Jon_Snow> <http://www.w3.org/2004/02/skos/core#prefLabel> ?name . FILTER(lang(?name)='en') . }
        OPTIONAL { <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Jon_Snow> <http://www.w3.org/2000/01/rdf-schema#label> ?name . FILTER(lang(?name)='en') . }        
                
        # Try to get gender
        OPTIONAL { <http://dbkwik.web

In [120]:
# Maybe handle plural for pronoun and possessive pronoun
summary = ''
pronoun = 'It'
possessive_pronoun = 'Its'
name = basic_info['name']

if 'types' in basic_info:
    types = basic_info['types'].split('|')
    summary += name + ' is '
    types_sents = []
    for i in range(len(types)):
        ontology_type = types[i].split('/')[-1]
        print(ontology_type)
        if ontology_type.lower() == 'agent': # Ignore Agent
            continue
        
        types_sents.append(p.a(get_ontology_label(ontology_type)))
    
    summary += combine_conjunctive_sentences(types_sents) + '. '

if 'gender' in basic_info:
    if basic_info['gender'].lower() == 'male':
        pronoun = 'He'
        possessive_pronoun = 'His'
        summary += 'His gender is male. '
    elif basic_info['gender'].lower() == 'female':
        pronoun = 'She'
        possessive_pronoun = 'Her'
        summary += 'Her gender is female. '

Agent
FictionalCharacter
SELECT ?label WHERE {        
        <http://dbpedia.org/ontology/FictionalCharacter> rdfs:label ?label .
        FILTER(lang(?label)='en')
    }
    
Person
SELECT ?label WHERE {        
        <http://dbpedia.org/ontology/Person> rdfs:label ?label .
        FILTER(lang(?label)='en')
    }
    


In [121]:
# Get predicate name from SPARQL
for predicate in top_triples:
    predicate_name = top_triples[predicate]['label'] # Check if this works properly
    resources = []
    r_resources = []
    
    for index in range(len(top_triples[predicate]['resources'])):
        resource = top_triples[predicate]['resources'][index]                
        resource_name = None
        
        if resource['resource'].startswith('http://'): # URI
            resource_name = get_resource_name(resource['resource'])
        else: # Literal
            if resource['resource'][0] == '*': # Possibly bullet list which was not properly parsed by DBkwik
                _resources = resource['resource'].split('*')
                if len(_resources) > 1: # Then probably a bullet list
                    _resources = _resources[1:]
                    for index2 in range(len(_resources)):
                        _resources[index2] = re.sub(r'[^a-zA-Z0-9 \n\.]', '', _resources[index2]).replace('{', '').replace('}', '').strip()
                    if resource['reverse'] == 'true':
                        r_resources += _resources
                    else:
                        resources += _resources
            else:
                resource_name = resource['resource'].replace('{', '').replace('}', '') # Handling parsing error where entity might have {}
        
        if resource_name == None: # Continue to next element if resource name was not properly set
            continue
            
        if resource['reverse'] == 'true':
            r_resources.append(resource_name)
        else:
            resources.append(resource_name)    
    
    if len(resources) > 3: # Restrict to 3 items
        resources = resources[:3]
        
    if len(r_resources) > 3: # Restrict to 3 items
        r_resources = r_resources[:3]
            
    if predicate_name == 'born':
        # This part would be for verbs
        summary += pronoun + ' was born in ' + combine_conjunctive_sentences(resources) + '. '
    else:
        if len(resources) == 1:
            if p.singular_noun(predicate_name) == False or p.singular_noun(predicate_name) == predicate_name: # If singular predicate or plural and singular forms are the same (eg: species)
                summary += possessive_pronoun + ' ' + predicate_name + ' is ' + resources[0] + '. '
            else:
                summary += possessive_pronoun + ' ' + predicate_name + ' are ' + resources[0] + '. '
        elif len(resources) > 1:
            if p.singular_noun(predicate_name) == False: # Convert to plural form
                predicate_name = p.plural(predicate_name)
            summary += possessive_pronoun + ' ' + predicate_name + ' are ' + combine_conjunctive_sentences(resources) + '. '

        if len(r_resources) > 0:
            r_resources = [get_possessive_form(resource) for resource in r_resources]
            if p.singular_noun(predicate_name):
                predicate_name = p.singular_noun(predicate_name)
            summary += combine_conjunctive_sentences(r_resources) + ' ' + predicate_name + ' is ' + name + '. '
            
print(summary)    

SELECT ?name ?dbr WHERE {        
        # Get English label of URI
        OPTIONAL { <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/House_Stark> <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/property/name> ?name . FILTER(lang(?name)='en') . }
        OPTIONAL { <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/House_Stark> <http://www.w3.org/2004/02/skos/core#prefLabel> ?name . FILTER(lang(?name)='en') . }       
        OPTIONAL { <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/House_Stark> <http://www.w3.org/2000/01/rdf-schema#label> ?name . FILTER(lang(?name)='en') . }        
        OPTIONAL { <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/House_Stark> <http://www.w3.org/2002/07/owl#sameAs> ?dbr . }
    }
    
SELECT ?name ?dbr WHERE {        
        # Get English label of URI
        OPTIONAL { <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Tormund> <http://dbkwik.webdatacommons.org/Game_of_Th

In [None]:
top_triples