In [2]:
import pandas as pd
import numpy as np
from SPARQLWrapper import SPARQLWrapper, JSON
import nltk

In [69]:
# Example entities from two DBkwik wikis. Only the one bound to URI is used by
# the cells below; switch the right-hand side to query the other entity.
HARRY_POTTER_URI = 'http://dbkwik.webdatacommons.org/HarryPotter/resource/Harry_Potter'
DAENERYS_URI = 'http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Daenerys_Targaryen'
URI = DAENERYS_URI

In [70]:
def get_basic_info(URI):
    """Look up types, label, gender and DBpedia link for a DBkwik resource.

    The wiki name is taken from the fourth '/'-separated segment of ``URI``
    and used to build the wiki-specific ontology and property namespaces.
    A single SPARQL query is sent to the public DBkwik endpoint and only the
    first result row is used.

    Parameters
    ----------
    URI : str
        Full resource URI, e.g.
        ``http://dbkwik.webdatacommons.org/<wiki>/resource/<name>``.

    Returns
    -------
    dict
        Contains whichever of the keys ``'types'`` ('|'-separated type URIs),
        ``'dbr'``, ``'name'`` and ``'gender'`` are bound in the first result
        row. Empty when the endpoint returns no rows.

    Raises
    ------
    IndexError
        If ``URI`` has fewer than four '/'-separated segments.
    """
    wiki = URI.split('/')[3]
    ontology_namespace = "http://dbkwik.webdatacommons.org/" + wiki + "/ontology"
    property_namespace = "http://dbkwik.webdatacommons.org/" + wiki + "/property"

    sparql = SPARQLWrapper("http://dbkwik.webdatacommons.org/sparql")
    # NOTE(review): the query mixes an aggregate with plain variables and has
    # no GROUP BY; strict SPARQL 1.1 engines reject that, so this presumably
    # relies on endpoint leniency -- confirm before porting to another engine.
    query = ("""SELECT (group_concat(?type;separator='|') as ?types) ?name ?gender ?dbr WHERE {        
        # Get Types of URI
        <""" + URI + """> rdf:type ?type .
        FILTER(contains(str(?type), '""" + ontology_namespace + """')) .
        
        # Get English label of URI
        OPTIONAL { <""" + URI + """> <""" + property_namespace + """/name> ?name . FILTER(lang(?name)='en') . }
        OPTIONAL { <""" + URI + """> <http://www.w3.org/2004/02/skos/core#prefLabel> ?name . FILTER(lang(?name)='en') . }
                
        # Try to get gender
        OPTIONAL { <""" + URI + """> <""" + property_namespace + """/gender> ?gender . }
        
        # Try to get corresponding DBpedia Resource
        OPTIONAL { <""" + URI + """> owl:sameAs ?dbr . }
    }
    """)
    print(query)

    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    bindings = results["results"]["bindings"]
    if not bindings:
        return {}

    # Only the first row is of interest.
    first = bindings[0]
    print(first)

    # ?name, ?gender and ?dbr all come from OPTIONAL patterns, so any of them
    # may be missing from the row; copy only what is actually bound instead of
    # raising KeyError (previously 'dbr' was read unconditionally).
    output = {}
    for key in ('types', 'dbr', 'name', 'gender'):
        if key in first:
            output[key] = first[key]['value']

    return output


# Look up the selected entity; this sends a live request to the SPARQL endpoint
# and prints the generated query before the returned dict is displayed.
get_basic_info(URI)

SELECT (group_concat(?type;separator='|') as ?types) ?name ?gender ?dbr WHERE {        
        # Get Types of URI
        <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Daenerys_Targaryen> rdf:type ?type .
        FILTER(contains(str(?type), 'http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/ontology')) .
        
        # Get English label of URI
        OPTIONAL { <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Daenerys_Targaryen> <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/property/name> ?name . FILTER(lang(?name)='en') . }
        OPTIONAL { <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Daenerys_Targaryen> <http://www.w3.org/2004/02/skos/core#prefLabel> ?name . FILTER(lang(?name)='en') . }
                
        # Try to get gender
        OPTIONAL { <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Daenerys_Targaryen> <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/property/gender> ?gender . }
    

{'dbr': 'http://dbpedia.org/resource/Daenerys_Targaryen',
 'name': 'Daenerys Targaryen',
 'types': 'http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/ontology/Agent|http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/ontology/FictionalCharacter|http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/ontology/Person'}

In [74]:
def get_top_k_triples(URI, k):
    """Return the ``k`` best-ranked predicates connected to ``URI``.

    The query collects triples in which ``URI`` is the subject as well as
    triples in which it is the object, keeping only neighbouring resources
    that carry a ``vrank#pagerank`` value. A fixed list of predicates (type,
    subject, depiction, sameAs, thumbnail, predecessor, successor and the
    FOAF primary-topic links) is filtered out. Rows are grouped per
    predicate, neighbouring resources are joined with '|', and the groups are
    ordered by their average rank, highest first.

    Parameters
    ----------
    URI : str
        Full resource URI; its fourth '/'-separated segment names the wiki.
    k : int
        Value placed after ``LIMIT`` in the query.

    Returns
    -------
    list of dict
        One dict per result row with the keys ``'predicate'``,
        ``'resources'`` and ``'ranks'``, holding the values exactly as the
        endpoint returned them.
    """
    wiki_name = URI.split('/')[3]
    wiki_base = "http://dbkwik.webdatacommons.org/" + wiki_name

    endpoint = SPARQLWrapper("http://ec2-18-219-186-206.us-east-2.compute.amazonaws.com:3030/dbkwik_data/query")

    # Values substituted into the query template below.
    substitutions = {
        'uri': URI,
        'ontology': wiki_base + "/ontology",
        'property': wiki_base + "/property",
        'limit': k,
    }
    query = """SELECT ?predicate (group_concat(distinct ?resource; separator="|") as ?resources) (AVG(?rank) as ?ranks)
        WHERE {
          {
            select ?predicate ?resource ?rank {
            <%(uri)s> ?predicate ?resource .
            ?resource <http://purl.org/voc/vrank#pagerank> ?rank .
            }
          }
          UNION
          {
            select ?predicate ?resource ?rank {
            ?resource ?predicate <%(uri)s> .
            ?resource <http://purl.org/voc/vrank#pagerank> ?rank .
            }
          }

          FILTER (?predicate NOT IN (<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>, 
                <http://purl.org/dc/terms/subject>, 
                <http://xmlns.com/foaf/0.1/depiction>, 
                <http://www.w3.org/2002/07/owl#sameAs>, 
                <%(ontology)s/thumbnail>, 
                <%(property)s/predecessor>,
                <%(property)s/successor>, 
                <http://xmlns.com/foaf/0.1/isPrimaryTopicOf>, 
                <http://xmlns.com/foaf/0.1/primaryTopic>)).
        } GROUP BY ?predicate ?ranks ORDER BY DESC(?ranks)
        LIMIT %(limit)s
    """ % substitutions

    print(query)

    endpoint.setQuery(query)
    endpoint.setReturnFormat(JSON)
    response = endpoint.query().convert()

    # Flatten every binding row down to the plain values of the three columns.
    columns = ('predicate', 'resources', 'ranks')
    return [
        {column: row[column]['value'] for column in columns}
        for row in response["results"]["bindings"]
    ]

# Fetch the ten best-ranked predicates for the selected entity; this sends a
# live request to the SPARQL endpoint and prints the generated query.
get_top_k_triples(URI, 10)

SELECT ?predicate (group_concat(distinct ?resource; separator="|") as ?resources) (AVG(?rank) as ?ranks)
        WHERE {
          {
            select ?predicate ?resource ?rank {
            <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Daenerys_Targaryen> ?predicate ?resource .
            ?resource <http://purl.org/voc/vrank#pagerank> ?rank .
            }
          }
          UNION
          {
            select ?predicate ?resource ?rank {
            ?resource ?predicate <http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Daenerys_Targaryen> .
            ?resource <http://purl.org/voc/vrank#pagerank> ?rank .
            }
          }

          FILTER (?predicate NOT IN (<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>, 
                <http://purl.org/dc/terms/subject>, 
                <http://xmlns.com/foaf/0.1/depiction>, 
                <http://www.w3.org/2002/07/owl#sameAs>, 
                <http://dbkwik.webdatacommons.org/Game_of_Thrones

[{'predicate': 'http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/property/status',
  'ranks': '26396.117082796845e0',
  'resources': 'http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Category:Status:_Alive'},
 {'predicate': 'http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/property/culture',
  'ranks': '2430.0486329531254e0',
  'resources': 'http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Valyrians'},
 {'predicate': 'http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/property/rulers',
  'ranks': '2241.9388447968754e0',
  'resources': 'http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/Great_Pyramid'},
 {'predicate': 'http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/property/aka',
  'ranks': '784.2969714375001e0',
  'resources': 'http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/resource/The_Prince_That_Was_Promised'},
 {'predicate': 'http://dbkwik.webdatacommons.org/Game_of_Thrones_Wik/property/allegiance',
  'ranks': '650.46294814