In [1]:


from rdflib import Graph
from rdflib.namespace import Namespace, RDF, RDFS, XSD
from rdflib.term import URIRef, Literal
import csv
import json
import networkx as nx
import pandas as pd
import rdflib
from collections import defaultdict, Counter
import locale
_ = locale.setlocale(locale.LC_ALL, '')
from _plotly_future_ import v4_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
import numpy as np
import os
from sklearn.metrics import pairwise_distances

from spacy.matcher import PhraseMatcher

# In[58]:


MultifactOriented="Multifact_oriented"
FactOriented = "Fact_oriented"
RecommendationOriented = "Recommendation_oriented"
PlotOriented = "Plot_oriented"

In [2]:


def build_movie_graph():
    # g_movie = Graph()
    # g_movie.parse("./14_graph.ttl", format='ttl')

    # prefixes used in the graph

    graph_movie = rdflib.Graph()
    graph_movie.parse('./14_graph.nt', format='turtle')
    print(f"The graph has {len(graph_movie)} triples.")

    #graph statistics
    WD = Namespace('http://www.wikidata.org/entity/')
    WDT = Namespace('http://www.wikidata.org/prop/direct/')
    SCHEMA = Namespace('http://schema.org/')
    DDIS = Namespace('http://ddis.ch/atai/')

    entities = set(graph_movie.subjects()) | {s for s in graph_movie.objects() if isinstance(s, URIRef)}
    #weiyu
    predicates = set(graph_movie.predicates())
    literals = {s for s in graph_movie.objects() if isinstance(s, Literal)}
    with_type = set(graph_movie.subjects(WDT['P31'], None))
    with_super = set(graph_movie.subjects(WDT['P279'], None))
    types = set(graph_movie.objects(None, WDT['P31']))
    supers = set(graph_movie.objects(None, WDT['P279']))
    with_label = set(graph_movie.subjects(RDFS.label, None))

    n_ents = len(entities)
    n_rels = len(predicates)
    n_lits = len(literals)
    t_tot = len(graph_movie)
    t_ent = len([1 for s, p, o in graph_movie.triples((None, None, None)) if isinstance(o, URIRef)])
    t_lit = t_tot - t_ent
    n_notype = len(entities - with_type - with_super)
    n_notype_flt = len(entities - with_type - with_super - types - supers)

    pd.DataFrame([
        ('number of entities', f'{n_ents:n}'),
        ('number of literals', f'{n_lits:n}'),
        ('number of predicates', f'{n_rels:n}'),
        ('number of triples', f'{t_tot:n}'),
        ('number of ent-ent triples', f'{t_ent:n}'),
        ('number of ent-lit triples', f'{t_lit:n}'),
        ('number of entities w/o label', f'{len(entities - with_label):n}'),
        ('number of predicates w/o label', f'{len(predicates - with_label):n}'),
        ('number of entities w/o type', f'{n_notype:n}'),
        ('number of instances w/o type', f'{n_notype_flt:n}'),
    ])


    # read_extract_data()
    top250 = set(open('./imdb-top-250.t').read().split('\n')) - {''}
    types_of_encoding = "utf8"
    # "cp1252"
    data_top=pd.DataFrame([
        ('Top-250 coverage', '{:n}'.format(
            len(top250 & {str(o) for o in graph_movie.objects(None, WDT.P345) if o.startswith('tt')}))),
        ('Entities with IMDb ID', '{:n}'.format(
            len({str(o) for o in graph_movie.objects(None, WDT.P345) if o.startswith('tt')}))),
        ('Plots linked to a movie', '{:n}'.format(
            len({qid for qid, plot in csv.reader(open('./12_plots.csv', encoding=types_of_encoding)) if
                 URIRef(qid) in entities}))),
        ('Comments linked to a movie', '{:n}'.format(
            len([qid for qid, rating, sentiment, comment in csv.reader(open('./12_comments.csv')) if
                 URIRef(qid) in entities]))),
        ('Movies having at least one comment', '{:n}'.format(
            len({qid for qid, rating, sentiment, comment in csv.reader(open('./12_comments.csv')) if
                 URIRef(qid) in entities}))),
    ])
    print(data_top)
    return graph_movie


In [3]:
import re
def build_nodes_predicates():
    nodes = {}
    for node in graph.all_nodes():
        if isinstance(node, URIRef):
            if graph.value(node, RDFS.label):
                #print("node:",node,RDFS.label)
                nodes[node.toPython()] = graph.value(node, RDFS.label).toPython()
                
#               print(nodes[node.toPython()],type(nodes[node.toPython()]))
            else:
                nodes[node.toPython()] = re.sub("http://www.wikidata.org/entity/", "", node.toPython())
 #               print(nodes[node.toPython()],type(nodes[node.toPython()]))
    #print(nodes,type(nodes))
    '''
    for s, p, o in graph_movie:
        if graph_movie.value(p, RDFS.label):
            predicates[p.toPython()] = graph_movie.value(p, RDFS.label).toPython()
        else:
            predicates[p.toPython()] = re.sub("http://www.wikidata.org/prop/direct/", "", p.toPython())
    '''
    predicates_all={}
    predicates = {}
    for result in SPARQL_results["results"]["bindings"]:
        key = result['property']['value'].replace("entity","prop/direct")
        #print("key",key,type(key))
        #if key in {str(s) for s in graph.predicates()}:
           #print(result)
        if 'propertyAltLabel' in result.keys():
            #for value in result['propertyAltLabel']['value'].split(","):
            value=result['propertyAltLabel']['value'] + "," + result['propertyLabel']['value']
            #print("value",value,type(value))
            predicates_all[key] = value
            #print("key,value",key,value)
        #print("predicates_all[key]",predicates_all[key])
        else:
            predicates_all[key] = result['propertyLabel']['value']
            #print("key,propertyLabel",key,result['propertyLabel']['value'])

    for value in {str(s) for s in graph.predicates()}:
        if value in predicates_all.keys():
            for key in predicates_all[value].split(","):
                #print("key,value",key,value)
            #for item in res:
                predicates[key]=value
    return nodes,predicates

In [4]:
import sys
from SPARQLWrapper import SPARQLWrapper, JSON

endpoint_url = "https://query.wikidata.org/sparql"

query = """#All properties with descriptions and aliases and types
SELECT ?property ?propertyType ?propertyLabel ?propertyDescription ?propertyAltLabel WHERE {
  ?property wikibase:propertyType ?propertyType .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
ORDER BY ASC(xsd:integer(STRAFTER(STR(?property), 'P')))"""


def get_results(endpoint_url, query):
    user_agent = "WDQS-example Python/%s.%s" % (sys.version_info[0], sys.version_info[1])
    # TODO adjust user agent; see https://w.wiki/CX6
    sparql = SPARQLWrapper(endpoint_url, agent=user_agent)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()

In [5]:
def find_movie_type():
    wdt_movie="http://www.wikidata.org/prop/direct/P31"
    wd="http://www.wikidata.org/entity/Q11424"
    wdt_genre="http://www.wikidata.org/prop/direct/P136"
    #query_template = 'SELECT DISTINCT ?x ?y WHERE {{ ?movie rdfs:label "{}"@en . ?movie <{}> ?x . ?x <{}> ?y .}}'.format(entity, match_pred, RDFS.label)
    #query_template = 'SELECT DISTINCT ?movie ?label WHERE {{ ?movie <{}> <{}> . ?human rdfs:label "{}"@en. ?movie <{}> ?human . ?movie <{}> ?label .}}'.format(wdt_movie, wd,  human, wdt_director, RDFS.label)
    
    str_temp='''SELECT DISTINCT ?movie ?type ?label WHERE 
    {{
        ?movie <{}> <{}> .
        ?movie <{}> ?type .
        ?type <{}> ?label .
    }}
    '''
    
    query_template = str_temp.format(wdt_movie,wd,wdt_genre,RDFS.label)

    print("--- sparql query: {}".format(query_template))

    qres = graph.query(query_template)

    print("\n--- querying results: ")
    #print(qres,len(qres),type(qres))
    movie_list=[]
    movie_dic={}
    
    if(len(qres)<1):
        return None
    else:
        for row in qres:
            lab=str(row.label.split(" film")[0])
            movie_dic[lab]=row.type
          #  print("row",lab,row.type)
    return movie_dic
     

In [6]:
graph = build_movie_graph()


The graph has 2056777 triples.
                                    0       1
0                    Top-250 coverage     243
1               Entities with IMDb ID  27’882
2             Plots linked to a movie  10’366
3          Comments linked to a movie  26’491
4  Movies having at least one comment   2’454


In [7]:
SPARQL_results = get_results(endpoint_url, query)

In [8]:

nodes, predicates = build_nodes_predicates()

In [9]:
type_list = find_movie_type()    

--- sparql query: SELECT DISTINCT ?movie ?type ?label WHERE 
    {
        ?movie <http://www.wikidata.org/prop/direct/P31> <http://www.wikidata.org/entity/Q11424> .
        ?movie <http://www.wikidata.org/prop/direct/P136> ?type .
        ?type <http://www.w3.org/2000/01/rdf-schema#label> ?label .
    }
    

--- querying results: 


In [10]:
len(type_list)

337

In [11]:
WD = rdflib.Namespace('http://www.wikidata.org/entity/')
WDT = rdflib.Namespace('http://www.wikidata.org/prop/direct/')
DDIS = rdflib.Namespace('http://ddis.ch/atai/')
RDFS = rdflib.namespace.RDFS
SCHEMA = rdflib.Namespace('http://schema.org/')
    
entity_emb = np.load(os.path.join('entity_embeds.npy'))
relation_emb = np.load(os.path.join('relation_embeds.npy'))
with open(os.path.join( 'data', 'fb15k-embeds', 'entity_ids.del'), 'r') as ifile:
    # index -> identifier mapping
    id2ent = {int(k): 'http://rdf.freebase.com/ns/' + v.replace('/m/', 'm.')
              for k, v in csv.reader(ifile, delimiter='\t')}
    # identifier -> index mapping
    ent2id = {v: k for k, v in id2ent.items()}
print(len(ent2id))
with open(os.path.join( 'data', 'fb15k-embeds', 'relation_ids.del'), 'r') as ifile:
    '''
    rel2id = {rdflib.term.URIRef(rel): int(idx) for idx, rel in csv.reader(ifile, delimiter='\t')}
    id2rel = {v: k for k, v in rel2id.items()}
    '''
    id2rel = {int(k): v
                for k, v in csv.reader(ifile, delimiter='\t')}
        # identifier -> index mapping
    rel2id = {v: k for k, v in id2rel.items()}


14541


In [12]:


    
len(id2ent)
# load entity identifier <-> label mapping
print(os.path.join('data', 'fb15k-embeds', 'entity_labels.csv'))
with open(os.path.join('data', 'fb15k-embeds', 'entity_labels.csv'), 'r') as ifile:
        # identifier -> label mapping
    ent2label = {ent: lbl for ent, lbl in csv.reader(ifile)}
        # label -> identifier mapping
    label2ent = {v: k for k, v in ent2label.items()}
print("label2ent",len(label2ent),len(ent2label))
len(ent2label)

with open(os.path.join('entity_ids.del'), 'r') as ifile:
    ent2id2 = {rdflib.term.URIRef(ent): int(idx) for idx, ent in csv.reader(ifile, delimiter='\t')}
    id2ent2 = {v: k for k, v in ent2id2.items()}
with open(os.path.join('relation_ids.del'), 'r') as ifile:
    rel2id2 = {rdflib.term.URIRef(rel): int(idx) for idx, rel in csv.reader(ifile, delimiter='\t')}
    id2rel2 = {v: k for k, v in rel2id2.items()}
print(len(id2rel2))
print("id2ent2",len(id2ent2))
    # load entity identifier -> rdf:type mapping
ent2type = defaultdict(set)
with open(os.path.join( 'data', 'fb15k-embeds', 'types.csv'), 'r') as ifile:
    for ent, tpe in csv.reader(ifile):
        ent2type[ent].add(tpe)

print(len(ent2type))

type2ent = defaultdict(set)
for ent, types in ent2type.items():
    for tpe in types:
        type2ent[tpe].add(ent)

type2len = {tpe: len(ent) for tpe, ent in type2ent.items()}

    # load TransE embedding vectors
transE_ent = np.load(os.path.join( 'data', 'fb15k-embeds', 'fb-transe-ents.npy'))
transE_rel = np.load(os.path.join('data', 'fb15k-embeds', 'fb-transe-rels.npy'))
transE_ent.shape, transE_rel.shape

ent2lbl = {ent: str(lbl) for ent, lbl in graph.subject_objects(RDFS.label)}
lbl2ent = {lbl: ent for ent, lbl in ent2lbl.items()}

data\fb15k-embeds\entity_labels.csv
label2ent 14247 14517
248
id2ent2 158901
14517


In [13]:


import spacy
import re
import rdflib

from rdflib import Graph, URIRef, Literal
from rdflib import Namespace

import editdistance
from rdflib.namespace import Namespace, RDF, RDFS, XSD
def recognize_text(raw_message,movie_flag):
    # nlp = transformers.pipeline("conversational",
    #                         model="microsoft/DialoGPT-medium")
    # chat = nlp(transformers.Conversation(raw_message), pad_token_id=50256)
    print("raw_message",raw_message)
    NER = spacy.load("en_core_web_md")
    text1 = NER(raw_message)

    print("text1",text1)
    
    relation_type,relation_pattern=match_relation(raw_message)
    print("text1.ents",text1.ents,len(text1.ents))
    if(len(text1.ents)<1):
        if relation_type!=RecommendationOriented and relation_type!=PlotOriented:
            print("relation_type",relation_type)
            return None,None,None
    if len(text1.ents)>0:
        entity=text1.ents[0].text
    else:
        entity=None
    print("entity",entity)
    relation_list=get_relation_list(raw_message,entity)
    
    print("relation_type",relation_type)
    print("relation_list",relation_list)
    if relation_list!=None and len(relation_list)>1:
        relation_list.reverse()
    print("relation_list",relation_list)
    
    
    if relation_type==MultifactOriented:
        
        if len(relation_list)>=1:
            entity_list,relation = find_entity_relation_pattern(entity,relation_list,movie_flag)
            
         #   print("entity,relation", entity,relation)
        
            return entity_list,relation,relation_type
        else:
            return None,None,None
    elif relation_type==PlotOriented:   
        print("relation_pattern",relation_pattern,type(relation_pattern))
        if entity!=None and len(relation_list)>=1:
            print("relation_list[:1]",relation_list[:1])
            entity_list,relation = find_entity_relation_pattern(entity,relation_list[:1],movie_flag,10)
            
            print("entity,relation", entity_list,relation)
        
            return entity_list,relation_pattern,relation_type
        else:
            if type(entity)==str:
                entity=[entity]
            return entity,relation_pattern,relation_type
            
    elif relation_type==RecommendationOriented:
        relation = relation_pattern
        return entity,relation,relation_type
    elif relation_type==FactOriented:
        
        NER = spacy.load("en_core_web_md")
        text1 = NER(raw_message)
        print(text1)
        entity_list=[]
        for word in text1.ents:
            entity_list.append(word)
        return entity_list,None,relation_type
        
        
    else:
        return None,None,None
    
   

In [14]:
def get_relation_list(question,entity):
    
    '''
    question_pattern = relation_pattern+" is the (.*) of ENTITY"
    #the name of the lead actor in the
    question_pattern_complex = relation_pattern+" is the (.*) of the (.*) in the ENTITY"
    print("question pattern: {}\n".format(question_pattern))
    question = re.sub(entity, "ENTITY", raw_message.rstrip("?"))  # preprocess the question
    match_group=re.match(question_pattern, question)
    match_num=re.findall(question_pattern, question)
    '''
    if entity==None:
        return None
    relation_list = []
    
    question_pattern_list=[]
    
    question_pattern_list.append("the (.*) of the (.*) in the ENTITY")
    question_pattern_list.append("the (.*) of the (.*) in the movie ENTITY")
    question_pattern_list.append("the (.*) of the (.*) in movie ENTITY")
    question_pattern_list.append("the (.*) of the (.*) in ENTITY")
    question_pattern_list.append("the (.*) of the (.*) of the ENTITY")
    question_pattern_list.append("the (.*) of the (.*) of the movie ENTITY")
    question_pattern_list.append("the (.*) of the (.*) of movie ENTITY")
    question_pattern_list.append("the (.*) of the (.*) of ENTITY")
    question_pattern_list.append("the (.*) of ENTITY")
    question_pattern_list.append("the (.*) of movie ENTITY")
    question_pattern_list.append("the (.*) of the ENTITY")
    question_pattern_list.append("the (.*) of the movie ENTITY")
    index=0
    match_num=[]
    while len(match_num)<1 and index<len(question_pattern_list):
        question_pattern = question_pattern_list[index]

#        print("question pattern: {}\n".format(question_pattern))

        question = re.sub(entity, "ENTITY", question.rstrip("?"))  # preprocess the question
 #       print("question",question)
        match_group=re.match(question_pattern, question)
        match_num=re.findall(question_pattern, question)
       # print("match_num",match_num)
        index+=1
    if len(match_num)>=1:

        relation_list = re.findall(question_pattern, question)
        #.group(1)  # match the relation using a pattern
        print("recognized relation: {}\n".format(relation_list))
        if type(relation_list[0])!=str:
     #       print(type(relation_list[0]))
            return list(relation_list[0])
        return list(relation_list)
    
    return relation_list

question = "What is the name of the Batman movie?"
question = "What is the name of the actor of the Batman movie?"

entity = "Batman"
relation_pattern = ""
relation_list=get_relation_list(question,entity)
print(relation_list)


recognized relation: [('name', 'actor')]

['name', 'actor']


In [15]:
def find_entity_relation_pattern(entity,relation_list,movie_flag,N=1):
    index=0
    for relation in relation_list:
        index+=1 
        tmp = 9999
        match_node = ""
        print("--- entity matching for \"{}\"\n".format(entity))
        if movie_flag:
            if find_movie_entity(entity) == None:
                return None
            match_node=str(find_movie_entity(entity)[0])
            movie_flag=False
        else:
            for key, value in nodes.items():
                    #print("key,value:",key,value)
                    #print("edit distance between {} and {}: {}".format(value, entity, editdistance.eval(value, entity)))
                if editdistance.eval(value, entity) < tmp:
                    tmp = editdistance.eval(value, entity)
                    match_node = key
            
        tmp_max = 9999
        match_pred = ""
        relation_changed=False
        relation_origin=relation
        if relation == "director":
            relation = "movie director"
            relation_changed = True
        if "lead actor" in relation:
            relation_changed = True
            relation = "actor"
        
        print("\n--- relation matching for \"{}\"\n".format(relation))
        tmp = 9999
        distan_dict={}
        for value, key in predicates.items():
            #print("key,value:",key,value)
            #print("edit distance between {} and {}: {}".format(value, relation, editdistance.eval(value, relation)))
            if editdistance.eval(value, relation) < tmp:
                match_pred = key
                tmp = editdistance.eval(value, relation)
                print("key,dist",key,tmp)
                '''
                tmp = editdistance.eval(value, relation)
                if key in distan_dict.keys():
                    distan_dict[key]=(distan_dict[key]+tmp)/2
                else:
                    distan_dict[key]=tmp
                if distan_dict[key]<tmp_max:
                    match_pred = key
                    tmp_max = distan_dict[key]
                    print("match_pred",match_pred,distan_dict[key])
                    #print("match_pred",match_pred,type(match_pred))
                '''
        if relation_changed == True:
            relation = relation_origin
        print("\n--- the matching node of \"{}\" is {}\n".format(entity, match_node))
        print("--- the matching predicates of \"{}\" is {}\n".format(relation, match_pred))
        entity_list=build_embeddings(match_node,match_pred,N)
   #     print("entity_list",entity_list)
        if index==len(relation_list):
            if N>len(entity_list):
                N=len(entity_list)
            entity=entity_list[:N]
        else:
            entity=entity_list[0]
        
    print("entity,relation",entity,relation)
    return entity,relation

In [16]:
def match_movie_type(text_doc):
    nlp=spacy.load("en_core_web_md")
    text_doc=nlp(text_doc)
    matcher = PhraseMatcher(nlp.vocab)
    #matcher.add("OBAMA", [nlp("Barack Obama")])
    Type_terms = type_list.keys()
    Type_patterns = [nlp(text) for text in Type_terms]
    matcher.add("Type_patterns", Type_patterns)
    
    matches = matcher(text_doc)
   # print("matches",matches)
    if len(matches)<1:
        return None
    match_id, start, end = matches[0]
    span = nlp.vocab.strings[match_id]
    #print("match_id, start, end",match_id, start, end)
    #print(nlp.vocab.strings[match_id], text_doc[start:end])
    movie_type=text_doc[start:end]
   # print(movie_type,type_list[str(movie_type)])
match_movie_type("action movie")

In [17]:

def match_relation(text_doc):
    nlp=spacy.load("en_core_web_md")
    text_doc=nlp(text_doc)
    matcher = PhraseMatcher(nlp.vocab)
    #matcher.add("OBAMA", [nlp("Barack Obama")])
    Multifact_oriented_terms = ['Who', 'When', 'Where', 'What','who', 'when', 'where', 'what']
    Multifact_oriented_patterns = [nlp(text) for text in Multifact_oriented_terms]
    matcher.add("Multifact_oriented", Multifact_oriented_patterns)

    Fact_oriented_terms = ['Did', 'Is','did']
    Fact_oriented_patterns = [nlp(text) for text in Fact_oriented_terms]
    matcher.add("Fact_oriented", Fact_oriented_patterns)

    Recommondation_oriented_terms = ['recommend', 'Recommend']
    Recommendation_oriented_patterns = [nlp(text) for text in Recommondation_oriented_terms]
    matcher.add("Recommendation_oriented", Recommendation_oriented_patterns)

    Plot_oriented_terms = ['show','picture','pictures','Show','image','photo']
    Plot_oriented_patterns = [nlp(text) for text in Plot_oriented_terms]
    matcher.add("Plot_oriented", Plot_oriented_patterns)

    matches = matcher(text_doc)
#    print("matches",matches)
    if len(matches)<1:
        return None,None
    match_id, start, end = matches[0]
    span = nlp.vocab.strings[match_id]
   # print("match_id, start, end",match_id, start, end)
    
  #  print(nlp.vocab.strings[match_id], text_doc[start:end])
    relation_type,relation_pattern=nlp.vocab.strings[match_id], text_doc[start:end]
    if relation_type==PlotOriented:
        #print("is plotOriented")
        plot_type_terms=['behind_the_scenes','event','poster','product','production_art','publicity','still_frame','unknown','user_avatar']
        Plot_type_patterns = [nlp(text) for text in plot_type_terms]
        matcher_plot = PhraseMatcher(nlp.vocab)
        matcher_plot.add("Plot_type", Plot_type_patterns)
        matches_type = matcher_plot(text_doc)
       # print("matches_type",matches_type)
        if len(matches_type)<1:
            relation_pattern = "none"
        else:
            match_id, start, end = matches_type[0]
            span = nlp.vocab.strings[match_id]
            #print("match_id, start, end",match_id, start, end)

           # print(nlp.vocab.strings[match_id], text_doc[start:end])
            relation_pattern=text_doc[start:end]
    
    return relation_type,str(relation_pattern)
    

In [18]:


def build_embeddings(match_node,match_pred,N=1):
    
    #print(entity_emb.shape,type(entity_emb))
    #print(relation_emb.shape,type(relation_emb))
    #print(relation_emb.shape,type(relation_emb))
   # print(match_node)
    head = entity_emb[ent2id2[rdflib.term.URIRef(match_node)]]
    #print(head)
     # "occupation" relation
    #print("type(match_pred)",type(match_pred))
    pred = relation_emb[rel2id2[rdflib.term.URIRef(match_pred)]]
    # add vectors according to TransE scoring function.
    lhs = head + pred
    # compute distance to *any* entity
    dist = pairwise_distances(lhs.reshape(1, -1), entity_emb).reshape(-1)
    # find most plausible entities
    most_likely = dist.argsort()
     # compute ranks of entities
    ranks = dist.argsort().argsort()
    #print("head:",head)
    #print("pred:",pred)
    #print("lhs:",lhs)
    #print("dist:",dist)
    #print("most_likely:",most_likely)
    #print("ranks:",ranks)
    #for rank, idx in enumerate(most_likely[:10]):
    #    print("rank",rank,idx)
    data_rank=pd.DataFrame([
        (id2ent2[idx][len(WD):], ent2lbl[id2ent2[idx]], dist[idx], rank+1)
        for rank, idx in enumerate(most_likely[:10])],
        columns=('Entity', 'Label', 'Score', 'Rank'))
    print("data_rank",data_rank)
    '''
    index = np.argmin(dist)
    answer_template=ent2lbl[id2ent[index]]
    '''
    entity_list = []
    for rank, idx in enumerate(most_likely[:N]):
        entity_list.append(ent2lbl[id2ent2[idx]])
    #print("entity_list:£",entity_list)
    return entity_list

node = "http://www.wikidata.org/entity/Q102225"
pre = "http://www.wikidata.org/prop/direct/P161"
build_embeddings(node,pre,20)

data_rank      Entity              Label        Score  Rank
0  Q4768521         Anne Lacey  2642.886719     1
1   Q253495  Shefali Chowdhury  2650.745850     2
2   Q105682         Tom Felton  2666.912354     3
3   Q256250    William Melling  2682.230713     4
4   Q105466      Bonnie Wright  2695.176514     5
5   Q109098        Katie Leung  2702.775635     6
6  Q1401230       James Phelps  2707.671631     7
7  Q4495106     Natalie Hallam  2725.691162     8
8   Q461401      Jamie Waylett  2732.015137     9
9    Q19190       Rupert Grint  2773.385254    10


['Anne Lacey',
 'Shefali Chowdhury',
 'Tom Felton',
 'William Melling',
 'Bonnie Wright',
 'Katie Leung',
 'James Phelps',
 'Natalie Hallam',
 'Jamie Waylett',
 'Rupert Grint',
 'Oliver Phelps',
 'J. K. Rowling',
 'Alfred Enoch',
 'Brendan Gleeson',
 'Warwick Davis',
 'Clémence Poésy',
 'Geraldine Somerville',
 'Ralph Fiennes',
 'Alan Rickman',
 'Steven Claydon']

In [19]:

def build_sparql(entity, match_pred):
    str_temp='''
    SELECT DISTINCT ?person ?person_name WHERE 
    {{ 
        <{}> <{}> ?person .
        ?person <{}> ?person_name .
        
    }}
    '''
    #?movie rdfs:label ?label . FILTER (CONTAINS(str(?label),'{}'))
    query_template = str_temp.format(entity, match_pred, RDFS.label)

    print("--- sparql query: {}".format(query_template))

    qres = graph.query(query_template)

    print("\n--- querying results: ")
    print(qres)
    print(pd.DataFrame(qres.bindings))

    for row in qres:
       # print("row",row.person, row.person_name)
        answer = row.person_name
    return answer    
build_sparql(rdflib.term.URIRef("http://www.wikidata.org/entity/Q218589"),rdflib.term.URIRef("http://www.wikidata.org/prop/direct/P161"))

--- sparql query: 
    SELECT DISTINCT ?person ?person_name WHERE 
    { 
        <http://www.wikidata.org/entity/Q218589> <http://www.wikidata.org/prop/direct/P161> ?person .
        ?person <http://www.w3.org/2000/01/rdf-schema#label> ?person_name .
        
    }
    

--- querying results: 
<rdflib.plugins.sparql.processor.SPARQLResult object at 0x0000025B536F6340>
                                      person           person_name
0    http://www.wikidata.org/entity/Q1121589         Johnny Harris
1     http://www.wikidata.org/entity/Q118393         Leonard Carow
2   http://www.wikidata.org/entity/Q11956074        Trystan Pütter
3    http://www.wikidata.org/entity/Q1387430           Pip Torrens
4    http://www.wikidata.org/entity/Q1423737        Hannes Wegener
5    http://www.wikidata.org/entity/Q1619430    Hinnerk Schönemann
6    http://www.wikidata.org/entity/Q1620586        Markus Tomczyk
7    http://www.wikidata.org/entity/Q1797671           Nicolas Bro
8    http://www.wikidata.

rdflib.term.Literal('Rainer Bock', lang='en')

In [20]:

def find_Entity_Neighbours(query):
    
    topN = 10
    #print("query,label2ent",query,label2ent)
    #print("query,label2ent",query,label2ent[query],type(label2ent))
    assert query in label2ent
    assert topN > 0
    #print(len(ent2id))
    emb = np.atleast_2d(transE_ent[ent2id[label2ent[query]]])
    dist = pairwise_distances(emb, transE_ent)
    neighbours_list=[]
    for idx in dist.argsort().reshape(-1)[:topN]:
        #print(ent2label[id2ent[idx]])
        neighbours_list.append(ent2label[id2ent[idx]])
    return neighbours_list

In [21]:
human="Steven Spielberg"
def find_human_movie(human,movie_type=None):
    wdt_movie="http://www.wikidata.org/prop/direct/P31"
    wd="http://www.wikidata.org/entity/Q11424"
    wdt_actor = "http://www.wikidata.org/prop/direct/P161"
    wdt_director = "http://www.wikidata.org/prop/direct/P57"
    wdt_genre = "http://www.wikidata.org/prop/direct/P136"
    #query_template = 'SELECT DISTINCT ?x ?y WHERE {{ ?movie rdfs:label "{}"@en . ?movie <{}> ?x . ?x <{}> ?y .}}'.format(entity, match_pred, RDFS.label)
    if movie_type==None:
        str_temp = '''
        SELECT DISTINCT ?movie ?label WHERE {{
            ?movie <{}> <{}> . 
            ?human rdfs:label "{}"@en. 
            ?movie <{}> ?human . 
            ?movie <{}> ?label .
            }}

        '''
        query_template = str_temp.format(wdt_movie, wd,  human, wdt_director, RDFS.label)
    else:
        str_temp = '''
        SELECT DISTINCT ?movie ?label WHERE {{
            ?movie <{}> <{}> . 
            ?human rdfs:label "{}"@en. 
            ?movie <{}> ?human . 
            ?movie <{}> ?label .
            ?movie <{}> <{}> .
            }}

        '''
        query_template = str_temp.format(wdt_movie, wd,  human, wdt_director, RDFS.label,wdt_genre,movie_type)
    #query_template = 'SELECT DISTINCT ?movie ?label WHERE {{ ?movie rdfs:label ?label . ?movie <{}> <{}> .}}'.format(movie_name,wdt,wd)

    print("--- sparql query: {}".format(query_template))

    qres = graph.query(query_template)

    #print("\n--- querying results of director: ")
    #print(qres,len(qres),type(qres))
    entity_list = []
    for row in qres:
        #print("row",row.movie,row.label)
        entity_list.append(row.label)
    if len(entity_list)>5:
        N=5
    else:
        N=len(entity_list)
    movie_list=[]
    print("N",N)
    if(len(entity_list)>=1):
        for index in range(N):
          #  print("index",index)
            movie_list.append(str(entity_list[index]))
        return movie_list
    if movie_type==None:
        str_temp = '''
        SELECT DISTINCT ?movie ?label WHERE {{
            ?movie <{}> <{}> .
            ?human rdfs:label "{}"@en.
            ?movie <{}> ?human .
            }}
        '''
        query_template = str_temp.format(wdt_movie, wd, human, wdt_actor)
    else:
        str_temp = '''
        SELECT DISTINCT ?movie ?label WHERE {{
            ?movie <{}> <{}> .
            ?human rdfs:label "{}"@en.
            ?movie <{}> ?human .
            ?movie <{}> <{}> .
            }}
        '''
        query_template = str_temp.format(wdt_movie, wd, human, wdt_actor, wdt_genre,movie_type)

    #query_template = 'SELECT DISTINCT ?movie,?label WHERE {{ ?movie rdfs:label ?label . ?movie <{}> <{}> .}}'.format(movie_name,wdt,wd)

    print("--- sparql query of actor: {}".format(query_template))

    qres = graph.query(query_template)

    #print("\n--- querying results: ")
    #print(qres,len(qres),type(qres))
    entity_list = []
    for row in qres:
      #  print("row",row.movie,row.label)
        entity_list.append(row.label)
    if len(entity_list)>5:
        N=5
    else:
        N=len(entity_list)
    movie_list=[]
    #print("N",N)
    if(len(entity_list)>=1):
        for index in range(N):
     #       print("index",index)
            movie_list.append(str(entity_list[index]))
        return movie_list   
        
        
    return None
movie_type="http://www.wikidata.org/entity/Q369747"
print(find_human_movie(human,movie_type))


--- sparql query: 
        SELECT DISTINCT ?movie ?label WHERE {
            ?movie <http://www.wikidata.org/prop/direct/P31> <http://www.wikidata.org/entity/Q11424> . 
            ?human rdfs:label "Steven Spielberg"@en. 
            ?movie <http://www.wikidata.org/prop/direct/P57> ?human . 
            ?movie <http://www.w3.org/2000/01/rdf-schema#label> ?label .
            ?movie <http://www.wikidata.org/prop/direct/P136> <http://www.wikidata.org/entity/Q369747> .
            }

        
N 5
['War Horse', '1941', 'Saving Private Ryan', 'Empire of the Sun', 'Bridge of Spies']


In [22]:
def find_type(message):
    for key in type_list.keys():
        if key in message:
     #       print(key,type_list[key])
            return type_list[key]
    return None
find_type("action movie")    

    

rdflib.term.URIRef('http://www.wikidata.org/entity/Q188473')

In [23]:

def build_responce(message, graph_movie):
 #   nodes = {}
#    predicates = {}
#    nodes = set(graph_movie.subjects()) | {s for s in graph_movie.objects() if isinstance(s, URIRef)}
    #weiyu
#    predicates = set(graph_movie.predicates())

    nodes, predicates = build_nodes_predicates()
    WD = rdflib.Namespace('http://www.wikidata.org/entity/')
    WDT = rdflib.Namespace('http://www.wikidata.org/prop/direct/')
    DDIS = rdflib.Namespace('http://ddis.ch/atai/')
    RDFS = rdflib.namespace.RDFS
    SCHEMA = rdflib.Namespace('http://schema.org/')

   
    
    #print("labeled nodes: {}\n".format(nodes))
    #print("predicates: {}\n".format(predicates))


    result_list = []
    answer_template = ""
    movie_flag = False
    if "film" in message or "movie" in message:
        movie_flag=True
    entity,relation_list,relation_type = recognize_text(message,movie_flag)
    #print("entity,relation_list,relation_type",entity,relation_list,relation_type)
    #print("entity,relation",entity,relation,relation_type,type(relation_type))

    if relation_type==MultifactOriented:
        #entity = find_entity_relation_pattern(nodes,entity,predicates,relation_list,movie_flag)
        if len(relation_list)>0:
            answer=entity[0]
            #answer=build_sparql(entity, match_pred)
            answer_template = ["Hi, answer of {} is {}".format(message,answer)]
        else:
            answer_template = ["I can't find the relation of your answer, please try again."]

    elif relation_type==RecommendationOriented:
        #print("relation",relation)
        movie_type=find_type(message)
        if "film" in message or "movie" in message:
            movie_list=find_movie_entity(entity,movie_type)
            if movie_list==None:
                neighbours_list = find_human_movie(entity,movie_type)
            elif entity!=None:
                neighbours_list=find_Entity_Neighbours(entity)
            else:
                neighbours_list=movie_list
                #neighbours_list=find_Entity_Neighbours(entity)
        if neighbours_list!=None and len(neighbours_list)>0:
            answer_template = ["Hi, hope you will like {}".format(neighbours_list)]
        else:
            answer_template = ["sorry, I can't find related movie with {}".format(entity)]
    elif relation_type==PlotOriented:
        #print("entity,relation,relation_type",entity,relation,relation_type)
        movie_type=find_type(message)
        #print("movie_type",movie_type)
        entity_flag=True
        if entity==None and movie_type!=None:
            entity=find_movie_entity(entity,movie_type,20)
            entity_flag=False
        if entity==None:
            answer_template=["Sorry, I can't recognize your question, please try again."]
        else:
            #print("movie_entity:",entity,entity_flag)
            type_pic=relation_list
            plot_list=get_plot(graph,entity,type_pic,entity_flag,10)
            if len(plot_list)<1:
                answer_template=["Sorry, I can't find related plots, please try something else."]
            else:
                answer_template = ["Hi, hope you will like :"]
                for plot in plot_list:
                    answer_template.append(plot)
    elif relation_type==FactOriented:
        judge_flag = None
        if len(entity)==2:
            judge_flag = judge_entity_relation(entity)
            if judge_flag:
                answer_template = ["the answer is yes"]
            else:
                answer_template = ["the answer is no"]
        else:
            answer_template = ["Sorry I can't recognize your question, please try again."]
    else:
        answer_template = ["Sorry I can't recognize your question, please try again."]
    print("\n--- generated response: {}".format(answer_template))
    result_list=answer_template
    
    return result_list
    # res = str(chat)

In [24]:
def judge_entity_relation(entity_list):
    entity_related_list=[]
    query_list=[]
    for entity in entity_list:
        movie_list=find_movie_entity(entity,N=20)
        if movie_list!=None:
            query_list.append(movie_list)
        else:
            entity_related_list.append([entity])
    for i in range(len(entity_related_list)):
        for j in range(len(entity_related_list[i])):
            for t in range(len(query_list)):
                for k in range(len(query_list[t])):
                    result=find_relation(entity_related_list[i][j],query_list[t][k])
                    if result!=None:
                        return True
    return False    
        
    
    
    

In [25]:
ent="Daniel Radcliffe"
que="http://www.wikidata.org/entity/Q161687"
def find_relation(entity,query):
    #que="http://www.wikidata.org/entity/Q116852"
    #query=que
    #print(query, entity)
    str_temp='''SELECT DISTINCT *  WHERE 
    {{
        ?entity rdfs:label "{}"@en .
        <{}> ?relation ?entity .
       
    }}
    '''
    query_template = str_temp.format(entity,str(query))

    print("--- sparql query: {}".format(query_template))

    qres = graph.query(query_template)

    #print("\n--- querying results: ")
    #print(qres,len(qres),type(qres))
    if(len(qres)<1):
        return None
    else:
        for row in qres:
            #print("row",row.relation,row.entity)
            return [row.relation]
find_relation(ent,que)   

--- sparql query: SELECT DISTINCT *  WHERE 
    {
        ?entity rdfs:label "Daniel Radcliffe"@en .
        <http://www.wikidata.org/entity/Q161687> ?relation ?entity .
       
    }
    


[rdflib.term.URIRef('http://www.wikidata.org/prop/direct/P161')]

In [27]:
def find_movie_entity(entity,movie_type=None,N=1):
    wdt_movie="http://www.wikidata.org/prop/direct/P31"
    wd="http://www.wikidata.org/entity/Q11424"
    wdt_genre="http://www.wikidata.org/prop/direct/P136"
    #query_template = 'SELECT DISTINCT ?x ?y WHERE {{ ?movie rdfs:label "{}"@en . ?movie <{}> ?x . ?x <{}> ?y .}}'.format(entity, match_pred, RDFS.label)
    #query_template = 'SELECT DISTINCT ?movie ?label WHERE {{ ?movie <{}> <{}> . ?human rdfs:label "{}"@en. ?movie <{}> ?human . ?movie <{}> ?label .}}'.format(wdt_movie, wd,  human, wdt_director, RDFS.label)
    if entity==None and movie_type==None:
        return None
    if entity==None and movie_type!=None:
        str_temp='''SELECT DISTINCT ?movie ?label WHERE 
        {{
            ?movie <{}> <{}> .
            ?movie rdfs:label ?label.
            ?movie <{}> <{}> .
        }}
        '''
        query_template = str_temp.format(wdt_movie,wd,wdt_genre,movie_type)
    elif movie_type==None:
        str_temp='''SELECT DISTINCT ?movie ?label WHERE 
        {{
            ?movie <{}> <{}> .
            ?movie rdfs:label ?label.
            FILTER (CONTAINS(str(?label),'{}')) 
        }}
        '''
        query_template = str_temp.format(wdt_movie,wd,entity)
    else:
        str_temp='''SELECT DISTINCT ?movie ?label WHERE 
        {{
            ?movie <{}> <{}> .
            ?movie rdfs:label ?label.
            ?movie <{}> <{}> .
            FILTER (CONTAINS(str(?label),'{}')) 
        }}
        '''
        query_template = str_temp.format(wdt_movie,wd,wdt_genre,movie_type,entity)
    
    

    print("--- sparql query: {}".format(query_template))

    qres = graph.query(query_template)

    #print("\n--- querying results: ")
    #print(qres,len(qres),type(qres))
    movie_list=[]
    movie_dic={}
    #print(type(qres),qres)
    if len(qres)<1:
        return None
    else:
        for row in qres:
            #print(row.label)
            if entity==None:
                entity=""
            movie_dic[row.movie] = editdistance.eval(row.label, entity)
            #print("row",row.movie,movie_dic[row.movie],row.label)
    for k, v in sorted(movie_dic.items(), key=lambda item: item[1]):
        movie_list.append(k)
    #print(movie_list)

    return movie_list[:N]
movie_type="http://www.wikidata.org/entity/Q2421031"
find_movie_entity("Batman",movie_type,2)  

--- sparql query: SELECT DISTINCT ?movie ?label WHERE 
        {
            ?movie <http://www.wikidata.org/prop/direct/P31> <http://www.wikidata.org/entity/Q11424> .
            ?movie rdfs:label ?label.
            ?movie <http://www.wikidata.org/prop/direct/P136> <http://www.wikidata.org/entity/Q2421031> .
            FILTER (CONTAINS(str(?label),'Batman')) 
        }
        


[rdflib.term.URIRef('http://www.wikidata.org/entity/Q116852'),
 rdflib.term.URIRef('http://www.wikidata.org/entity/Q166262')]

In [29]:

# read_extract_data()
def get_plot(graph_movie,query_list,type_pic,is_label=True,N=20):
    WD = Namespace('http://www.wikidata.org/entity/')
    WDT = Namespace('http://www.wikidata.org/prop/direct/')
    SCHEMA = Namespace('http://schema.org/')
    DDIS = Namespace('http://ddis.ch/atai/')
    entities = set(graph_movie.subjects()) | {s for s in graph_movie.objects() if isinstance(s, URIRef)}

    f = open("images.json")
    data_json=json.load(f)

    #print("f_json",data_json[:10])
    len_item=N/len(query_list)+1
    #print("len_item",len_item)
    
    plot_list=[]
    for query in query_list:
        imdb_list=[]
        
        print("query",type(query),query)
        if is_label:
            query = Literal(query, lang='en')
            ents_with_label = graph_movie.subjects(RDFS.label, query)
           # print("ents_with_label",ents_with_label)
        else:
            ents_with_label = {query}
            #print("ents_with_label",ents_with_label,query)

        # Note that there might be multiple wd entities with the same label.
        # E.g. here there's a book and a movie matching the query.

        # You can consider additional properties, such as the entity type,
        # to filter *ents_with_labels. Note that wd:Q11424 is the type 'film'.
        # ents = {qid for qid in ents_with_label if WD.Q11424 in g.objects(qid, WDT.P31)}

        # In your example, you're targetting an IMDb-ID. You can use this to
        # filter non-film wd entities since only movies have an IMDb-ID
        # starting with 'tt'. Note that people may have an IMDb-ID beginning
        # with 'nm'. IMDb-IDs are linked to wd entities via wdt:P345.
        ents = {
        # we get a mapping from wd-qid to imdb-id
        qid: imdb_id
        # for all entities that match the query label
        for qid in ents_with_label
        # filtered by those that have an IMDb-ID
        for imdb_id in graph_movie.objects(qid, WDT.P345)
        # and where the IMDb-ID begins with 'tt' (i.e. a movie)
        #if imdb_id.startswith('tt')
        }
        #print(ents)
        
        for qid,imdb_id in ents.items():
            #print("qid,imdb_id",qid,imdb_id)
        #qid="http://www.wikidata.org/entity/Q22908422"
            #print("qid",qid)
            index=rdflib.term.URIRef(qid)
            #print("index",index)
            imdb_list.append(ents[index])

       # print("imdb_list",imdb_list)



        for imdb in imdb_list:
            #print("imdb",imdb,type(str(imdb)),str(imdb))
            imdb=str(imdb)
            if imdb.startswith("tt"):
                type_imdb="movie"
            elif imdb.startswith("nm"):
                type_imdb="cast"
            else:
                continue
            len_flag=True
            index=0
            print("imdb,plot_list",imdb,plot_list)
            for item in data_json:
                if len_flag:
                    if type_pic!='none':
                        if str(item['type'])==type_pic:
                            #print("item['movie']",item['movie'],type(item['movie']))
                            for movie in item[type_imdb]:
                                #print("movie",movie,imdb)
                                if movie==imdb:
                                    #print("got again1")
                                    imag="image:"+item['img'].split(".jpg")[0]
                                    #print("imag---",imag,type(imag))
                                    plot_list.append(imag)
                                    index+=1
                                    #print("plot_list",plot_list)
                                    if index>=len_item:
                                        len_flag=False
                    else:
                          #print("get in")
                        for movie in item[type_imdb]:
                              #print("movie",movie,imdb)
                            if movie==imdb:
                                #print("got again")
                                imag="image:"+item['img'].split(".jpg")[0]
                                #print("imag---",imag,type(imag))
                                plot_list.append(imag)
                                index+=1
                                #print("plot_list",plot_list)
                                index+=1
                                if index>=len_item:
                                    len_flag=False
    f.close()
    print("plot_list",plot_list)
    return plot_list
ref=[rdflib.term.URIRef('http://www.wikidata.org/entity/Q1533374'), rdflib.term.URIRef('http://www.wikidata.org/entity/Q56000996'), rdflib.term.URIRef('http://www.wikidata.org/entity/Q1718337'), rdflib.term.URIRef('http://www.wikidata.org/entity/Q29466808'), rdflib.term.URIRef('http://www.wikidata.org/entity/Q20463713'), rdflib.term.URIRef('http://www.wikidata.org/entity/Q219776'), rdflib.term.URIRef('http://www.wikidata.org/entity/Q1027810'), rdflib.term.URIRef('http://www.wikidata.org/entity/Q1261757'), rdflib.term.URIRef('http://www.wikidata.org/entity/Q1676155'), rdflib.term.URIRef('http://www.wikidata.org/entity/Q17606382'), rdflib.term.URIRef('http://www.wikidata.org/entity/Q18152555'), rdflib.term.URIRef('http://www.wikidata.org/entity/Q20058691'), rdflib.term.URIRef('http://www.wikidata.org/entity/Q25344358'), rdflib.term.URIRef('http://www.wikidata.org/entity/Q375512'), rdflib.term.URIRef('http://www.wikidata.org/entity/Q4153408'), rdflib.term.URIRef('http://www.wikidata.org/entity/Q54366261'), rdflib.term.URIRef('http://www.wikidata.org/entity/Q54924330'), rdflib.term.URIRef('http://www.wikidata.org/entity/Q56907035'), rdflib.term.URIRef('http://www.wikidata.org/entity/Q57986045'), rdflib.term.URIRef('http://www.wikidata.org/entity/Q963867')]
name=["Adam West"]
plot_list=get_plot(graph,name,"none",True)
print("plot_list",plot_list)

query <class 'str'> Adam West
imdb,plot_list nm0001842 []
plot_list ['image:2983/rm3654363904', 'image:2017/rm2042769408', 'image:0986/rm3089576192', 'image:1750/rm385308928', 'image:3198/rm1178740736', 'image:2913/rm1319065856', 'image:0736/rm2972317696', 'image:1661/rm2101487616', 'image:0760/rm2298032640', 'image:3239/rm693445120', 'image:0773/rm1163080960']
plot_list ['image:2983/rm3654363904', 'image:2017/rm2042769408', 'image:0986/rm3089576192', 'image:1750/rm385308928', 'image:3198/rm1178740736', 'image:2913/rm1319065856', 'image:0736/rm2972317696', 'image:1661/rm2101487616', 'image:0760/rm2298032640', 'image:3239/rm693445120', 'image:0773/rm1163080960']


In [30]:
import requests, json, time
# user login
url = "https://speakeasy.ifi.uzh.ch"
def login(username: str, password: str):
    return requests.post(url=url + "/api/login", json={"username": username, "password": password})

# check details of the current user
def current(session_token: str):
    return requests.get(url=url + "/api/user/current", params={"session": session_token})

# user logout
def logout(session_token: str):
    return requests.get(url=url + "/api/logout", params={"session": session_token})


# check available chat rooms
def check_rooms(session_token: str):
    return requests.get(url=url + "/api/rooms", params={"session": session_token})

# check the state of a chat room
def check_room_state(room_id: str, since: int, session_token: str):
    return requests.get(url=url + "/api/room/{}/{}".format(room_id, since), params={"roomId": room_id, "since": since, "session": session_token})

# post a message to a chat room
def post_message(room_id: str, session_token: str, message: str):
    return requests.post(url=url + "/api/room/{}".format(room_id), params={"roomId": room_id, "session": session_token}, data=message)


def demo_agent():


    # get the api specification
    r = requests.get(url + "/client-specs")
    spec = json.loads(r.text)
    print(spec)

    # - check the spec of Login

    # In[3]:

    print(json.dumps(spec["paths"]["/api/login"], indent=4))


    # In[4]:


    print(json.dumps(spec["components"]["schemas"]["LoginRequest"], indent=4))


    # ## User State-related Functions


    # load credentials from a json file
    with open("./credentials.json", "r") as f:
        credentials = json.load(f)
    username = credentials["agent2"]["username"]
    password = credentials["agent2"]["password"]

    print("--- log in as an agent")
    agent_details = login(username=username, password=password).json()
    print("agent details:")
    print(json.dumps(agent_details, indent=4))

    print("--- check user details with the session token {}".format(agent_details["sessionToken"]))
    current_details = current(session_token=agent_details["sessionToken"]).json()
    print(json.dumps(current_details, indent=4))

    print("--- log out")
    r = logout(session_token=agent_details["sessionToken"])
    print(r.json())


    # ## Chat-related Functions
    # 

    # ## A Dummy Agent


    agent_details = login(username=username, password=password).json()
    print("--- agent details:")
    print(json.dumps(agent_details, indent=4))

    chatroom_messages = {}
    while True:
        current_rooms = check_rooms(session_token=agent_details["sessionToken"]).json()["rooms"]
        print("--- {} chatrooms available".format(len(current_rooms)))

        for idx, room in enumerate(current_rooms):
            room_id = room["uid"]
            print("chat room - {}: {}".format(idx, room_id))

            new_room_state = check_room_state(room_id=room_id, since=0, session_token=agent_details["sessionToken"]).json()
            new_messages = new_room_state["messages"]
            print("found {} messages".format(len(new_messages)))

            if room_id not in chatroom_messages.keys():
                chatroom_messages[room_id] = []

            if len(chatroom_messages[room_id]) != len(new_messages):
                for message in new_messages:
                    if message["ordinal"] >= len(chatroom_messages[room_id]) and message["session"] != agent_details["sessionId"]:
                        
                        response = "Got your message \"{}\" at {}.".format(message["message"], time.strftime("%H:%M:%S, %d-%m-%Y", time.localtime()))
                        
                        post_message(room_id=room_id, session_token=agent_details["sessionToken"], message=response)
                        
                        try:
                            temp_responce=build_responce(message["message"],graph)
                            for item in temp_responce:
                                post_message(room_id=room_id, session_token=agent_details["sessionToken"], message=item)
                        except:
                            post_message(room_id=room_id, session_token=agent_details["sessionToken"], message="Sorry, I can't recognize your question, please try again.")
                        
            chatroom_messages[room_id] = new_messages

        time.sleep(3)
        print("")

    print("--- log out")
    r = logout(session_token=agent_details["sessionToken"])
    print(r.json())
#demo_agent()
    
    
    
    

In [32]:
demo_agent()

{'openapi': '3.0.1', 'info': {'title': "Alan's Speakeasy", 'description': "Client API for Alan's Speakeasy, Version 0.1", 'version': '0.1'}, 'paths': {'/api/login': {'post': {'tags': ['User'], 'summary': 'Sets roles for session based on user account and returns a session cookie.', 'operationId': 'postApiLogin', 'requestBody': {'content': {'application/json': {'schema': {'$ref': '#/components/schemas/LoginRequest'}}}}, 'responses': {'200': {'description': 'OK', 'content': {'application/json': {'schema': {'$ref': '#/components/schemas/UserSessionDetails'}}}}, '400': {'description': 'Bad Request', 'content': {'application/json': {'schema': {'$ref': '#/components/schemas/ErrorStatus'}}}}, '401': {'description': 'Unauthorized', 'content': {'application/json': {'schema': {'$ref': '#/components/schemas/ErrorStatus'}}}}}}}, '/api/logout': {'get': {'tags': ['User'], 'summary': 'Clears all user roles of the current session.', 'operationId': 'getApiLogout', 'parameters': [{'name': 'session', 'in'

agent details:
{
    "userDetails": {
        "id": "328e2699-c5bc-4529-843a-3706fb725d5e",
        "username": "bot_464",
        "role": "BOT"
    },
    "sessionToken": "node0oi169dduulw019aa99e7xt991978970",
    "sessionId": "83c55890-2890-4402-b162-339fcce6d4b8",
    "startTime": 1639602879851,
    "userSessionAlias": "jolly_liskov_58"
}
--- check user details with the session token node0oi169dduulw019aa99e7xt991978970
{
    "userDetails": {
        "id": "328e2699-c5bc-4529-843a-3706fb725d5e",
        "username": "bot_464",
        "role": "BOT"
    },
    "sessionToken": "node0oi169dduulw019aa99e7xt991978970",
    "sessionId": "83c55890-2890-4402-b162-339fcce6d4b8",
    "startTime": 1639602879851,
    "userSessionAlias": "jolly_liskov_58"
}
--- log out
{'description': 'Logged out'}
--- agent details:
{
    "userDetails": {
        "id": "328e2699-c5bc-4529-843a-3706fb725d5e",
        "username": "bot_464",
        "role": "BOT"
    },
    "sessionToken": "node01x138amgu0d241sa2k

KeyboardInterrupt: 

In [31]:

question = "Who is the director of the movie Batman ?"
question = "what is the occupation of Jean Van Hamme"
question = "I like the Jurassic Park movie; can you recommend any similar action movies?"
question = "I am a big fan of Steven Spielberg, could you recommend some of his action movies?"
question = "Can you show me the poster of the actor of the movie Harry Potter ?  "
question="What is the name of the lead actor in the Batman movie?"  
#Harry potter
#actor

question = "Did Daniel Radcliffe ever work on Harry Potter?"
question="What is the birthplace of Christopher Nolan?"
question = "Is Daniel Radcliffe worked on a Harry Potter movie"
question = "Show me the pictures of the lead actors of the movie Jurassic Park." 
question = "Show me an action movie poster. "
#question="hi"
#question = "Where is Christopher Nolan born in?"
#question = "Can you show me a picture of Christopher Nolan"

result = build_responce(question,graph)
print(result,type(result),len(result))



raw_message Show me an action movie poster. 
text1 Show me an action movie poster. 
text1.ents () 0
entity None
relation_type Plot_oriented
relation_list None
relation_list None
relation_pattern poster <class 'str'>
--- sparql query: SELECT DISTINCT ?movie ?label WHERE 
        {
            ?movie <http://www.wikidata.org/prop/direct/P31> <http://www.wikidata.org/entity/Q11424> .
            ?movie rdfs:label ?label.
            ?movie <http://www.wikidata.org/prop/direct/P136> <http://www.wikidata.org/entity/Q188473> .
        }
        
query <class 'rdflib.term.URIRef'> http://www.wikidata.org/entity/Q1533374
imdb,plot_list tt0139239 []
query <class 'rdflib.term.URIRef'> http://www.wikidata.org/entity/Q56000996
imdb,plot_list tt6857112 []
query <class 'rdflib.term.URIRef'> http://www.wikidata.org/entity/Q1718337
imdb,plot_list tt0102178 ['image:1708/rm1345681152', 'image:2471/rm1591960320']
query <class 'rdflib.term.URIRef'> http://www.wikidata.org/entity/Q29466808
imdb,plot_list t

In [214]:
#!pip install SPARQLWrapper

In [70]:
raw_message = "Is Christopher Nolan a director ?"  
NER = spacy.load("en_core_web_md")
text1 = NER(raw_message)
print(text1)
for word in text1.ents:
    print(word)

Is Christopher Nolan a director ?
Christopher Nolan
