In [1]:
!pip install jieba
!pip install refo
!pip install rdflib



In [0]:
from refo import finditer, Predicate, Star, Any

import re
import rdflib
import jieba.posseg as pseg

Knowledge

In [0]:
# Namespace prepended to every resource name to form its URI.
URI_PREFIX = 'http://kgdemo.com/'

# The knowledge base, written as (subject, predicate, object) strings.
triples = [
  ('宝马', 'is_what', '宝马（BMW）是德国高端汽车品牌'),
  ('宝马', 'is_how', '德系大品牌，质量杠杠的，你值得拥有'),
  ('宝马', 'compare_to_other', '各有千秋，但是人生苦短，我选宝马'),
]

# Every distinct string used anywhere in a triple is treated as a resource and
# gets a URI; the reverse map turns query results back into the original text.
resources = {term for triple in triples for term in triple}
resource2uri = {name: URI_PREFIX + name for name in resources}
uri2resource = {uri: name for name, uri in resource2uri.items()}

# Load the triples into an rdflib graph, one URIRef per term.
graph = rdflib.Graph()
for s, p, o in triples:
  graph.add(tuple(rdflib.URIRef(resource2uri[term]) for term in (s, p, o)))

Rule

In [0]:
class W(Predicate):
  """refo predicate that accepts a word by regex over its token and POS tag.

  Args:
    token: regular expression that the word's ``token`` must match in full.
    pos: regular expression that the word's ``pos`` must match in full.

  Both default to ``'.*'``, so an omitted argument places no constraint on
  that attribute.
  """

  def __init__(self, token='.*', pos='.*'):
    # The patterns are compiled as given and anchored with fullmatch() in
    # match(). Anchoring by appending '$' to the pattern text is wrong for
    # alternations: 'n|v$' anchors only the 'v' branch, so the tag 'nr' would
    # be accepted. '$' also tolerates a trailing newline.
    self.token = re.compile(token)
    self.pos = re.compile(pos)
    super(W, self).__init__(self.match)

  def match(self, word):
    """Return True when ``word.token`` and ``word.pos`` both match entirely."""
    return (self.token.fullmatch(word.token) is not None
            and self.pos.fullmatch(word.pos) is not None)


class Rule(object):
  """Couples a refo pattern (``condition``) with a callback (``action``)."""

  def __init__(self, condition=None, action=None):
    self.condition = condition
    self.action = action

  def apply(self, sentence):
    """Search ``sentence`` for the condition and hand the hits to the action.

    The words covered by each match span are concatenated in the order
    ``finditer`` yields the matches. The action is always called, with an
    empty list when nothing matched, and its return value is passed through.
    """
    matched_words = [
      word
      for found in finditer(self.condition, sentence)
      for word in sentence[slice(*found.span())]
    ]
    return self.action(matched_words)


class Word(object):
  """One token of a sentence together with its part-of-speech tag."""

  def __init__(self, token, pos):
    self.token, self.pos = token, pos

In [0]:
# SPARQL text shared by all three template actions. The placeholders are, in
# order: the namespace for the ':' prefix, the namespace again, the entity
# token (together forming the subject's absolute IRI), and the predicate's
# local name.
_ENTITY_QUERY_TEMPLATE = """
PREFIX : <%s>

SELECT DISTINCT ?o WHERE {
    <%s%s> :%s ?o .
}
"""

# Characters that SPARQL's IRIREF syntax does not allow between '<' and '>'
# (every character up to and including U+0020 is forbidden as well).
_IRI_FORBIDDEN = frozenset('<>"{}|^`\\')


def _is_iri_safe(token):
  """Return True when ``token`` can be placed inside ``<...>`` unescaped."""
  return not any(ch in _IRI_FORBIDDEN or ch <= ' ' for ch in token)


def _entity_query(matches, template_name, predicate):
  """Build the query asking for ``predicate`` of the first entity in ``matches``.

  Args:
    matches: matched words, each exposing ``token`` and ``pos`` attributes.
    template_name: label printed in the ``Template:`` trace line.
    predicate: local name of the predicate under ``URI_PREFIX``.

  Returns:
    The SPARQL query string for the first word tagged ``'nr'`` whose token can
    be written as an IRI, or None when ``matches`` is empty or holds no such
    word.
  """
  if not matches:
    return None
  print("Query:", "  ".join([word.token+'|'+word.pos for word in matches]))
  print("Template: '%s'" % template_name)
  for word in matches:
    if word.pos != 'nr':
      continue
    # The token originates from the user's utterance. It is written as an
    # absolute IRI, and skipped if it contains a character that could end the
    # IRI early, so it cannot change the structure of the query.
    if not _is_iri_safe(word.token):
      continue
    return _ENTITY_QUERY_TEMPLATE % (
      URI_PREFIX, URI_PREFIX, word.token, predicate)
  return None


def what_is_xxx(matches):
  """Action for the 'what is xxx?' template: query the entity's ``is_what``.

  Returns a SPARQL query string, or None if no usable entity was matched.
  """
  return _entity_query(matches, 'what is xxx?', 'is_what')


def how_is_xxx(matches):
  """Action for the 'how is xxx?' template: query the entity's ``is_how``.

  Returns a SPARQL query string, or None if no usable entity was matched.
  """
  return _entity_query(matches, 'how is xxx?', 'is_how')


def compare_to_xxx(matches):
  """Action for the 'compare to xxx?' template: query ``compare_to_other``.

  Only the first word tagged ``'nr'`` is used as the subject of the query.
  Returns a SPARQL query string, or None if no usable entity was matched.
  """
  return _entity_query(matches, 'compare to xxx?', 'compare_to_other')

In [0]:
# Rules are tried in list order by the driver loop; each pairs a word-level
# pattern with the action that turns the matched words into a query.
rules = [
  # "<r-tagged word> 是 <nr-tagged word>" or the same with the ends swapped.
  Rule(
    condition=(
      (W(pos='r') + W(token='是') + W(pos='nr'))
      | (W(pos='nr') + W(token='是') + W(pos='r'))
    ),
    action=what_is_xxx,
  ),

  # "<nr|v> 和 <nr|v>", any number of 比, then 怎么样 or 哪个好.
  Rule(
    condition=(
      (W(pos='nr') | W(pos='v'))
      + W(token='和')
      + (W(pos='nr') | W(pos='v'))
      + Star(W(token='比'))
      + (W(token='怎么样') | W(token='哪个好'))
    ),
    action=compare_to_xxx,
  ),

  # "<nr-tagged word>", as few arbitrary words as possible, then 怎么样 or 怎样.
  Rule(
    condition=(
      W(pos='nr')
      + Star(Any(), greedy=False)
      + (W(token='怎么样') | W(token='怎样'))
    ),
    action=how_is_xxx,
  ),
]

In [7]:
# Answer each demo utterance using the first rule whose action yields a query.
for utt in ['宝马是什么',
            '宝马这个车怎样',
            '宝马和奔驰比怎么样']:
  # Segment and POS-tag the utterance once; the result does not depend on the
  # rule being tried, so it is built outside the rule loop.
  sentence = [Word(word, tag) for word, tag in pseg.cut(utt)]
  for rule in rules:
    db_query = rule.apply(sentence)
    if db_query:
      for row in graph.query(db_query):
        # Map the result URI back to the original resource text.
        print('Output:', uri2resource[row.o.toPython()])
        print()
      # Stop at the first rule that produced a query.
      break

Building prefix dict from the default dictionary ...
Loading model from cache /tmp/jieba.cache
Loading model cost 0.761 seconds.
Prefix dict has been built successfully.


Query: 宝马|nr  是|v  什么|r
Template: 'what is xxx?'
Output: 宝马（BMW）是德国高端汽车品牌

Query: 宝马|nr  这个|r  车|zg  怎样|r
Template: 'how is xxx?'
Output: 德系大品牌，质量杠杠的，你值得拥有

Query: 宝马|nr  和|c  奔驰|v  比|p  怎么样|r
Template: 'compare to xxx?'
Output: 各有千秋，但是人生苦短，我选宝马

