## 1. Data Load


In [18]:
from rdflib.namespace import Namespace, RDF, RDFS, XSD
from rdflib.term import URIRef, Literal
import csv
import rdflib
import plotly.io as pio
pio.renderers.default = 'jupyterlab+svg'
import numpy as np
from sklearn.metrics import pairwise_distances
from speakeasypy import Speakeasy, Chatroom
from typing import List
import time
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline
import torch
from torch import nn
import re
from thefuzz import fuzz,process
import editdistance
import itertools

import jsonpickle
# NOTE: You might have to download a few things for nltk to work properly
import nltk
from nltk.corpus import wordnet as wn
from nltk import Tree

# NOTE: You might have to download the en_core_web_sm model for this to work
import spacy
nlp = spacy.load("en_core_web_sm")
from spacy import displacy

In [21]:
import os
import sys

# Run from the repository root so the relative 'data/...' paths used below resolve.
# os.path.basename honours the platform's path separator, unlike splitting on '/'.
if os.path.basename(os.getcwd()) == 'notebooks':
    os.chdir(os.pardir)
os.getcwd()

'/Users/melihserin/Desktop/ATAI/uzh__advanced_topics_in_ai'

In [26]:
# Build the in-memory knowledge graph from the dump at data/14_graph.nt.
# NOTE(review): the file has an .nt extension but is parsed with format='turtle' — confirm this is intended.
g = rdflib.Graph()
g.parse('data/14_graph.nt', format='turtle')

<Graph identifier=Nbe04587ea1ff42659a60b5bc277710fd (<class 'rdflib.graph.Graph'>)>

In [None]:
# Load the pre-computed entity and relation embedding matrices.
# Chatbot.final_embed indexes their rows with the ids from ent2id / rel2id.
entity_emb = np.load('data/ddis-graph-embeddings/entity_embeds.npy')
relation_emb = np.load('data/ddis-graph-embeddings/relation_embeds.npy')

In [10]:
# Load the id dictionaries.
# Each .del file is read as tab-separated (id, URI) rows; build URI -> id and the inverse id -> URI.
with open('data/ddis-graph-embeddings/entity_ids.del', 'r') as ifile:
    ent2id = {str(rdflib.term.URIRef(ent)): int(idx) for idx, ent in csv.reader(ifile, delimiter='\t')}
    id2ent = {v: k for k, v in ent2id.items()}
with open('data/ddis-graph-embeddings/relation_ids.del', 'r') as ifile:
    rel2id = {str(rdflib.term.URIRef(rel)): int(idx) for idx, rel in csv.reader(ifile, delimiter='\t')}
    id2rel = {v: k for k, v in rel2id.items()}

In [12]:
# URI -> label for every rdfs:label triple in the graph, and the inverse label -> URI.
# Both are plain dicts: if a URI has several labels (or a label several URIs), the last one iterated wins.
ent2lbl = {str(ent): str(lbl) for ent, lbl in g.subject_objects(RDFS.label)}
lbl2ent = {lbl: ent for ent, lbl in ent2lbl.items()}

In [13]:
# Prefixes used in the graph.
WD = Namespace('http://www.wikidata.org/entity/')
WDT = Namespace('http://www.wikidata.org/prop/direct/')
SCHEMA = Namespace('http://schema.org/')
DDIS = Namespace('http://ddis.ch/atai/')
# NOTE(review): this rebinds RDFS, which the first cell already imported from rdflib.namespace.
RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")

## 2. Agent Demo

### Evaluation 2

In [14]:
# Load the pre-trained "dslim/bert-base-NER" tokenizer and token-classification model
# and wrap them in a "ner" pipeline; Chatbot stores this pipeline as self.ner.
tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")

ner = pipeline("ner", model=model, tokenizer=tokenizer)

tokenizer_config.json:   0%|          | 0.00/59.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/829 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/433M [00:00<?, ?B/s]

Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [24]:
def extract_nodes(g):
    """Build a lookup from English label to URI for the URI nodes of the graph.

    Every subject, and every object that is a URIRef, is considered. For each
    URI node one English ``rdfs:label`` is fetched with a SPARQL query.

    Nodes without an English label are skipped. Previously such a node reused
    the label found for the preceding node, which silently re-mapped that label
    to the wrong URI (or raised NameError when the very first node had no label).

    Args:
        g: graph object providing ``subjects``, ``objects`` and ``query``.

    Returns:
        dict mapping ``str(label)`` to the node URI as a Python string. If
        several nodes share a label, the one iterated last wins.
    """
    query = """
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 

    SELECT ?lbl WHERE {{
        <{}> rdfs:label ?lbl .
        FILTER(LANG(?lbl) = "en").
    }}
    LIMIT 1
    """

    nodes = {}
    graph_entities = set(g.subjects(unique=True)) | {s for s in g.objects(unique=True) if isinstance(s, URIRef)}
    for node in graph_entities:
        # Subjects are not filtered above, so skip anything that is not a URI.
        if not isinstance(node, URIRef):
            continue

        entity = node.toPython()

        # Reset per node so a label from an earlier iteration can never leak in.
        label = None
        for row in g.query(query.format(entity)):
            label = row.lbl

        if label is None:
            continue

        nodes[str(label)] = entity
    return nodes

def extract_predicates(g):
    """Build a lookup from English label to URI for the predicates of the graph.

    For each distinct predicate one English ``rdfs:label`` is fetched with a
    SPARQL query.

    Predicates without an English label are skipped. Previously such a
    predicate reused the label found for the preceding predicate, which
    silently re-mapped that label to the wrong URI (or raised NameError when
    the very first predicate had no label).

    Args:
        g: graph object providing ``predicates`` and ``query``.

    Returns:
        dict mapping ``str(label)`` to the predicate URI as a Python string. If
        several predicates share a label, the one iterated last wins.
    """
    query = """
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 

    SELECT ?lbl WHERE {{
        <{}> rdfs:label ?lbl .
        FILTER(LANG(?lbl) = "en").
    }}
    LIMIT 1
    """
    predicates = {}

    for predicate in set(g.predicates(unique=True)):
        predicate_ = predicate.toPython()

        # Reset per predicate so a label from an earlier iteration can never leak in.
        label = None
        for row in g.query(query.format(predicate_)):
            label = row.lbl

        if label is None:
            continue

        predicates[str(label)] = predicate_

    return predicates

# Cache files for the label -> URI lookup tables built by extract_nodes / extract_predicates.
nodes_path = 'data/processed/nodes.json'
predicates_path = 'data/processed/predicates.json'

# Check individually whether each cache file exists: load it if so, otherwise
# build the table from the graph and write the cache.
# NOTE: jsonpickle.decode can instantiate arbitrary objects — only decode cache
# files that were produced by this notebook.
if os.path.exists(nodes_path):
    with open(nodes_path, 'r') as ifile:
        nodes = jsonpickle.decode(ifile.read())
else:
    nodes = extract_nodes(g)
    # open(..., 'w') does not create missing directories.
    os.makedirs(os.path.dirname(nodes_path), exist_ok=True)
    with open(nodes_path, 'w') as ofile:
        ofile.write(jsonpickle.encode(nodes))

if os.path.exists(predicates_path):
    with open(predicates_path, 'r') as ifile:
        predicates = jsonpickle.decode(ifile.read())
else:
    predicates = extract_predicates(g)
    os.makedirs(os.path.dirname(predicates_path), exist_ok=True)
    with open(predicates_path, 'w') as ofile:
        ofile.write(jsonpickle.encode(predicates))

In [22]:
class Chatbot(nn.Module):
    """Answers factual questions about the knowledge graph.

    ``forward`` runs the whole pipeline: named entities are found with the NER
    pipeline, the question is matched against a small set of templates to
    isolate the relation phrase, entity and relation are mapped to graph URIs
    by edit distance, and the answer is looked up both in the graph (SPARQL)
    and through the embeddings.
    """

    def __init__(self):
        """Bind the notebook-level resources (NER pipeline, lookup tables, embeddings, graph)."""
        super().__init__()
        self.ner = ner
        # Question templates; "ENTITY" is replaced by a recognised entity and
        # the "(.*)" group captures the relation phrase.
        self.factual_question_patterns = [
            "who is the (.*) of ENTITY",
            "what is the (.*) of ENTITY",
            "who (.*) ENTITY",
            "when was ENTITY (.*)",
            "where was ENTITY (.*)",
            "where is ENTITY (.*)"
        ]
        self.nodes = nodes
        self.predicates = predicates
        self.entity_emb = entity_emb
        self.relation_emb = relation_emb
        self.ent2id = ent2id
        self.rel2id = rel2id
        self.ent2lbl = ent2lbl
        self.lbl2ent = lbl2ent
        self.id2ent = id2ent
        # Keep a reference to the graph so final_query does not depend on a global.
        self.graph = g

    def entity_extraction(self,ner_results,example):
        """Merge NER tokens into entity strings.

        Args:
            ner_results: list of dicts with at least ``"word"``, ``"start"`` and
                ``"end"`` keys, in text order.
            example: the original question text.

        Returns:
            List of entity strings. Tokens whose character gap to the next
            token is at most 3 are merged into one entity; a token containing
            ``#`` is glued to the previous token without a space. Multi-word
            entities are replaced by the span of ``example`` between their
            first and last word when that span can be found.
        """
        entity_list = []
        entity = ""
        in_entity = False
        last_index = len(ner_results) - 1

        for position, token in enumerate(ner_results):
            word = token["word"]
            if "#" not in word and in_entity:
                entity += " " + word
            else:
                # First token of an entity, or a sub-word piece marked with '#'.
                entity += word.replace("#", "")
            in_entity = True

            # Close the entity at the end of the input or when the next token is far away.
            if position == last_index or ner_results[position + 1]["start"] - token["end"] > 3:
                entity_list.append(entity)
                entity = ""
                in_entity = False

        for n, merged in enumerate(entity_list):
            words = merged.split(" ")
            if len(words) > 1:
                # Recover the exact surface form from the question. The words are
                # escaped because they are literal text, not regex syntax.
                span = re.search(re.escape(words[0]) + "(.+?)" + re.escape(words[-1]), example)
                if span is not None:
                    entity_list[n] = span.group(0)
                # Otherwise keep the merged token text as the best available form.
        return entity_list

    def preprocessing_before_ner(self,question):
        """Lower-case the question up to and including the first "of".

        The remainder of the question is returned unchanged; a question without
        "of" is returned as is.
        """
        match = re.search("(.*?)of", question)
        if match is None:
            return question
        # Slice instead of re.sub: the matched text must not be reinterpreted as a regex.
        return question[:match.start()] + match.group(0).lower() + question[match.end():]

    def preprocessing(self,question):
        """Return the question lower-cased and with all question marks removed."""
        return question.replace("?","").lower()

    # which pattern is used in the given question?
    def pattern_detection(self,ner_results,example):
        """Find the question template (with an entity filled in) closest to ``example``.

        Returns:
            Tuple ``(question_pattern, index)``: the best-scoring filled-in
            template according to ``fuzz.ratio`` and the index of the
            underlying template in ``self.factual_question_patterns``.

        Raises:
            IndexError: if no entity was extracted, so there is no candidate.
        """
        entities_extracted = self.entity_extraction(ner_results,example)

        # Filled-in template -> index of the template it was built from.
        # str.replace is used because entities are literal text, not regex syntax.
        candidates = {}
        for template_index, template in enumerate(self.factual_question_patterns):
            for entity_from_list in entities_extracted:
                candidates.setdefault(template.replace("ENTITY", entity_from_list), template_index)

        question_pattern = process.extract(example, list(candidates), scorer=fuzz.ratio)[0][0]
        index = candidates[question_pattern]

        return question_pattern,index

    def relation_extraction(self,ner_results,example):
        """Return the relation phrase captured by the best-matching template.

        Raises:
            ValueError: if the question does not match the selected template.
        """
        question_pattern, index = self.pattern_detection(ner_results,example)

        # Only "(.*)" is regex syntax; everything around it (including the
        # entity text) is escaped so characters such as '+' or '(' match literally.
        literal_parts = self.preprocessing(question_pattern).split("(.*)")
        regex = "(.*)".join(re.escape(part) for part in literal_parts)

        match = re.match(regex, self.preprocessing(example))
        if match is None:
            raise ValueError("The question does not match any known factual pattern.")

        return match.group(1) # take care of directed, released, etc. cases

    def match_things(self,dict, input):
        """Return the ``(key, value)`` of ``dict`` whose key is closest to ``input``.

        Closeness is the edit distance between the lower-cased key and the
        lower-cased input; the first key with the smallest distance wins.
        Returns ``("", "")`` for an empty mapping.
        """
        best_distance = float("inf")
        match_key = ""
        match_value = ""
        # Keys are lower-cased below, so the input has to be lower-cased as well.
        needle = input.lower()
        for key, value in dict.items():
            distance = editdistance.eval(key.lower(), needle)
            if distance < best_distance:
                best_distance = distance
                match_key = key
                match_value = value

        return match_key,match_value

    def final_query(self,matched_entity,matched_entity_url,matched_predicate,matched_predicate_url):
        """Answer "what is the <predicate> of <entity>" from the graph and the embeddings.

        The graph is queried with the entity as subject first; only if that
        yields nothing is it queried with the entity as object.

        Returns:
            A sentence with the three closest embedding answers, followed by
            the graph answer when one was found.
        """
        query_option1 ="""
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 

        SELECT ?lbl WHERE {{
            <{}> <{}> ?answer.
            ?answer rdfs:label ?lbl .
            FILTER(LANG(?lbl) = "en").
        }}
        LIMIT 1
        """.format(matched_entity_url,matched_predicate_url)

        query_option2 ="""
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 

        SELECT ?lbl WHERE {{
            ?answer <{}> <{}>.
            ?answer rdfs:label ?lbl .
            FILTER(LANG(?lbl) = "en").
        }}
        LIMIT 1
        """.format(matched_predicate_url,matched_entity_url)

        answer = None
        for row in self.graph.query(query_option1):
            answer = row.lbl

        # Fall back to the reverse direction. This used to be written as
        # `except answer == "":`, which is not a valid handler and never ran.
        if answer is None:
            for row in self.graph.query(query_option2):
                answer = row.lbl

        answer1, answer2, answer3 = self.final_embed(matched_entity_url,matched_predicate_url)
        if answer is None:
            return f"According to the embeddings, the {matched_predicate} of {matched_entity} is {answer1}, {answer2}, {answer3}."
        return f"""According to the embeddings, the {matched_predicate} of {matched_entity} is {answer1}, {answer2}, or {answer3}. On the other hand, as of my knowledge, the {matched_predicate} of {matched_entity} is {answer}."""

    def final_embed(self,matched_entity_url,matched_predicate_url):
        """Return the labels of the three entities closest to ``head + relation``.

        Raises:
            KeyError: if the entity or predicate URI has no embedding id.
        """
        head = self.entity_emb[self.ent2id[matched_entity_url]]
        pred = self.relation_emb[self.rel2id[matched_predicate_url]]
        # add vectors according to TransE scoring function.
        lhs = head + pred
        # compute distance to *any* entity
        dist = pairwise_distances(lhs.reshape(1, -1), self.entity_emb).reshape(-1)
        # find most plausible entities
        most_likely = dist.argsort()

        labels = []
        for idx in most_likely[:3]:
            uri = self.id2ent[idx]
            # Fall back to the URI when the entity has no label.
            labels.append(self.ent2lbl.get(uri, str(uri)))

        return labels[0], labels[1], labels[2]

    def forward(self,input):
        """Answer a natural-language factual question.

        Raises:
            ValueError: if no entity is recognised or no template matches.
        """
        ner_results = self.ner(self.preprocessing_before_ner(input))
        entities = self.entity_extraction(ner_results,input)
        if not entities:
            raise ValueError("No named entity was recognised in the question.")
        entity = entities[0]
        relation = self.relation_extraction(ner_results,input)

        matched_entity, matched_entity_url= self.match_things(self.nodes, entity)
        matched_predicate, matched_predicate_url= self.match_things(self.predicates, relation)

        output = self.final_query(matched_entity,matched_entity_url,matched_predicate,matched_predicate_url)
        return output
            

In [23]:
# Smoke test: answer one factual question end to end.
chatbot = Chatbot()
chatbot("What is the genre of Good Neighbors?")

'According to the embeddings, the genre of Good Neighbors is drama, comedy-drama, or comedy film. On the other hand, as of my knowledge, the genre of Good Neighbors is art film.'

### Trying out different parsings with nltk / spaCy

In [43]:
# Tokenise the sample question, then tag each token with its part of speech.
tokens = nltk.word_tokenize("What is the genre of Good Neighbors?")
print(tokens)

tagged = nltk.pos_tag(tokens)
print(tagged)

['What', 'is', 'the', 'genre', 'of', 'Good', 'Neighbors', '?']
[('What', 'WP'), ('is', 'VBZ'), ('the', 'DT'), ('genre', 'NN'), ('of', 'IN'), ('Good', 'JJ'), ('Neighbors', 'NNS'), ('?', '.')]


In [46]:
# Chunk the POS-tagged tokens into named entities and print the resulting tree.
entities = nltk.chunk.ne_chunk(tagged)
print(entities)

# NOTE(review): draw() presumably opens a separate GUI window — confirm it works in the target environment.
entities.draw()

(S
  What/WP
  is/VBZ
  the/DT
  genre/NN
  of/IN
  (GPE Good/JJ)
  Neighbors/NNS
  ?/.)


In [50]:
# Parse the sample question with the spaCy pipeline loaded in the first cell.
doc = nlp('what is the genre of Good Neighbors?')
# NOTE(review): a bare expression that is not the last line of its cell is not displayed.
doc

def to_nltk_tree(node):
    """Convert a dependency-parse token and its descendants into an nltk ``Tree``.

    A token without left or right children is returned as its plain text
    (``orth_``); any other token becomes a ``Tree`` labelled with its text
    whose children are the converted child tokens.
    """
    has_children = node.n_lefts + node.n_rights > 0
    if not has_children:
        return node.orth_

    subtrees = [to_nltk_tree(child) for child in node.children]
    return Tree(node.orth_, subtrees)

print(list(doc.sents))

# Print one dependency tree per sentence. A plain loop is used because
# pretty_print() is called only for its printed output; collecting its None
# results in a list made the cell display a stray "[None]".
for sent in doc.sents:
    to_nltk_tree(sent.root).pretty_print()

[what is the genre of Good Neighbors?]
      is                    
  ____|________              
 |    |      genre          
 |    |    ____|_______      
 |    |   |            of   
 |    |   |            |     
 |    |   |        Neighbors
 |    |   |            |     
what  ?  the          Good  



[None]

In [5]:
# Render the dependency parse of the "what is the <relation> of <entity>" phrasing.
doc = nlp('what is the genre of Good Neighbors?')
displacy.render(doc, style='dep', jupyter=True)

In [6]:
# Render the dependency parse of the "who is the <relation> of <entity>" phrasing.
doc = nlp('Who is the director of Good Neighbors?')
displacy.render(doc, style='dep', jupyter=True)

In [7]:
# Render the dependency parse of a verb-based phrasing.
# NOTE(review): "directed of" looks like a typo for "directed" — confirm whether the malformed input is intentional.
doc = nlp('Who directed of Good Neighbors?')
displacy.render(doc, style='dep', jupyter=True)

In [103]:
# parse all nouns in the phrase including compounds
# also check for compounds and add those

def _compound_phrase(token):
    """Return ``token``'s text preceded by its direct 'compound' children.

    The modifiers keep the order in which ``token.children`` yields them, so
    several modifiers are no longer reversed ("New York City", not
    "York New City").
    """
    modifiers = [child.text for child in token.children if child.dep_ == 'compound']
    return ' '.join(modifiers + [token.text])


def extract_entities(doc):
    """Extract a ``[relation, entity]`` pair from the first sentence of a parsed question.

    Two sentence shapes are handled, chosen by the part of speech of the root:

    * ``AUX`` root ("Who is the director of Good Neighbors?"): the relation is
      the ``nsubj`` child of the root, the entity is the ``pobj`` below a
      ``prep`` child of that subject.
    * ``VERB`` root ("Who directed Good Neighbors?"): the relation is the root
      verb itself, the entity is its ``dobj`` child.

    Direct ``compound`` children are prepended to both parts.

    Args:
        doc: parsed document exposing ``sents``; each sentence exposes ``root``.

    Returns:
        ``[relation, entity]`` (either string may be empty when the expected
        dependency is missing), or ``[]`` when the document has no sentence or
        the root is neither ``AUX`` nor ``VERB``.
    """
    sentences = list(doc.sents)
    if not sentences:
        return []

    root = sentences[0].root
    root_type = root.pos_

    entity1 = ''
    entity2 = ''

    # Case "Who is the director of Good Neighbors?"
    if root_type == 'AUX':
        for child in root.children:
            if child.dep_ != 'nsubj':
                continue
            entity1 = _compound_phrase(child)

            for prep in child.children:
                if prep.dep_ != 'prep':
                    continue
                for pobj in prep.children:
                    if pobj.dep_ == 'pobj':
                        entity2 = _compound_phrase(pobj)

        return [entity1, entity2]

    # Case "Who directed Good Neighbors?"
    if root_type == 'VERB':
        entity1 = root.text

        for child in root.children:
            if child.dep_ == 'dobj':
                entity2 = _compound_phrase(child)

        return [entity1, entity2]

    return []

def parse_query(query):
    """Parse ``query`` with spaCy, print the extracted relation/entity pair and return it.

    Returns:
        The list produced by ``extract_entities``: ``[relation, entity]``, or
        ``[]`` when the sentence shape is not supported.
    """
    doc = nlp(query)
    entities = extract_entities(doc)
    print(query)
    # extract_entities returns [] for unsupported sentence shapes; do not index into it blindly.
    if len(entities) >= 2:
        print(f"\t{entities[0]} -> {entities[1]}")
    else:
        print("\t(no relation/entity pair found)")

    return entities

In [119]:
# Try the dependency-based extraction on three phrasings of a question.
parse_query('What is the genre of Good Neighbors?')
parse_query('Who is the director of Good Neighbors?')
parse_query('Who directed Good Neighbors?')

# Bare None as the last expression so the cell displays no return value.
None


What is the genre of Good Neighbors?
	genre -> Good Neighbors
Who is the director of Good Neighbors?
	director -> Good Neighbors
Who directed Good Neighbors?
	directed -> Good Neighbors


In [None]:
# Part-of-speech tags; convert() compares them with the second dot-separated
# field of a WordNet synset name.
WN_NOUN = 'n'
WN_VERB = 'v'
WN_ADJECTIVE = 'a'
WN_ADJECTIVE_SATELLITE = 's'
WN_ADVERB = 'r'


def convert(input, from_pos, to_pos):
    """Transform a phrase word by word from one POS to another using WordNet.

    Each space-separated word of ``input`` is looked up with ``from_pos``:

    * A word with no synsets is kept verbatim.
    * For ``from_pos == 'n'`` the candidates are the lemma names of the word's
      own synsets (``to_pos`` is not consulted on this path).
    * Otherwise the candidates are the derivationally related forms whose
      synset has POS ``to_pos``.

    'a' and 's' are treated as the same POS. Candidates of successive words
    are combined into phrases and their probabilities multiplied.

    Args:
        input: phrase to convert; words are split on single spaces.
        from_pos: WordNet POS tag of the input words.
        to_pos: WordNet POS tag wanted for the result.

    Returns:
        List of ``(phrase, probability)`` tuples sorted by decreasing
        probability, where the probability of a single-word candidate is its
        relative frequency among the collected lemma names. The list is now
        sorted for single-word input too; previously only multi-word results
        were sorted.
    """
    adjective_tags = (WN_ADJECTIVE, WN_ADJECTIVE_SATELLITE)

    def _pos_matches(synset, pos):
        # The synset name looks like "<lemma>.<pos>.<nn>"; compare its POS field.
        tag = synset.name().split('.')[1]
        return tag == pos or (pos in adjective_tags and tag in adjective_tags)

    words = []
    for word in input.split(" "):
        synsets = wn.synsets(word, pos=from_pos)

        # Word not found: keep it verbatim in every phrase built so far.
        if not synsets:
            if len(words) == 0:
                words.append((word, 1.0))
            else:
                words = [(w + " " + word, p) for w, p in words]
            continue

        # Get all lemmas of the word (consider 'a' and 's' equivalent)
        lemmas = [l for s in synsets if _pos_matches(s, from_pos) for l in s.lemmas()]

        # Collect the lemmas to offer as replacements.
        related_lemmas = []
        for lemma in lemmas:
            if from_pos == WN_NOUN:
                related_lemmas.append(lemma)
            else:
                related_lemmas.extend(
                    related for related in lemma.derivationally_related_forms()
                    if _pos_matches(related.synset(), to_pos)
                )

        # Extract the words from the lemmas and weight each by its relative frequency.
        names = [l.name() for l in related_lemmas]
        candidates = [(w, float(names.count(w)) / len(names)) for w in set(names)]

        # Take all the combinations for synonyms of different words
        # Build the result in the form of a list containing tuples (word, probability)
        if len(words) == 0:
            words = candidates
        else:
            words = [(w_b + " " + w_t, p_b * p_t) for w_b, p_b in words for w_t, p_t in candidates]

    # return all the possibilities sorted by probability
    words.sort(key=lambda w: -w[1])
    return words

# Example: expand a multi-word noun phrase word by word.
convert('MPAA film rating', WN_NOUN, WN_NOUN)

In [120]:
# OLD VERSION

# WN_NOUN = 'n'
# WN_VERB = 'v'
# WN_ADJECTIVE = 'a'
# WN_ADJECTIVE_SATELLITE = 's'
# WN_ADVERB = 'r'


# def convert(word, from_pos, to_pos):    
#     """ Transform words given from/to POS tags """

#     synsets = wn.synsets(word, pos=from_pos)

#     # Word not found
#     if not synsets:
#         return []

#     # Get all lemmas of the word (consider 'a'and 's' equivalent)
#     lemmas = []
#     for s in synsets:
#         for l in s.lemmas():
#             if s.name().split('.')[1] == from_pos or from_pos in (WN_ADJECTIVE, WN_ADJECTIVE_SATELLITE) and s.name().split('.')[1] in (WN_ADJECTIVE, WN_ADJECTIVE_SATELLITE):
#                 lemmas += [l]

#     # Get related forms
#     derivationally_related_forms = [(l, l.derivationally_related_forms()) for l in lemmas]

#     # filter only the desired pos (consider 'a' and 's' equivalent)
#     related_noun_lemmas = []

#     for drf in derivationally_related_forms:
#         for l in drf[1]:
#             if l.synset().name().split('.')[1] == to_pos or to_pos in (WN_ADJECTIVE, WN_ADJECTIVE_SATELLITE) and l.synset().name().split('.')[1] in (WN_ADJECTIVE, WN_ADJECTIVE_SATELLITE):
#                 related_noun_lemmas += [l]

#     # Extract the words from the lemmas
#     words = [l.name() for l in related_noun_lemmas]
#     len_words = len(words)

#     # Build the result in the form of a list containing tuples (word, probability)
#     result = [(w, float(words.count(w)) / len_words) for w in set(words)]
#     result.sort(key=lambda w:-w[1])

#     # return all the possibilities sorted by probability
#     return result

# convert('directed', WN_VERB, WN_NOUN)

[('guide', 0.075),
 ('organisation', 0.075),
 ('director', 0.075),
 ('head', 0.075),
 ('organization', 0.075),
 ('address', 0.05),
 ('target', 0.05),
 ('aim', 0.05),
 ('steering', 0.05),
 ('place', 0.025),
 ('leader', 0.025),
 ('lead', 0.025),
 ('sending', 0.025),
 ('directive', 0.025),
 ('mastermind', 0.025),
 ('engineer', 0.025),
 ('steerage', 0.025),
 ('orchestration', 0.025),
 ('organizer', 0.025),
 ('addressee', 0.025),
 ('maneuverer', 0.025),
 ('conducting', 0.025),
 ('heading', 0.025),
 ('manoeuvre', 0.025),
 ('steerer', 0.025),
 ('channelisation', 0.025)]

In [121]:
# Pick the predicate label with the smallest edit distance to 'directed'.
chosen_option = None
distance = np.inf

#for option in convert('directed', WN_VERB, WN_NOUN):
for option in predicates.keys():
    candidate = option
    candidate_distance = editdistance.eval(candidate, 'directed')

    # Strict '<' keeps the first label among equally close ones.
    if candidate_distance < distance:
        chosen_option = option
        distance = candidate_distance

print(chosen_option)

director


In [63]:
from decouple import config
# Seconds Agent.listen sleeps between polling rounds.
listen_freq = 2
# Module-level chatbot instance that Agent uses to answer messages.
chatbot = Chatbot()

class Agent:
    """Speakeasy bot that polls chat rooms and answers each new message."""

    def __init__(self, username, password, responder=None):
        """Log in to Speakeasy.

        Args:
            username: bot account name.
            password: bot account password.
            responder: optional callable mapping a question string to an answer
                string; defaults to the notebook-level ``chatbot``.
        """
        self.username = username
        self.responder = chatbot if responder is None else responder
        # Initialize the Speakeasy Python framework and login.
        self.speakeasy = Speakeasy(host=config("UZH_SPEAKEASY_HOST"), username=username, password=password)
        self.speakeasy.login()  # This framework will help you log out automatically when the program terminates.

    def listen(self):
        """Poll the active chat rooms forever, answering messages and acknowledging reactions."""
        while True:
            # only check active chatrooms (i.e., remaining_time > 0) if active=True.
            rooms: List[Chatroom] = self.speakeasy.get_rooms(active=True)
            for room in rooms:
                if not room.initiated:
                    # send a welcome message if room is not initiated
                    room.post_messages('Hello! And Grüetzi, Merhaba, Bonjour! How can I help you today?')
                    room.initiated = True
                # Retrieve messages from this chat room.
                # If only_partner=True, it filters out messages sent by the current bot.
                # If only_new=True, it filters out messages that have already been marked as processed.
                for message in room.get_messages(only_partner=True, only_new=True):
                    print(
                        f"\t- Chatroom {room.room_id} "
                        f"- new message #{message.ordinal}: '{message.message}' "
                        f"- {self.get_time()}")

                    # Greetings are answered directly; everything else goes to the responder.
                    if message.message.strip().lower() in ("hi", "hello"):
                        answer = 'Hello! And Grüetzi, Merhaba, Bonjour! How can I help you today?'
                    else:
                        try:
                            answer = self.responder(message.message)
                        except Exception:
                            # Deliberate best effort: any failure to answer becomes an apology.
                            # `Exception` (not a bare except) keeps KeyboardInterrupt and
                            # SystemExit able to stop this endless loop.
                            answer = "Sorry :( I could not understand you. Can you rephrase your question?"
                    # Send a message to the corresponding chat room using the post_messages method of the room object.
                    room.post_messages(f"{answer}")
                    # Mark the message as processed, so it will be filtered out when retrieving new messages.
                    room.mark_as_processed(message)

                # Retrieve reactions from this chat room.
                # If only_new=True, it filters out reactions that have already been marked as processed.
                for reaction in room.get_reactions(only_new=True):
                    print(
                        f"\t- Chatroom {room.room_id} "
                        f"- new reaction #{reaction.message_ordinal}: '{reaction.type}' "
                        f"- {self.get_time()}")

                    room.post_messages(f"Received your reaction: '{reaction.type}' ")
                    room.mark_as_processed(reaction)

            time.sleep(listen_freq)

    @staticmethod
    def get_time():
        """Return the current local time formatted as ``HH:MM:SS, DD-MM-YYYY``."""
        return time.strftime("%H:%M:%S, %d-%m-%Y", time.localtime())


In [None]:
# Log in with the credentials read through config(...) and start polling.
# listen() is a `while True` loop, so this cell does not return.
demo_bot = Agent(config("UZH_BOT_USERNAME"), config("UZH_BOT_PASSWORD"))
demo_bot.listen()

#### Predicate match using the entity's relations in the graph

In [None]:
def match_things(dict, input):
    """Pick the entry of ``dict`` whose key best matches ``input``.

    Whole-string and word-level edit distances are combined: a key is only
    examined when its whole-string edit distance to ``input`` is strictly
    smaller than the smallest such distance seen so far. It is then accepted
    as the new match only if at least one input word is closer to its aligned
    key word than the per-word distance recorded for the current match.

    Args:
        dict: mapping whose keys are candidate strings (the name shadows the
            builtin inside this function).
        input: query string; split on single spaces into words.

    Returns:
        Tuple ``(match_key, match_value)``; both are ``""`` if no key was accepted.
    """
    input_list = input.split(" ")
    # Smallest whole-string distance seen so far.
    tmp = np.inf
    # Per-input-word distances of the current match, initialised to each word's length.
    tmpp= [len(word) for word in input_list]
    match_key = ""
    match_value = ""
    for key, value in dict.items():
        if editdistance.eval(key, input) < tmp:
            # Updated even if the key is rejected by the word-level check below.
            tmp = editdistance.eval(key, input)
            key_list = key.split(" ")
            len_input_list, len_key = len(input_list), len(key_list)
            is_input_list_longer = (len_input_list>len_key)
            # Pad the index list of the shorter side by repeating its last index,
            # so both index lists end up with the same length.
            index_input_list = list(range(len_input_list)) + [len_input_list-1]*(len_key-len_input_list)*(not is_input_list_longer)
            index_key = list(range(len_key)) + [len_key-1]*(len_input_list-len_key)*(is_input_list_longer)
            # Edit distance between each aligned (key word, input word) pair.
            word_wise_comparison = [editdistance.eval(key_list[k], input_list[i]) for i,k in zip(index_input_list,index_key)]
            # True for every position where this key is closer than the current match.
            if len_input_list == len(word_wise_comparison):
                bool_update = [(tmpp[i] > word_wise_comparison[i]) for i in range(len_input_list)]
            else:
                bool_update = [(tmpp[i] > word_wise_comparison[k]) for i,k in zip(index_input_list,index_key)]
            if sum(bool_update)>0:
                if len_input_list == len(word_wise_comparison):
                    tmpp = word_wise_comparison
                else:
                    # The key has more words than the input: add the distances of the
                    # surplus key words to the slot of the last input word.
                    tmpp = word_wise_comparison[:len_input_list]
                    tmpp[len_input_list-1] += sum(word_wise_comparison[len_input_list:])
                match_key = key
                match_value = value   
    return match_key,match_value

In [None]:
# For every noun candidate derived from 'released', record a pair
# [best predicate by match_things, best predicate by fuzz.ratio].
matched = []
# NOTE(review): is_two_fncs is not read anywhere in the visible cells.
is_two_fncs = 1
for child in [children for children,score in convert('released', WN_VERB, WN_NOUN)]:
    # editdistance and fuzz similarity fnc
    matched.append([match_things(predicates, child)[0], process.extract(child,[key for key,value in predicates.items()],scorer=fuzz.ratio)[0][0]])
# groupby merges only consecutive equal pairs, so non-adjacent duplicates remain.
matched = list(matched for matched,_ in itertools.groupby(matched))
matched

In [None]:
chosen_option = None
# NOTE(review): distance is assigned but not used in this cell.
distance = np.inf
# Good Neighbors' relations
# Labels of all predicates that occur in triples with WD.Q3110682 as subject, de-duplicated in first-seen order.
predicates_of_entity = list( dict.fromkeys([k for s,p,o in g.triples((WD.Q3110682, None, None)) for k,v in predicates.items() if v==str(p)]) )


# Take the first candidate pair of which either element is a relation the
# entity actually has, preferring the first element of the pair.
for option in matched:
    if isinstance(option,list):
        if option[0] in predicates_of_entity:
            chosen_option= option[0]
            break
        elif option[1] in predicates_of_entity:
            chosen_option= option[1]
            break
print(chosen_option)