In [2]:
import spacy
nlp = spacy.load("en_core_web_lg")

import numpy as np
import re
import importlib
import time
from typing import Optional

import ogree_wiki as wiki
importlib.reload(wiki)

<module 'ogree_wiki' from 'c:\\Users\\thoxy\\OneDrive\\Documents\\IMT-PJENT\\OGrEE_NLP\\ogree_wiki.py'>

In [3]:
ACTIONS_DEFAULT = {
                    "ACTION_POSITIVE" : ["make","build","put","place"],
                    "ACTION_NEGATIVE" : ["remove", "delete"], 
                    "ALTERATION" : ["modify", "change","move","set","rename","rotate"]
                    }

ACTIONS_CLI = {
                "ACTION_POSITIVE" : "+",
                "ACTION_NEGATIVE" : "-"
                }

SIMILARITY_THRESHOLD = 0.5

PARAMETERS_DICT = {
            "name" : ["name","called"],
            "position" : ["position","at","located"],
            "rotation" : ["rotation","turned","degree"],
            "size" : ["size","dimensions"],
            "template" : ["template"],
            "axisOrientation" : ["axis", "orientation"],
            "floorUnit" : ["floor","unit"],
            "slot" : ["slot"]
            }


In [4]:
def findIndexMainSubject(processed_entry : type(nlp("")), indexAction : int, dictioIndexKeyWords : dict) -> int :

    counter = 0
    currentIndexes = {index:index for index,_ in dictioIndexKeyWords.items() if processed_entry[index].pos_ != "VERB"}
    currentWords = {index:processed_entry[index] for index in currentIndexes.keys()}
    while (not indexAction in currentIndexes.values()) and counter < 3 :
        currentWords = {originIndex : processed_entry[currentIndex].head for originIndex,currentIndex in currentIndexes.items()}
        currentIndexes = {originIndex : currentWords[originIndex].i for originIndex,_ in currentIndexes.items()}
        counter += 1

    if counter == 3 :
        raise Exception("Did not find the main subject")
    
    if list(currentIndexes.values()).count(indexAction) != 1 :
        listIndexesRemaining = [originIndex for originIndex,currentIndex in currentIndexes.items() if currentIndex == indexAction and originIndex > indexAction]
        return listIndexesRemaining[0]
    else :
        return [originIndex for originIndex,currentIndex in currentIndexes.items() if currentIndex == indexAction][0]


In [5]:
def searchEntity(processed_entry : type(nlp("")), KEY_WORDS_ENTRY : dict, currentIndex : int):
    if currentIndex in KEY_WORDS_ENTRY.keys() and KEY_WORDS_ENTRY[currentIndex] == "entity":
        return currentIndex
    else:
        for child in processed_entry[currentIndex].children:
            index = searchEntity(processed_entry, KEY_WORDS_ENTRY, child.i)
            if index != None:
                return index
        return None

def searchName(processed_entry : type(nlp("")), KEY_WORDS_ENTRY : dict, currentIndex : int):
    if processed_entry[currentIndex].is_upper or processed_entry[currentIndex].pos_ == "PROPN" :
        return currentIndex
    else:
        for child in processed_entry[currentIndex].children:
            index = searchName(processed_entry, KEY_WORDS_ENTRY, child.i)
            if index != None:
                return index
        return None

def findParent(processed_entry : type(nlp("")), KEY_WORDS_ENTRY : dict, dictEntities : dict, firstEntityIndex : int):
    secondEntityIndex = None
    for child in processed_entry[firstEntityIndex].children:
        index = searchEntity(processed_entry, KEY_WORDS_ENTRY, child.i)
        if index != None:
            secondEntityIndex = index
            break
    if secondEntityIndex != None:
        secondEntityName = str(processed_entry[searchName(processed_entry, KEY_WORDS_ENTRY, secondEntityIndex)])
    else:
        secondEntityName = None
    return (dictEntities[secondEntityIndex], secondEntityName)

In [6]:
def findHierarchy(processed_entry : type(nlp("")), dictioEntities : dict) :
    hierarchyPositions = {list(wiki.ENTITIES.keys()).index(entity) for entity in dictioEntities.values()}


In [7]:
def name(processed_entry : type(nlp("")), dictioEntities : dict, indexesMain : list) -> int :

    def findClose(processed_entry : type(nlp("")), index : int) -> (int|None) :
        if index +1 <= len(processed_entry)-1 :
            if processed_entry[index+1].is_upper or processed_entry[index+1].pos_ == "PROPN" :
                return index+1
        if 0 <= index -1 :
            if processed_entry[index-1].is_upper or processed_entry[index-1].pos_ == "PROPN" :
                return index-1
        return None
    

    def findAttachedEntity(processed_entry : type(nlp("")), index : int) -> (int|None) : 
        counter = 0
        isFound = False
        currentIndex = index
        currentWord = processed_entry[currentIndex]
        while (not isFound) and counter < 3 :
            currentWord = processed_entry[currentIndex].head
            currentIndex = currentWord.i
            if str(currentWord) in dictioEntities.values() :
                return currentIndex
            if currentWord.pos_ == "VERB" and currentIndex == indexesMain[1] : 
                return indexesMain[0]
            counter += 1
        return None
    
    EXPLICIT =  PARAMETERS_DICT["name"]
    IMPLICIT = ["current","main"]

    dictioEntityNames = {}
        
    for index,entity in dictioEntities.items() :
    # if the name if right beside the entity
        resultIndex = findClose(processed_entry, index)
        if resultIndex != None :
            dictioEntityNames[index] = resultIndex

    if len(dictioEntityNames) < len(dictioEntities) : # if not all names found

        for index,token in enumerate(processed_entry) : # look for keyword

            if processed_entry[index].is_upper or processed_entry[index].pos_ == "PROPN" :
                indexAttachedEntity = findAttachedEntity(processed_entry,index)
                if not (indexAttachedEntity in dictioEntityNames or indexAttachedEntity == None) :
                    dictioEntityNames[indexAttachedEntity] = index
            
            # if the token is a synonym of called
            if sum([token.similarity(nlp(word)[0]) > SIMILARITY_THRESHOLD for word in EXPLICIT]) >= 1 :
                resultIndex = findClose(processed_entry, index)
                if resultIndex != None :
                    indexAttachedEntity = findAttachedEntity(processed_entry,index)
                    if not (indexAttachedEntity in dictioEntityNames or indexAttachedEntity == None) :
                        dictioEntityNames[indexAttachedEntity] = resultIndex


    if len(dictioEntityNames) < len(dictioEntities) : # if still not all names found
        pass
    # TODO : get current names

    # TODO : check for implicit words now
    
    return dictioEntityNames

In [8]:
def position(processed_entry : type(nlp("")), index : int, attachedEntity : str, lastKeyWordIndex : int, nextKeyWordIndex : int) -> Optional[list] :
    # search for keywords to the right and left to split whole sentence into parts where to look for param entries
    next_words = processed_entry[index+1:nextKeyWordIndex]
    previous_words = processed_entry[lastKeyWordIndex:index]

    LENGTH_CRITERIA = 2
    if attachedEntity == "device" :
        LENGTH_CRITERIA = 1
    position_list = re.findall("[-]*[0-9]+[.]*[0-9]*", " ".join([str(token) for token in next_words]))
    if not (len(position_list) == LENGTH_CRITERIA or (len(position_list) == 3 and attachedEntity == "rack")):
        position_list_left = re.findall("[-]*[0-9]+[.]*[0-9]*", " ".join([str(token) for token in previous_words]))
        if not (len(position_list) == LENGTH_CRITERIA or (len(position_list) == 3 and attachedEntity == "rack")) :
            raise Exception("Wrong location format entered")
        else :
            position_list = position_list_left
    result = [float(coord) for coord in position_list] if position_list else None
    return result

In [9]:
def rotation(processed_entry : type(nlp("")), index : int, attachedEntity : str, lastKeyWordIndex : int, nextKeyWordIndex : int) -> Optional[float] :
    next_words = processed_entry[index+1:nextKeyWordIndex]
    previous_words = processed_entry[lastKeyWordIndex:index]

    # TODO : adapt for rack
    
    isRotationNegative = False
    rotation_list = re.findall("[-]*[0-9]+[.]*[0-9]*", " ".join([str(token) for token in next_words]))
    isRotationNegative = re.search("counter.*clockwise", "".join([str(token) for token in next_words]))
    if not len(rotation_list) == 1 :
        rotation_list_left = re.findall("[-]*[0-9]+[.]*[0-9]*", " ".join([str(token) for token in previous_words]))
        isRotationNegative = re.search("counter.*clockwise", "".join([str(token) for token in next_words]))
        if not len(rotation_list_left) == 1 :
            raise Exception("Wrong rotation format entered")
        else :
            rotation_list = rotation_list_left
    rotationFinal = float(rotation_list[0]) if rotation_list else None
    rotationFinal = -rotationFinal if isRotationNegative else rotationFinal
    return rotationFinal

In [10]:
def size(processed_entry : type(nlp("")), index : int, lastKeyWordIndex : int, nextKeyWordIndex : int) -> Optional[list] :
    # TODO : it only works for regular size not sizeXY or sizeU
    next_words = processed_entry[index+1:nextKeyWordIndex]
    last_words = processed_entry[lastKeyWordIndex:index]

    size_list = re.findall("[-]*[0-9]+[.]*[0-9]*", " ".join([str(token) for token in next_words]))
    if not (len(size_list) == 3):
        size_list_left = re.findall("[-]*[0-9]+[.]*[0-9]*", " ".join([str(token) for token in last_words]))
        if not (len(size_list) == 3) :
            raise Exception("Wrong location format entered")
        else :
            size_list = size_list_left
    result = [float(coord) for coord in size_list] if size_list else None
    return result

In [11]:
def template(processed_entry : type(nlp("")), index : int, lastKeyWordIndex : int, nextKeyWordIndex : int) -> Optional[str] :
    #The template is defined by a name, it's the same structure as for Name() but simplified since there is less convoluted wording
    
    def findClose(indexToken : int) -> Optional[int] :
        if indexToken +1 <= len(processed_entry)-1 :
            if processed_entry[indexToken+1].is_upper or processed_entry[indexToken+1].pos_ == "PROPN" :
                return indexToken+1
        if 0 <= indexToken -1 :
            if processed_entry[indexToken-1].is_upper or processed_entry[indexToken-1].pos_ == "PROPN" :
                return indexToken-1
        return None

    def findAttachedEntity(indexToken : int) -> (bool) : 
        counter = 0
        isFound = False
        currentIndex = indexToken
        currentWord = processed_entry[currentIndex]
        while (not isFound) and counter < 3 :
            currentWord = processed_entry[currentIndex].head
            currentIndex = currentWord.i
            if str(currentWord) in PARAMETERS_DICT["template"] : #Peux être faut il passer aux Lemma ?
                return True
            counter += 1
        return False
    
    EXPLICIT =  PARAMETERS_DICT["name"]

    # if the name if right beside the entity
    resultIndex = findClose(index)
    if resultIndex != None :
        return processed_entry[resultIndex]
    else: #look in the NProp if one is linked to Template
        for indexTemp,tokenTemp in enumerate(processed_entry) : # look for keyword
            if processed_entry[indexTemp].is_upper or processed_entry[indexTemp].pos_ == "PROPN" :
                indexAttachedEntity = findAttachedEntity(indexTemp)
                if findAttachedEntity(indexTemp) :
                    return tokenTemp
            
            """
            # if the token is a synonym of called
            if sum([tokenTemp.similarity(nlp(word)[0]) > SIMILARITY_PARAMETER for word in EXPLICIT]) >= 1 :
                resultIndex = findClose(indexTemp)
                if resultIndex != None :
                    indexAttachedEntity = findAttachedEntity(indexTemp)
                    if indexAttachedEntity != None :
                        resultIndex = indexAttachedEntity
                else {
                    return resultIndex
                }
            """

    return None

In [26]:
# TODO : the similarity func is very time-taking, we must shorten the process time or find another way

def main() -> str :
    FINAL_INSTRUCTION = ""

    # TODO : add already existing entity names
    ENTITIES_FULL_NAME = {"entity" : list(wiki.ENTITIES.keys())}
    KEY_WORDS_ALL = {**ENTITIES_FULL_NAME,  **PARAMETERS_DICT}

    natural_entry = input("Enter a prompt. Please follow the instructions given.\n")
    processed_entry = nlp(natural_entry)

    KEY_WORDS_ENTRY = {} 
    # we detect key words in the sentence given and put them into KEY_WORDS_ENTRY
    lastParameter = None
    for index,token in enumerate(processed_entry) :
        matching_list = [] # list of tuples with the similarity score and type of key word (for each key word)
        if token.pos_ == "VERB" and str(token) == token.lemma_ : # 2nd test : if infinitive verb
            for parameter in ACTIONS_DEFAULT.keys() :
                similarity = max([token.similarity(nlp(word)[0]) for word in ACTIONS_DEFAULT[parameter]])
                matching_list.append((similarity,parameter))
        elif token.pos_ in ["NOUN","ADP","VERB"]:
            for parameter in KEY_WORDS_ALL.keys() : 
                similarity = max([token.similarity(nlp(word)[0]) for word in KEY_WORDS_ALL[parameter]])
                matching_list.append((similarity,parameter))
        else :
            continue

        match = max(matching_list)

        # if "called" or a synonym is used for a parameter and not for an entity
        if match[1] == "name" and (lastParameter == token.head or lastParameter in token.children) :
            continue
        if match[0] > SIMILARITY_THRESHOLD :
            # if is considered a key word, is added to the dict
            KEY_WORDS_ENTRY[index] = match[1] 
            if parameter in PARAMETERS_DICT.keys() :
                lastParameter = token

    print(KEY_WORDS_ENTRY)

    # test detection
    list_key_param = list(KEY_WORDS_ENTRY.values())
    count_action = 0 # the nb of action words indentified
    for action_type in ACTIONS_DEFAULT.keys() :
        count_action += list_key_param.count(action_type)

    if count_action != 1 :
        raise Exception("Issue with action dectection")

    global INDEX_ACTION
    global INDEX_MAIN_SUBJECT
    INDEX_ACTION = [index for index,keyword in KEY_WORDS_ENTRY.items() if keyword in ACTIONS_DEFAULT.keys()][0]
    INDEX_MAIN_SUBJECT = findIndexMainSubject(processed_entry, INDEX_ACTION, KEY_WORDS_ENTRY)  
    print("index main : ",INDEX_MAIN_SUBJECT)

    dictEntities = {index : str(processed_entry[index]) for index,keyword in KEY_WORDS_ENTRY.items() if keyword == "entity"}
    print(dictEntities)
    dictioEntityNames = name(processed_entry, dictEntities, [INDEX_MAIN_SUBJECT, INDEX_ACTION])
    print("names : ",dictioEntityNames)

    # TODO : match cases (main = entity/parameter, verb = +/alteration...)
    if KEY_WORDS_ENTRY[INDEX_MAIN_SUBJECT] == "entity" :

        if KEY_WORDS_ENTRY[INDEX_ACTION] == "ACTION_POSITIVE" : 
            # we do the processes related to each parameter
            dictioEntityParameters = wiki.makeDictParam(str(processed_entry[INDEX_MAIN_SUBJECT]))
            dictioEntityParameters["name"] = str(processed_entry[dictioEntityNames[INDEX_MAIN_SUBJECT]])
            allEntryItems = KEY_WORDS_ENTRY.items()
            for counter,(index,parameter) in enumerate(allEntryItems) :
                if (not parameter in PARAMETERS_DICT.keys()) or bool(dictioEntityParameters[parameter]) == True :
                    continue
                lastKeyWordIndex = 0 if counter == 0 else list(allEntryItems)[counter-1][0]
                nextKeyWordIndex = len(processed_entry) if counter == len(allEntryItems)-1 else list(allEntryItems)[counter+1][0]
                # get the parameter value
                parameterValue = globals()[parameter](processed_entry, index, lastKeyWordIndex, nextKeyWordIndex)
                dictioEntityParameters[parameter] = parameterValue # store the value

            print(dictioEntityParameters)
            # TODO : call the class method

        if KEY_WORDS_ENTRY[INDEX_ACTION] == "ACTION_NEGATIVE" :
            # TODO : only check if the entity is well detected
            pass

        if KEY_WORDS_ENTRY[INDEX_ACTION] == "ALTERATION" :
            # TODO : understand what are the changes and its consequences
            pass

    if KEY_WORDS_ENTRY[INDEX_MAIN_SUBJECT] in PARAMETERS_DICT.keys() :
        # TODO : seek the value to be changed
        pass
    
    # if seeking the name for the main entity, pass the indexaction as parameter
    # if no name found, check the type of action : if +, a name is needed, otherwise not necessarily

    # check if parameters were not given

    # return KEY_WORDS_ENTRY


In [27]:
text = "create, in the site IMT, a building A with 0 0 position"
main()

token :  set head :  set lastParameter :  None False
token :  position head :  set lastParameter :  None False
token :  for head :  set lastParameter :  position False
token :  building head :  for lastParameter :  position False
token :  named head :  building lastParameter :  building True
token :  with head :  named lastParameter :  building False
token :  dimension head :  with lastParameter :  building False
token :  turned head :  set lastParameter :  dimension False
token :  by head :  turned lastParameter :  turned True
token :  degrees head :  by lastParameter :  turned False
token :  with head :  turned lastParameter :  degrees False
token :  template head :  with lastParameter :  degrees False
token :  called head :  template lastParameter :  template True
{0: 'ALTERATION', 2: 'position', 7: 'entity', 12: 'size', 17: 'rotation', 20: 'rotation', 24: 'template'}
index main :  2
{7: 'building'}


  if sum([token.similarity(nlp(word)[0]) > SIMILARITY_PARAMETER for word in EXPLICIT]) >= 1 :


names :  {7: 9}


In [14]:
nlp("inside")[0].similarity(nlp("having")[0])
# [nlp("current")[0].similarity(nlp(word)[0]) for word in ["axis", "orientation"]]

0.39670881628990173

In [15]:
#Test position, size, rotation, template
testNlp = nlp("set the position 0 0 for the building named BATIMENT with the dimension 50 40 60, turned by 90 degrees and with the template called intel640")
testNlp2 = nlp("make the building A in the site called IMT")

# print("Position :" + str(position(testNlp, 0, 0, 4, "building")))
# print("Rotation :" + str(rotation(testNlp, 15, 11, 19, "building")))
# print("Dimension :" + str(size(testNlp, 10, 6, 14, "building")))
# print("Template :" + str(template(testNlp, 22, 19, 23, "building")))

for x in PARAMETERS_DICT.keys() :
    print(nlp(x)[0].similarity(nlp("inside")[0]))

0.07210052758455276
0.31477174162864685
0.2948082685470581
0.307853639125824
0.3341883718967438
0.0
0.0
0.1039787158370018


  print(nlp(x)[0].similarity(nlp("inside")[0]))


In [16]:
spacy.displacy.render(testNlp, style="dep")

In [17]:
nlp("R1")[0].is_upper

True