In [None]:
import nltk
from nltk.corpus import stopwords

import spacy

Constituent Path

In [None]:
def find_constituent_path(text, entity1, entity2):
    """Return the constituent-label path linking two labelled nodes of a parse tree.

    Args:
        text: Bracketed parse string accepted by ``nltk.Tree.fromstring``,
            e.g. ``"(S (NP (NNP John)) (VP (VBD saw) (NP (NNP Mary))))"``.
        entity1: Label of the first constituent to locate.
        entity2: Label of the second constituent to locate.

    Returns:
        A list of node labels: the labels from the lowest common ancestor
        (LCA) down to the ``entity1`` node, followed by the labels from the
        LCA down to the ``entity2`` node. Each half starts with the LCA label
        and ends with the entity label.

    Raises:
        ValueError: If no subtree is labelled ``entity1`` or ``entity2``.

    When a label occurs more than once, the first match in pre-order
    (top-down, left-to-right) is used for that entity.
    """
    tree = nltk.Tree.fromstring(text)

    # Work with tree positions (tuples of child indices from the root) rather
    # than subtree objects: positions identify a node unambiguously even when
    # two subtrees compare equal, and make the LCA a simple shared prefix.
    positions = tree.treepositions()

    def first_position(label):
        """Return the position of the first subtree carrying ``label``."""
        for position in positions:
            node = tree[position]
            # Leaves are plain strings and have no label; skip them.
            if isinstance(node, nltk.Tree) and node.label() == label:
                return position
        raise ValueError(f"no constituent labelled {label!r} in the parse tree")

    position1 = first_position(entity1)
    position2 = first_position(entity2)

    # The LCA sits at the longest common prefix of the two positions.
    lca_depth = 0
    for step1, step2 in zip(position1, position2):
        if step1 != step2:
            break
        lca_depth += 1

    def labels_from_lca(position):
        """Collect labels walking from the LCA down to ``position``."""
        return [
            tree[position[:depth]].label()
            for depth in range(lca_depth, len(position) + 1)
        ]

    return labels_from_lca(position1) + labels_from_lca(position2)


Bag of Words

In [None]:
def find_bag_of_words(text, entity1, entity2):
    """Return the content words that lie between two entity mentions.

    Args:
        text: Sentence (or passage) to search.
        entity1: Surface form of one entity; may span several tokens.
        entity2: Surface form of the other entity; may span several tokens.

    Returns:
        A list of the alphabetic, non-stop-word tokens found strictly between
        the first occurrence of each entity, in their original order. The
        entities may appear in either order in ``text``.

    Raises:
        ValueError: If either entity does not occur in the tokenized text.

    Requires the NLTK tokenizer models and the ``stopwords`` corpus to be
    installed (``nltk.download``).
    """
    tokens = nltk.word_tokenize(text)

    def locate(entity):
        """Return the (start, end) token span of the first match of ``entity``."""
        entity_tokens = nltk.word_tokenize(entity)
        width = len(entity_tokens)
        if width:
            for start in range(len(tokens) - width + 1):
                if tokens[start:start + width] == entity_tokens:
                    return start, start + width
        raise ValueError(f"{entity!r} is not in the text")

    # Locate the entities BEFORE filtering: an entity that is itself a stop
    # word or contains non-alphabetic characters would otherwise disappear
    # from the token list and could never be found.
    earlier, later = sorted((locate(entity1), locate(entity2)))
    between = tokens[earlier[1]:later[0]]

    # NLTK's stop-word list is lowercase, so compare case-insensitively.
    stop_words = set(stopwords.words('english'))
    return [
        word for word in between
        if word.isalpha() and word.lower() not in stop_words
    ]


Syntactic Structure

In [None]:
def find_syntactic_structure(text, entity1, entity2):
    """Collect dependency labels for ``entity2`` tokens attached under ``entity1``.

    Args:
        text: Text passed to the module-level ``nlp`` callable.
        entity1: Token text to match as the head.
        entity2: Token text to match among the head's direct children.

    Returns:
        A list with the ``dep_`` value of every child whose text equals
        ``entity2`` under every token whose text equals ``entity1``; empty
        when there is no such head/child pair.

    NOTE(review): ``nlp`` is not defined in this cell; presumably it is a
    loaded spaCy pipeline created elsewhere in the notebook - confirm it is
    defined before this function is called.
    """
    parsed = nlp(text)

    # Only the head -> direct child direction is inspected.
    return [
        dependent.dep_
        for head in parsed
        if head.text == entity1
        for dependent in head.children
        if dependent.text == entity2
    ]