In [2]:
import nltk
from nltk.parse import CoreNLPParser
from nltk.tree import ParentedTree
from nltk import Tree
# Initialize the Charniak parser
parser = CoreNLPParser(url='http://localhost:9000')

# Example sentence
sentence = "an estimated 2000 ethnic Tutsi in the district of Tawba."
#sentence = "Akyetsu testified he was powerless to stop the merger of an estimated 2000 ethnic Tutsi in the district of Tawba."
# Parse the sentence
parse_tree = next(parser.raw_parse(sentence))


# Function to find the nearest common ancestor of two nodes in the parse tree
def find_nearest_common_ancestor(tree, entity1, entity2):
    entity1_index = None
    entity2_index = None
    for i, leaf in enumerate(tree.leaves()):
        if entity1 in leaf:
            entity1_index = i
        if entity2 in leaf:
            entity2_index = i
    if entity1_index is not None and entity2_index is not None:
        print("Leaf positions:", entity1_index, entity2_index)
        path_to_node1 = tree.leaf_treeposition(entity1_index)
        path_to_node2 = tree.leaf_treeposition(entity2_index)
        print("Path to node 1:", path_to_node1)
        print("Path to node 2:", path_to_node2)
        for i in range(min(len(path_to_node1), len(path_to_node2))):
            if path_to_node1[i] != path_to_node2[i]:
                return tree[path_to_node1[:i - 1]]
        return tree[path_to_node1[:i - 1]]
    else:
        print("Entities not found in the parse tree.")
        return None


# Example entities
entity1 = "Tutsi"
entity2 = "district"

# Find the nearest common ancestor of the entities
nearest_common_ancestor = find_nearest_common_ancestor(parse_tree, entity1, entity2)

if nearest_common_ancestor is not None:
    # Extract the subtree rooted at the nearest common ancestor
    mct_subtree = nearest_common_ancestor.copy(deep=True)

    # Print the Minimum Complete Tree
    print("Minimum Complete Tree (MCT):")
    
    mct_string = mct_subtree.pformat()
    t = Tree.fromstring(mct_string)
    t.pretty_print()



Leaf positions: 4 7
Path to node 1: (0, 0, 1, 0, 1, 0)
Path to node 2: (0, 0, 1, 1, 1, 0, 1, 0)
Minimum Complete Tree (MCT):
                                NP                                        
         _______________________|________                                  
        |                                NP                               
        |                   _____________|_________                        
        |                  |                       PP                     
        |                  |          _____________|______                 
        |                  |         |                    NP              
        |                  |         |        ____________|_______         
        |                  |         |       |                    PP      
        |                  |         |       |                 ___|____    
        NP                 NP        |       NP               |        NP 
  ______|______       _____|____     |    ___