In [1]:
class Trie:
    ''' Trie (prefix tree) assembled from explicit node and edge objects.

    Besides the links stored on the nodes themselves, the trie keeps two
    flat registries so that callers can enumerate everything it contains:
    - all_nodes: every node, in creation order
    - all_edges: every edge, in creation order
    - root: the first node created (its label is 0)
    '''

    class node:
        ''' A vertex of the trie.
        '''
        def __init__(self):
            '''
            A fresh node carries no information yet:
            - label: identifier of the node (filled in by Trie.add_node)
            - edges: outgoing edge objects, in insertion order
            - indicator: (optional) when two strings are used to build the
                structure, tells whether the node corresponds to a substring
                starting in Text1 (#), in Text2 ($) or in both (*)
            '''
            self.label = None
            self.edges = []
            self.indicator = None

    class edge:
        ''' A directed, labelled link between two trie nodes.
        '''
        def __init__(self):
            '''
            A fresh edge carries no information yet:
            - from_node: node object the edge leaves from
            - target_node: node object the edge points to
            - label: label of the edge (a symbol of the text)
            - position: position in the text of the symbol on this edge
            '''
            self.from_node = None
            self.target_node = None
            self.label = None
            self.position = None

    def __init__(self):
        # The registries must exist before the root is created, because
        # add_node() numbers and records the node through all_nodes.
        self.all_nodes, self.all_edges = [], []
        self.root = self.add_node()

    def add_node(self):
        ''' Create a node, register it in the trie and return it.

        The node is labelled with the number of nodes that existed before
        it, so labels are consecutive integers and the root gets 0.
        '''
        created = Trie.node()
        created.label = len(self.all_nodes)
        self.all_nodes.append(created)
        return created

    def add_edge(self, from_node, target_node, lbl, pos = None):
        ''' Create an edge from 'from_node' to 'target_node' and return it.

        - lbl: label stored on the edge
        - pos: (optional) position stored on the edge, None by default

        The edge is appended both to the outgoing edges of 'from_node' and
        to the trie-wide list of edges.
        '''
        link = Trie.edge()
        link.from_node = from_node
        link.target_node = target_node
        link.label = lbl
        link.position = pos

        from_node.edges.append(link)
        self.all_edges.append(link)
        return link

In [2]:
def TrieConstruction(Pattern_list):
    ''' Build a trie that spells every pattern of Pattern_list.

    Each pattern is threaded through the trie starting at the root: an
    existing edge carrying the current symbol is followed, otherwise a new
    node and a new edge labelled with that symbol are created.

    Returns the resulting Trie object.
    '''
    trie = Trie()

    for Pattern in Pattern_list:
        cursor = trie.root

        for symbol in Pattern:
            # First child reachable through an edge labelled 'symbol', if any.
            child = next(
                (link.target_node for link in cursor.edges if link.label == symbol),
                None,
            )
            if child is None:
                # No such edge yet: grow the trie by one node and one edge.
                child = trie.add_node()
                trie.add_edge(cursor, child, symbol)
            cursor = child

    return trie

In [3]:
def PrefixTrieMatching(Prefix, trie):
    ''' Prefix Trie Matching

    Walks down 'trie' from its root, following the symbols of 'Prefix' in
    order, until a leaf is reached.

    - Prefix: the text whose beginning is matched against the trie
    - trie: object with a 'root' node; nodes expose 'edges', edges expose
        'label' and 'target_node'

    Returns the string spelled on the path from the root to the leaf when
    'Prefix' starts with it, otherwise None (a symbol has no matching edge,
    or 'Prefix' ends before a leaf is reached).

    NOTE: a match is only recognised at a leaf, so a pattern that is a
    proper prefix of another stored pattern is never reported on its own.
    '''
    node = trie.root
    matched = []
    idx = 0

    while True:
        # Reaching a leaf means a whole root-to-leaf path has been spelled.
        if not node.edges:
            return ''.join(matched)

        # The text ran out before a leaf: there is no symbol left to follow.
        # (Re-using the last symbol here would report patterns that extend
        # past the end of the text.)
        if idx == len(Prefix):
            return None

        symbol = Prefix[idx]
        for edge in node.edges:
            if edge.label == symbol:
                matched.append(symbol)
                node = edge.target_node
                idx += 1
                break
        else:
            # No outgoing edge carries the current symbol.
            return None


def TrieMatching(Text, trie):
    ''' Trie Matching

    Tries every suffix of 'Text' against 'trie' with PrefixTrieMatching.

    Returns the list of start positions (0-based, increasing) of the
    suffixes for which PrefixTrieMatching reported a match.
    '''
    hits = []
    for start in range(len(Text)):
        # Text[start:] is the suffix beginning at position 'start'.
        if PrefixTrieMatching(Text[start:], trie) is not None:
            hits.append(start)
    return hits

In [28]:
# Alternative inputs used during development:
#   TrieMatching/inputs/input_6.txt
#   dataset_865762_8.txt
#   MultiplePatternMatching/inputs/input_2.txt
# Input layout: first line is the text, second line the space-separated
# patterns.
with open('dataset_865772_4.txt') as f:
    # Drop the line terminator so it is not treated as part of the text.
    text = f.readline().rstrip('\n')
    pattern = f.readline().strip().split(" ")

t = TrieConstruction(pattern)
matches = TrieMatching(text, t)

# Map every pattern to the positions of the text where it occurs.
# Patterns may differ in length, so each reported position is checked
# against every pattern instead of slicing a fixed-width window.
answer = {p: [] for p in pattern}
for idx in matches:
    for p in answer:
        if text.startswith(p, idx):
            answer[p].append(idx)

# One line per pattern: "<pattern>:" followed by its positions, if any.
with open('output.txt', 'w') as file:
    for key, value in answer.items():
        file.write(str(key) + ":")
        if value:
            file.write(' ' + ' '.join(str(x) for x in value))
        file.write('\n')

In [13]:
# Debug output: show the text and the list of patterns read from the input.
print(text, pattern)

bananas
 ['ana', 'as']


In [26]:
# Debug output: show the dictionary mapping each key to its list of positions.
print(answer)

{'GT': [], 'AGCT': [], 'TAA': [], 'AAT': [], 'AATAT': []}
