In [5]:
class Trie:
    """Trie that keeps flat registries of all its nodes and edges.

    Attributes:
    - all_nodes: every node created through add_node, in creation order
    - all_edges: every edge created through add_edge, in creation order
    - root: the first node created (label 0)
    """

    class node:
        """A single trie node."""

        def __init__(self):
            # label: set to None here; add_node assigns an integer label
            self.label = None
            # edges: outgoing edge objects, appended by add_edge
            self.edges = []
            # indicator: optional marker, never set inside this class.
            # Per the original notes it is meant for tries built from two
            # texts: '#' for Text1, '$' for Text2, '*' for both.
            self.indicator = None

    class edge:
        """A directed, labelled connection between two trie nodes."""

        def __init__(self):
            # All fields start as None and are filled in by add_edge.
            # from_node / target_node: the node objects at either end
            # label: the symbol carried by this edge
            # position: optional position of that symbol in the text
            self.from_node = None
            self.target_node = None
            self.label = None
            self.position = None

    def __init__(self):
        self.all_nodes = []
        self.all_edges = []
        # The registry is empty at this point, so the root gets label 0.
        self.root = self.add_node()

    def add_node(self):
        """Create a node, register it in the trie and return it.

        The node's label is the number of nodes that existed before it
        was added, so labels are consecutive integers starting at 0.
        """
        created = Trie.node()
        created.label = len(self.all_nodes)
        self.all_nodes.append(created)
        return created

    def add_edge(self, from_node, target_node, lbl, pos = None):
        """Create an edge from 'from_node' to 'target_node' and return it.

        The edge carries label 'lbl' and optional position 'pos'. It is
        appended both to 'from_node.edges' and to the trie-wide
        'all_edges' list.
        """
        link = Trie.edge()
        link.from_node, link.target_node = from_node, target_node
        link.label, link.position = lbl, pos

        for registry in (from_node.edges, self.all_edges):
            registry.append(link)

        return link

In [6]:
def TrieConstruction(Pattern_list):
    """Build a Trie containing every pattern in 'Pattern_list'.

    Each pattern is spelled out from the root one symbol at a time.
    An existing outgoing edge with the matching label is followed;
    otherwise a new node and a new edge are added. Returns the trie.
    """
    trie = Trie()

    for Pattern in Pattern_list:
        cursor = trie.root

        for symbol in Pattern:
            # First outgoing edge whose label matches the symbol, if any.
            existing = next(
                (out for out in cursor.edges if out.label == symbol),
                None,
            )
            if existing is None:
                # No such branch yet: grow the trie by one node and edge.
                child = trie.add_node()
                trie.add_edge(cursor, child, symbol)
                cursor = child
            else:
                cursor = existing.target_node

    return trie

In [12]:
# Alternative sample input:
# with open("TrieConstruction/inputs/input_1.txt") as f:
with open("dataset_865762_4.txt") as f:
    # The patterns are whitespace-separated on the first line of the file.
    # split() with no argument drops the trailing newline and collapses
    # repeated spaces; split(' ') would leave "\n" glued to the last
    # pattern and add a spurious newline-labelled edge to the trie.
    pattern = f.readline().split()

# Build the trie once the file has been closed.
ans = TrieConstruction(pattern)

In [13]:
 # One line per edge: "<from label> <target label> <edge symbol>".
 for link in ans.all_edges:
     print(link.from_node.label, link.target_node.label, link.label)

0 1 A
1 2 C
2 3 G
3 4 T
4 5 T
5 6 T
6 7 T
7 8 A
8 9 G
9 10 C
10 11 G
11 12 G
12 13 C
13 14 G
14 15 A
15 16 C
16 17 A
17 18 A
18 19 T
19 20 G
20 21 A
21 22 C
22 23 T
23 24 T
24 25 T
25 26 G
26 27 C
27 28 C
28 29 C
29 30 T
30 31 C
31 32 A
32 33 A
33 34 T
34 35 T
35 36 G
36 37 T
37 38 A
38 39 A
39 40 G
40 41 A
41 42 C
42 43 A
43 44 C
44 45 C
45 46 G
46 47 A
47 48 G
48 49 C
49 50 C
50 51 G
51 52 G
52 53 T
53 54 G
54 55 G
55 56 C
56 57 A
57 58 G
58 59 C
59 60 T
60 61 A
61 62 A
62 63 A
63 64 T
64 65 G
65 66 G
66 67 G
67 68 A
68 69 A
69 70 G
70 71 T
71 72 C
72 73 C
73 74 C
74 75 G
75 76 T
76 77 C
77 78 A
78 79 T
79 80 T
80 81 C
81 82 C
82 83 G
83 84 T
84 85 T
85 86 C
86 87 T
87 88 C
88 89 C
89 90 T
90 91 G
91 92 C
92 93 A
93 94 C
94 95 A
95 96 G
0 97 G
97 98 C
98 99 A
99 100 A
100 101 G
101 102 C
102 103 T
103 104 T
104 105 A
105 106 T
106 107 A
107 108 T
108 109 G
109 110 C
110 111 C
111 112 A
112 113 T
113 114 G
114 115 A
115 116 C
116 117 T
117 118 A
118 119 G
119 120 C
120 121 C
121 122 G