Klasa **TrieNode** definiše pojedinačne čvorove prefiksnog stabla.

In [50]:
class TrieNode:
    """A single node of a prefix tree (trie).

    Each node stores its own label, a mapping from edge characters to child
    nodes, and a flag telling whether it currently has no children.
    """

    def __init__(self, label):
        """Create a childless node carrying ``label``."""
        # Label of this node; children created through add_neighbor get
        # this label extended by their edge character.
        self.label = label
        # Maps an edge character to the child TrieNode reached through it.
        self.neighbors = {}
        # True until the first child is attached.
        self.is_leaf = True

    def add_neighbor(self, character):
        """Make sure a child exists for ``character``.

        If a child for ``character`` is already present it is left untouched,
        so repeated calls never discard an existing subtree.
        """
        if character in self.neighbors:
            return
        self.is_leaf = False
        self.neighbors[character] = TrieNode(self.label + character)

    def has_neighbor(self, character):
        """Return True when a child is attached under ``character``."""
        return character in self.neighbors

    def get_neighbor(self, character):
        """Return the child attached under ``character``, or None if absent."""
        return self.neighbors.get(character)

    def get_neighbors(self):
        """Return the character -> child mapping (the live dict, not a copy)."""
        return self.neighbors

Klasa **Trie** definiše prefiksno stablo, kao i algoritam pretrage sekvence nad njime.

    #metod koji pokusava da upari sablone iz prefiksnog stabla kao prefikse niske sequence
    def prefix_trie_matching(self, sequence):
        """Walk the trie from the root along ``sequence``.

        Returns the label of the leaf node reached while following the
        characters of ``sequence``, or ``False`` when a character has no
        matching edge or the sequence ends before a leaf is reached.
        """
        node = self.root

        for symbol in sequence:
            if node.is_leaf:
                # A leaf was reached before the sequence ran out.
                break
            if not node.has_neighbor(symbol):
                return False
            node = node.get_neighbor(symbol)

        # Also covers the case where the pattern is the entire sequence.
        return node.label if node.is_leaf else False
        
    #metod koji pronalazi sva uparivanja sablona iz prefiksnog stabla unutar niske sequence    
    def trie_matching(self, sequence):
        """Collect the matches found at every start position of ``sequence``.

        For each index ``start`` the suffix ``sequence[start:]`` is handed to
        ``prefix_trie_matching``; every truthy result is reported as a
        ``(match, start)`` tuple, in increasing order of ``start``.
        """
        suffix_hits = (
            (self.prefix_trie_matching(sequence[start:]), start)
            for start in range(len(sequence))
        )
        return [(found, start) for found, start in suffix_hits if found]

In [51]:
class Trie:
    """Prefix tree built from a list of patterns, with pattern search over a sequence.

    A node counts as the end of a pattern when some pattern finished on it
    during construction, so a pattern that is a proper prefix of another one
    (e.g. 'A' next to 'AB') is still found.
    """

    def __init__(self, patterns):
        """Build the prefix tree from ``patterns``, an iterable of strings."""
        self.root = TrieNode('')
        # Nodes on which some pattern ends. Kept separately from the node's
        # is_leaf flag: that flag turns False as soon as a longer pattern
        # extends the node, which would otherwise hide the shorter pattern.
        self._pattern_ends = set()

        for pattern in patterns:
            current_node = self.root  # every pattern is inserted starting at the root

            for char in pattern:
                if not current_node.has_neighbor(char):
                    current_node.add_neighbor(char)
                current_node = current_node.get_neighbor(char)

            self._pattern_ends.add(current_node)

    def _iter_prefix_matches(self, characters):
        """Yield, shortest first, the label of each pattern spelled by a prefix of ``characters``.

        The root itself is never reported, so an empty pattern produces no match.
        """
        node = self.root
        for char in characters:
            if not node.has_neighbor(char):
                return
            node = node.get_neighbor(char)
            # is_leaf stays a match condition so that leaves attached to the
            # tree by hand (outside the constructor) are still reported.
            if node.is_leaf or node in self._pattern_ends:
                yield node.label

    def prefix_trie_matching(self, sequence):
        """Match the stored patterns against the beginning of ``sequence``.

        Returns the longest pattern that is a prefix of ``sequence``, or
        ``False`` when no pattern matches. Callers should test the result
        for truthiness.
        """
        longest = False
        for match in self._iter_prefix_matches(sequence):
            longest = match
        return longest

    def trie_matching(self, sequence):
        """Find every occurrence of every stored pattern inside ``sequence``.

        Returns a list of ``(pattern, start)`` tuples ordered by ``start``;
        patterns starting at the same position are listed shortest first.
        """
        results = []
        length = len(sequence)

        for start in range(length):
            # Lazy view of the suffix: the walk stops at the first character
            # without a matching edge, so sequence[start:] is never copied.
            suffix = (sequence[position] for position in range(start, length))
            for match in self._iter_prefix_matches(suffix):
                results.append((match, start))

        return results

In [52]:
# Demo: locate every occurrence of the patterns inside the sequence.
patterns = ['ACCG', 'CTCT', 'GGA', 'TATA', 'ATG']
sequence = 'ATATGCTCTTGCTAGATGTGCTATA'

trie = Trie(patterns)

# Last expression of the cell, so the list of (pattern, start index) pairs is displayed.
trie.trie_matching(sequence)

[('ATG', 2), ('CTCT', 5), ('ATG', 15), ('TATA', 21)]