In [1]:
from random import random
from copy import deepcopy

class Graph:
    """Directed graph stored as an adjacency list.

    ``adjacency_list`` maps every node to the list of its successors; a
    successor listed several times represents parallel edges. The class
    bundles the helpers needed for Eulerian-cycle search and for the
    enumeration of maximal non-branching paths.
    """

    def __init__(self, adjacency_list):
        """Create a graph from ``adjacency_list``.

        The mapping is deep-copied, so later mutations of the graph never
        touch the caller's data.
        """
        self.adjacency_list = deepcopy(adjacency_list)

    def __str__(self):
        """Return the textual form of the adjacency list."""
        return f'{self.adjacency_list}'

    def get_neighbors(self, node):
        """Return the successor list of ``node`` (raises ``KeyError`` if unknown)."""
        return self.adjacency_list[node]

    def num_neighbors(self, node):
        """Return the number of successors of ``node``."""
        return len(self.get_neighbors(node))

    def add_node(self, node):
        """Add ``node`` with no outgoing edges; an existing node is left untouched."""
        if node not in self.adjacency_list:
            self.adjacency_list[node] = []

    def add_neighbor(self, node, neighbor):
        """Append the edge ``node -> neighbor``.

        Nothing happens when ``node`` is not a key of the adjacency list.
        """
        if node in self.adjacency_list:
            self.adjacency_list[node].append(neighbor)

    def remove_node(self, node):
        """Remove ``node`` together with every edge that points to it."""
        self.adjacency_list.pop(node, None)

        # Only values of existing keys are replaced, so iterating is safe.
        for key, neighbors in self.adjacency_list.items():
            self.adjacency_list[key] = [n for n in neighbors if n != node]

    def out_degree(self, node):
        """Return the number of edges leaving ``node``."""
        return self.num_neighbors(node)

    def in_degree(self, node):
        """Return the number of edges entering ``node`` (parallel edges count)."""
        return sum(
            1
            for neighbors in self.adjacency_list.values()
            for neighbor in neighbors
            if neighbor == node
        )

    def outbound_edges(self, node):
        """Return the edges leaving ``node`` as ``(node, successor)`` tuples."""
        return [(node, neighbor) for neighbor in self.get_neighbors(node)]

    def inbound_edges(self, node):
        """Return the edges entering ``node`` as ``(predecessor, node)`` tuples."""
        return [
            (key, node)
            for key, neighbors in self.adjacency_list.items()
            for neighbor in neighbors
            if neighbor == node
        ]

    def get_all_edges(self):
        """Return every edge of the graph, grouped by source node in key order."""
        return [
            edge
            for key in self.adjacency_list
            for edge in self.outbound_edges(key)
        ]

    def get_unvisited_edges(self, node, unvisited_edges):
        """Return the edges leaving ``node`` that occur in ``unvisited_edges``.

        An edge is reported once per matching entry of ``unvisited_edges``,
        so parallel edges may appear more than once in the result.
        """
        return [
            node_edge
            for node_edge in self.outbound_edges(node)
            for unvisited_edge in unvisited_edges
            if unvisited_edge == node_edge
        ]

    def bypass(self, u, v, w):
        """Replace the edge pair ``u -> v -> w`` by ``u -> x -> w``.

        ``x`` is a fresh node labelled ``'<v>-<random float>'``. This is the
        bypass step used when enumerating all Eulerian cycles.
        """
        x = f'{v}-{random()}'

        self.adjacency_list[u].remove(v)
        self.adjacency_list[v].remove(w)
        self.add_node(x)
        self.add_neighbor(u, x)
        self.add_neighbor(x, w)

    def is_simple(self):
        """Return ``True`` when no node has in-degree greater than one."""
        return all(self.in_degree(key) <= 1 for key in self.adjacency_list)

    def is_connected(self):
        """Return ``True`` when every node can reach all nodes of the graph.

        A depth-first search is started from each node in turn.
        """
        total_num_nodes = len(self.adjacency_list)

        for start in self.adjacency_list:
            visited = {start}
            stack = [start]

            while stack:
                node = stack.pop()
                for neighbor in self.get_neighbors(node):
                    # Mark on push so that no node is stacked twice.
                    if neighbor not in visited:
                        visited.add(neighbor)
                        stack.append(neighbor)

            if len(visited) != total_num_nodes:
                return False

        return True

    def get_unbalanced(self):
        """Return the nodes whose in-degree differs from their out-degree."""
        return [
            key
            for key in self.adjacency_list
            if self.in_degree(key) != self.out_degree(key)
        ]

    def close_to_cycle(self):
        """Add one edge joining the two unbalanced nodes of the graph.

        The edge leaves the node that has more incoming than outgoing edges.

        Raises:
            ValueError: if the graph does not have exactly two unbalanced nodes.
        """
        unbalanced = self.get_unbalanced()
        if len(unbalanced) != 2:
            raise ValueError(
                'close_to_cycle requires exactly two unbalanced nodes, '
                f'found {len(unbalanced)}'
            )
        u, v = unbalanced

        if self.in_degree(u) > self.out_degree(u):
            self.add_neighbor(u, v)
        else:
            self.add_neighbor(v, u)

    def all_eulerian_cycles(self):
        """Return all Eulerian cycles of the graph as lists of node labels.

        Nodes with in-degree above one are repeatedly resolved with
        ``bypass`` until only simple graphs remain; each simple graph yields
        one cycle. Bypass nodes are mapped back to original labels by cutting
        the label at the first ``'-'``, so node labels must be strings that
        do not contain ``'-'``. Duplicate cycles are dropped while keeping
        first-seen order; rotations of one cycle count as different cycles.
        An empty graph yields an empty list.
        """
        if not self.adjacency_list:
            return []

        all_graphs = [self]

        while True:
            non_simple_g = next(
                (g for g in all_graphs if not g.is_simple()), None
            )
            if non_simple_g is None:
                break

            # The last node with in-degree above one is the one resolved.
            non_simple_node = None
            for key in non_simple_g.adjacency_list:
                if non_simple_g.in_degree(key) > 1:
                    non_simple_node = key

            inbound_e = non_simple_g.inbound_edges(non_simple_node)
            outbound_e = non_simple_g.outbound_edges(non_simple_node)

            for (u, v) in inbound_e:
                for (_, w) in outbound_e:
                    # A self-loop paired with itself cannot be bypassed.
                    if u == v and u == w:
                        continue

                    # Graph.__init__ already deep-copies the adjacency list.
                    new_graph = Graph(non_simple_g.adjacency_list)
                    new_graph.bypass(u, v, w)

                    if new_graph.is_connected():
                        all_graphs.append(new_graph)

            all_graphs.remove(non_simple_g)

        cycles = [
            tuple(node.split('-')[0] for node in g.eulerian_cycle())
            for g in all_graphs
        ]

        # dict.fromkeys removes duplicates while preserving order.
        return [list(cycle) for cycle in dict.fromkeys(cycles)]

    def eulerian_cycle(self):
        """Return one Eulerian cycle as a list of nodes.

        The walk starts at the first key of the adjacency list, always
        follows the first unvisited edge of the current node and, whenever it
        gets stuck, is rotated so that it can resume from a node that still
        has unvisited edges. An empty graph yields an empty list.

        Raises:
            ValueError: if unvisited edges remain that no node of the walk
                can reach (the graph has no Eulerian cycle).
        """
        if not self.adjacency_list:
            return []

        current_node = next(iter(self.adjacency_list))
        unvisited_edges = self.get_all_edges()
        cycle = [current_node]

        while unvisited_edges:
            # Extend the walk until the current node has no unvisited edge.
            candidates = self.get_unvisited_edges(current_node, unvisited_edges)
            while candidates:
                current_edge = candidates[0]
                unvisited_edges.remove(current_edge)

                current_node = current_edge[1]
                cycle.append(current_node)
                candidates = self.get_unvisited_edges(current_node, unvisited_edges)

            if not unvisited_edges:
                break

            # Rotate the closed walk so that it ends at a node with unvisited
            # edges. The scan intentionally continues after a rotation: each
            # rotation leaves such a node in the last position, which is
            # where the walk resumes.
            can_resume = False
            for i in range(len(cycle)):
                if self.get_unvisited_edges(cycle[i], unvisited_edges):
                    cycle = cycle[i:] + cycle[1:i + 1]
                    can_resume = True

            if not can_resume:
                raise ValueError(
                    'graph has no Eulerian cycle: some edges are unreachable '
                    'from the walk'
                )

            current_node = cycle[-1]

        return cycle

    def maximal_non_branching_paths(self):
        """Return all maximal non-branching paths as lists of nodes.

        First every path that starts at a node which is not 1-in-1-out is
        extended through 1-in-1-out nodes. Afterwards each isolated cycle
        (made only of 1-in-1-out nodes) is reported once as a closed path
        whose first and last node are the same.
        """
        def is_one_in_one_out(node):
            return self.in_degree(node) == 1 and self.out_degree(node) == 1

        paths = []
        visited = set()

        for v in self.adjacency_list:
            if is_one_in_one_out(v):
                continue

            for (_, w) in self.outbound_edges(v):
                non_branching_path = [v, w]
                visited.add(v)

                while is_one_in_one_out(w):
                    visited.add(w)
                    w = self.get_neighbors(w)[0]
                    non_branching_path.append(w)

                paths.append(non_branching_path)

        # Unvisited 1-in-1-out nodes can only lie on isolated cycles; other
        # unvisited nodes have no outgoing edge and start no path.
        for v in self.adjacency_list:
            if v in visited or not is_one_in_one_out(v):
                continue

            visited.add(v)
            isolated_cycle = [v]

            w = self.get_neighbors(v)[0]
            while w != v:
                isolated_cycle.append(w)
                visited.add(w)
                w = self.get_neighbors(w)[0]

            isolated_cycle.append(v)
            paths.append(isolated_cycle)

        return paths

In [2]:
# Example adjacency list of a graph G (node -> list of successors).
# Every node has equal in- and out-degree, so an Eulerian cycle exists.
adjacency_list = {
    'A': ['B'],
    'B': ['C', 'E'],
    'C': ['D'],
    'D': ['B','A'],
    'E': ['G','F'],
    'F': ['G'],
    'G': ['D','E']
}

# Graph deep-copies the dictionary, so adjacency_list itself stays unchanged.
g = Graph(adjacency_list)
# Last expression of the cell: the cycle is shown as the cell output.
g.eulerian_cycle()

['E', 'G', 'D', 'A', 'B', 'C', 'D', 'B', 'E', 'F', 'G', 'E']

In [3]:
class DeBruijn(Graph):
    """De Bruijn graph built from the k-mers of a collection of reads.

    Inherits every operation of ``Graph``. Each k-mer contributes one edge
    that leads from the k-mer without its last character to the k-mer
    without its first character.
    """

    def __init__(self, reads, k):
        """Build the graph from all overlapping ``k``-mers of ``reads``."""
        edges = {}

        for kmer in self.get_kmers(reads, k):
            prefix, suffix = kmer[:-1], kmer[1:]

            # Register both endpoints (prefix first) so that a node without
            # outgoing edges still appears as a key.
            edges.setdefault(prefix, [])
            edges.setdefault(suffix, [])
            edges[prefix].append(suffix)

        super().__init__(edges)

    def get_kmers(self, reads, k):
        """Return every overlapping substring of length ``k`` of each read, in order."""
        return [
            read[start:start + k]
            for read in reads
            for start in range(len(read) - k + 1)
        ]

In [4]:
# Example: build the De Bruijn graph of a single read from its 3-mers,
# which gives nodes that are two characters long.
reads = ['TAATGCCATGGGATGTT']
dg = DeBruijn(reads, k=3)

In [5]:
# Join the unbalanced nodes with one extra edge and find
# all Eulerian cycles in the De Bruijn graph.
# NOTE: close_to_cycle() mutates dg in place, and later cells use the
# modified graph. Re-running this cell on the already closed graph is
# expected to fail, because close_to_cycle() unpacks exactly two
# unbalanced nodes.
dg.close_to_cycle()
dg.all_eulerian_cycles()

[['TA',
  'AA',
  'AT',
  'TG',
  'GC',
  'CC',
  'CA',
  'AT',
  'TG',
  'GG',
  'GG',
  'GA',
  'AT',
  'TG',
  'GT',
  'TT',
  'TA'],
 ['TA',
  'AA',
  'AT',
  'TG',
  'GG',
  'GG',
  'GA',
  'AT',
  'TG',
  'GC',
  'CC',
  'CA',
  'AT',
  'TG',
  'GT',
  'TT',
  'TA']]

In [6]:
# Spell out the sequence of every maximal non-branching path of the
# De Bruijn graph: the first character of each node on the path,
# followed by the last character of the final node.
[
    ''.join(node[0] for node in path) + path[-1][-1]
    for path in dg.maximal_non_branching_paths()
]

['ATG', 'ATG', 'ATG', 'TGCCAT', 'TGG', 'TGTTAAT', 'GGG', 'GGAT']