# Eulerian Walks and Assembly

---
## Before Class
1. Review slides on Eulerian walks

---
## Learning Objectives
1. Understand and implement De Bruijn graphs for assembly


---
## Eulerian walk

To continue our implementation from last class, we will use our De Bruijn graph to output a valid sequence from the assembly. This is implemented as a recursive algorithm by considering all valid edges. You will notice that as you change k, we are able to better recapitulate our sequence depending on how repetitive it is. In a more complex implementation of a Eulerian walk there are heuristics and defined rules for determining the validity of traversing a specific edge in the graph to result in a full graph-traversal. One of these methods is to traverse the graph in a depth first manner to avoid sectioning off any part of the graph in the traversal. In our implementation we will ignore these for simplicity.

```
eulerian_walk:
Beginning at first_node as node

For node:
    follow a random valid edge from node
    remove edge
    recurse
```


In [30]:
from collections import defaultdict
import random  #belongs to python and not numpy 

class DeBruijnGraph():
    """Main class for De Bruijn graphs
    
    Private Attributes:
        graph (defaultdict of lists): Edges for De Bruijn graph
        first_node (str): starting position for traversing the graph
    """

    def __init__(self, input_string, k):
        self.graph = defaultdict(list)
        self.first_node = ''
        self.build_debruijn_graph(input_string, k)
        
    def add_edge(self, left, right):
        ''' This function adds a new edge to the graph
        
        Args:
            left (str): The k-1 mer for the left edge
            right (str): The k-1 mer for the right edge

        Updates graph attribute to add right to the list named left in defaultdict   
        '''
        self.graph[left].append(right)
        
    def remove_edge(self, left, right):
        ''' This function removed an edge from the graph
        
        Args:
            left (str): The k-1 mer for the left edge
            right (str): The k-1 mer for the right edge

        Updates graph attribute to remove right from the list named left in defaultdict
        '''
        matching_edges = []
        for i, key in enumerate(self.graph[left]):
            if key == right:
                self.graph[left].pop(i)
                break

        
    def build_debruijn_graph(self, input_string, k):
        ''' This function builds a De Buijn graph from a string
        
        Args:
            input_string (str): string to use for building the graph
            k (int): k-mer length for graph construction

        Updates graph attribute to add all valid edges from the string
        
        Example:
        >>> dbg = DeBruijnGraph("this this this is a test", 4)
        >>> print(dbg.graph) #doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
        defaultdict(<class 'list'>, {'thi': ['his', 'his', 'his'], 'his': ['is ', 'is ', 'is '], ...)
        '''
        for i in range(len(input_string) - k + 1):
            kmer = input_string[i:i+k]
            left_mer = kmer[0:k-1]
            right_mer = kmer[1:k]
            self.add_edge(left_mer, right_mer)
            
            #so if the list is empty, then assign the first node to the left_mer. 
            if i == 0:
                self.first_node = left_mer
                
                
    def print_eulerian_walk(self, seed=None):
        ''' This function starts the recursive walk function
        at the first node in the graph

        Args: None

        Returns:
            tour (list): list of k-1 mers traversed by the algorithm

        Example:
        >>> dbg = DeBruijnGraph("this this this is a test", 4)
        >>> dbg.print_eulerian_walk(seed=1) #doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
        ['thi', 'his', 'is ', 's i', ' is', 'is ', ...]
        '''
    
        tour = []
        random.seed(seed)
        #we want tour to be calling the eularian function. Here we're calling the recursive function again 
        tour = self.eulerian_walk(self.first_node, seed = seed)
        tour = tour + [self.first_node]
        #it's backwards because it is going from down to up the branch when you are actually going up from down. 
        return tour[::-1]
    
        
    def eulerian_walk(self, node, seed=None):
        ''' This is a recursive function that follows all edges from a node
        to traverse the graph
        
        Args: 
            node (str): current node to traverse from
            seed (int): seed for random selection of edge to follow
        
        Returns:
            tour (list): list of k-1 mers traversed so far by the algorithm
            Note: this will be reverse order because of recursion
            
        Example:
        >>> dbg = DeBruijnGraph("this this this is a test", 4)
        >>> dbg.eulerian_walk('thi', seed=1) #doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
        ['is ', 'his', 'thi', ' th', ...]
        '''
        
        #recursive function calls on itself until it doesn't -> will call on itself (until no edge left to go to). 
        
        tour = []
        random.seed(seed) #use this for random selection 
        #self.graph is defaultdict so use this as your list. 
        random.shuffle(self.graph[node])
        #extra shuffling to ensure we are randomly choosing. Not absolutely necessary. 
        for next_node in self.graph[node]:
            #for the next node, it'll be one of these randomly shuffled RKmers in the list. 
            #after random.shuffle, the list is still shuffled when you print it again. 
            next_node = random.choice(self.graph[node])
            #randomly choosing one of the elements in this shuffled list. 
            self.remove_edge(node, next_node)     
           #if there are no other nodes with edges, then the length would be zero since we are done.  
            if len(self.graph[next_node]) > 0:
                #we want tour to be calling the recursive function still. 
                tour = self.eulerian_walk(next_node, seed = seed)
                #want to add all the next_node's to the empty tour list and then return the tour list. 
                #you cannot append an empty list to another list. you have to add it it appears. 
                tour = tour + [next_node]
                #this list is full of the k-1 mers. 
        return tour
            

In [35]:
# This will now print the output from your graph using a random walk

#here we're creating our graph
graph = DeBruijnGraph("fool me once shame on shame on you fool me", 4) 
print(graph.graph)

#here we're saying take this graph and assing it the attribute of print_eulerian_walk function. 
walk = graph.print_eulerian_walk(seed=11)
walk[0] + ''.join(map(lambda x: x[-1], walk[1:]))

defaultdict(<class 'list'>, {'foo': ['ool', 'ool'], 'ool': ['ol ', 'ol '], 'ol ': ['l m', 'l m'], 'l m': [' me', ' me'], ' me': ['me '], 'me ': ['e o', 'e o', 'e o'], 'e o': [' on', ' on', ' on'], ' on': ['onc', 'on ', 'on '], 'onc': ['nce'], 'nce': ['ce '], 'ce ': ['e s'], 'e s': [' sh'], ' sh': ['sha', 'sha'], 'sha': ['ham', 'ham'], 'ham': ['ame', 'ame'], 'ame': ['me ', 'me '], 'on ': ['n s', 'n y'], 'n s': [' sh'], 'n y': [' yo'], ' yo': ['you'], 'you': ['ou '], 'ou ': ['u f'], 'u f': [' fo'], ' fo': ['foo']})


'fool me on shame once shame o'

In [36]:
print(walk)

['foo', 'ool', 'ol ', 'l m', ' me', 'me ', 'e o', ' on', 'on ', 'n s', ' sh', 'sha', 'ham', 'ame', 'me ', 'e o', ' on', 'onc', 'nce', 'ce ', 'e s', ' sh', 'sha', 'ham', 'ame', 'me ', 'e o']


In [15]:
import doctest
doctest.testmod()

**********************************************************************
File "__main__", line 107, in __main__.DeBruijnGraph.eulerian_walk
Failed example:
    dbg.eulerian_walk('thi', seed=1) #doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
Expected:
    ['is ', 'his', 'thi', ' th', ...]
Got:
    ['tes', ' te', 'a t', ' a ', 's a', 'is ', 'his', 'thi', ' th', 's t', 'is ', ' is', 's i', 'is ', 'his']
**********************************************************************
1 items had failures:
   1 of   2 in __main__.DeBruijnGraph.eulerian_walk
***Test Failed*** 1 failures.


TestResults(failed=1, attempted=6)

In [None]:
#a function that calls itself until it doesn't. 

In [7]:
#example of a recursive function 
def count_down(num):
    '''
    Counts down from the starting number 
    '''
    print(num)
    next_num = num - 1
    if next_num > 0:
        count_down(next_num)
    
    

In [8]:
count_down(10)

10
9
8
7
6
5
4
3
2
1


In [None]:
print(node)
next_node = node - self.remove_edge 

In [51]:
random.seed(None)

x = []
for i in range(2):
    a = random.randint(0,2)
    x.append(a)
print(x)
    

[2, 1]


In [52]:
dic_y = {"Key":["ATT", "AATTC"]}
dic_y["Key"][0]

'ATT'

In [19]:
import random
test = ["AAT", "TTC", "GGT", "AATC"]

In [14]:
random.shuffle(test)
print(f'This is the new test list: {test}')

This is the new test list: ['GGT', 'TTC', 'AAT', 'AATC']


In [34]:
dict_ex = {"KEY": ["ATT", "GGT", "CCG", "TTA"], "ANSWER": ["Amelia", "Sylvia", "Mom"]}

In [50]:
random.shuffle(dict_ex["KEY"])
print(f'This is the new shuffle: {dict_ex["KEY"]}')

This is the new shuffle: ['TTA', 'GGT', 'ATT', 'CCG']


In [91]:
random.shuffle(dict_ex["ANSWER"])
print(f'This is the new shuffle: {dict_ex["KEY"]}')

This is the new shuffle: ['GGT', 'TTA', 'ATT', 'CCG']


In [116]:
random.shuffle(dict_ex["ANSWER"])
for test in dict_ex["ANSWER"]:
    test = random.choice(dict_ex["ANSWER"])
test

'Amelia'

In [102]:
test_2 = ["AAT", "GGT", "CCT"]
random.choice(test_2)

'CCT'

In [31]:
test = ["A", "B", "C", "D", "E"]
tour = []
cool = []

In [25]:
tour_1 = tour + [test] #this is another way of adding something to a list. 

In [26]:
tour_1

[['A', 'B', 'C', 'D', 'E']]

In [27]:
tour_2 = tour.append(test)

In [29]:
print(tour_2) #this becomes a None type. 

None


In [32]:
tour_3 = tour.append(cool)

NoneType