<a href="https://colab.research.google.com/github/byunsy/bioinformatics-algorithms-py/blob/main/BA_3D.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# De Bruijn Graph from a String Problem

### Function

In [1]:
def prefix(string):
    """Return ``string`` without its last character.

    An empty or single-character input yields an empty result.
    """
    return string[:-1]

def suffix(string):
    """Return ``string`` without its first character.

    An empty or single-character input yields an empty result.
    """
    return string[1:]

In [2]:
def StringCompose(text, k):
    """Return every length-``k`` substring of ``text`` in lexicographic order.

    Substrings are taken at each start position from 0 to ``len(text) - k``
    inclusive, so repeated k-mers appear once per occurrence. If ``text`` is
    shorter than ``k`` the result is an empty list.
    """
    window_count = len(text) - k + 1
    return sorted(text[start:start + k] for start in range(window_count))

In [3]:
def DeBruijn(text, k):
    """Build the De Bruijn graph of ``text`` for k-mers of length ``k``.

    Nodes are the distinct (k-1)-mers occurring in ``text``. Every k-mer
    occurrence in ``text`` contributes exactly one edge, from the k-mer's
    prefix (first k-1 characters) to its suffix (last k-1 characters).
    Edges are therefore only created for k-mers that really appear in
    ``text``; two (k-1)-mers that merely overlap are NOT connected unless
    the joining k-mer is present. A k-mer that occurs several times yields
    the same neighbor several times (the graph is a multigraph).

    Parameters
    ----------
    text : str
        The string to build the graph from.
    k : int
        Length of the k-mers that form the edges (expected to be >= 2).

    Returns
    -------
    dict
        Maps each node to the list of its successor nodes. Keys are inserted
        in lexicographic order, each neighbor list is sorted
        lexicographically, and nodes without outgoing edges map to ``[]``.
        If ``text`` is shorter than ``k - 1`` the dict is empty.
    """
    node_len = k - 1
    kmer_count = len(text) - k + 1

    # There is exactly one more (k-1)-mer position than k-mer positions.
    nodes = {text[i:i + node_len] for i in range(kmer_count + 1)}

    # Initialise every node (including sinks) in sorted key order.
    graph = {node: [] for node in sorted(nodes)}

    # One edge per k-mer occurrence: prefix(kmer) -> suffix(kmer).
    for i in range(kmer_count):
        graph[text[i:i + node_len]].append(text[i + 1:i + k])

    # Keep neighbor lists in lexicographic order for deterministic display.
    for neighbors in graph.values():
        neighbors.sort()

    return graph

In [4]:
def DisplayBruijn(graph):
    """Print the adjacency list of ``graph``, one source node per line.

    Parameters
    ----------
    graph : dict
        Maps each node to a list of its neighbor nodes (strings).

    Nodes are printed in sorted key order as ``node -> n1,n2,...`` with the
    neighbors in the order they are stored. Nodes whose neighbor list is
    empty are skipped.
    """
    for node in sorted(graph.keys()):
        neighbors = graph[node]
        if neighbors:
            print("{} -> {}".format(node, ",".join(neighbors)))

### Test Cases

In [5]:
# Modified to use in TestSuite
def DisplayBruijn2(text, k):
    """Return the De Bruijn adjacency list of ``text`` as a single string.

    The graph is obtained from ``DeBruijn(text, k)``. Nodes are visited in
    sorted order; each node with at least one neighbor becomes an entry of
    the form ``node -> n1,n2,...`` and the entries are joined with ``", "``.
    Nodes without neighbors are omitted; an edgeless graph gives ``""``.
    """
    adjacency = DeBruijn(text, k)

    entries = []
    for source in sorted(adjacency.keys()):
        targets = adjacency[source]
        if targets != []:
            entries.append("{} -> {}".format(source, ",".join(targets)))

    return ", ".join(entries)

In [6]:
# Create a function for test suite
def TestSuite(function, cases):
    """Run ``function`` against each case and print a pass/fail report.

    Parameters
    ----------
    function : callable
        Called as ``function(text, k)``; its return value is compared with
        the expected answer using ``==``.
    cases : sequence of (k, text, answer) tuples
        The inputs and the expected result for each test case.

    The report is written to standard output; nothing is returned.
    """
    banner = "*" * 50
    templates = {
        True: "- Test Case {} Passed. Expected: {}, Actual: {}",
        False: "- Test Case {} Failed. Expected: {}, Actual: {}",
    }

    print(banner)
    print("TEST SUITE\n")

    passed = 0
    for number, (k, text, answer) in enumerate(cases, start=1):
        result = function(text, k)
        matched = bool(result == answer)
        if matched:
            passed += 1
        print(templates[matched].format(number, answer, result))

    print("\n{} out of {} passed.".format(passed, len(cases)), end=" ")
    print("END OF TEST SUITE.")
    print(banner)

In [7]:
# Test cases for the suite; each one is a (k, text, expected output) tuple
case1 = (
    4,
    "AAGATTCTCTAC",
    "AAG -> AGA, AGA -> GAT, ATT -> TTC, CTA -> TAC, CTC -> TCT, GAT -> ATT, TCT -> CTA,CTC, TTC -> TCT",
)

cases = [case1]

TestSuite(DisplayBruijn2, cases)

**************************************************
TEST SUITE

- Test Case 1 Passed. Expected: AAG -> AGA, AGA -> GAT, ATT -> TTC, CTA -> TAC, CTC -> TCT, GAT -> ATT, TCT -> CTA,CTC, TTC -> TCT, Actual: AAG -> AGA, AGA -> GAT, ATT -> TTC, CTA -> TAC, CTC -> TCT, GAT -> ATT, TCT -> CTA,CTC, TTC -> TCT

1 out of 1 passed. END OF TEST SUITE.
**************************************************
