<a href="https://colab.research.google.com/github/byunsy/bioinformatics-algorithms-py/blob/main/BA_3I.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# k-Universal Circular String Problem

### Function

In [36]:
import random
import copy

In [37]:
def prefix(string):
    # return string[:len(string)-1]
    return string[:-1]

def suffix(string):
    return string[1:]

In [29]:
def BinaryStrings(k):

    binary_set = []

    f = "#0" + str(k+2) + "b"
    for i in range(2**k):
        bi_kmer = format(i, f)[2:]
        binary_set.append(bi_kmer)
        
    return binary_set

In [34]:
print(BinaryStrings(3))

['000', '001', '010', '011', '100', '101', '110', '111']


In [38]:
def DeBruijn(patterns):
    
    graph = {}
    for kmer in sorted(patterns):
        if prefix(kmer) not in graph:
            graph[prefix(kmer)] = [suffix(kmer)]
        else:
            graph[prefix(kmer)].append(suffix(kmer))

    return graph

In [39]:
def SearchCycle(graph, start_node):
    cycle = [start_node] # start cycle
    avail_nodes = []     # nodes in the current cycle that have unused edges
    unused = copy.deepcopy(graph) # unused edges 
  
    node = start_node
    while unused[node] != []:

        # randomly select the next node to explore
        next = unused[node][random.randint(0, len(unused[node])-1)] 
        unused[node].remove(next)  # remove from graph dict value
        cycle.append(next)         # add to cycle
        node = next

        # if dead end
        if node not in unused:
            unused[node] = []
            break

    for i in unused:
        if unused[i] != []:
            avail_nodes.append(i) # find nodes with unused edges

    return cycle, avail_nodes

In [40]:
def EulerianPath(graph):

    # Initialize variables
    cycle = []
    unexp_edges = [0] 
    start_node = list(graph.keys())[0] # first key in graph

    while unexp_edges != []:

        cycle, unexp_edges = SearchCycle(graph, start_node)

        # If there exist any unexplored edges,
        # then select a new start_node from unexp_edges
        if unexp_edges:
            start_node = unexp_edges[0] 

    return cycle

In [66]:
def kUniveralCircle(k):
    patterns = BinaryStrings(k)
    db_graph = DeBruijn(patterns)
    path = EulerianPath(db_graph)

    text = path[0]
    for i in range(1, len(path)):
        text += path[i][-1]

    return text[:-k+1] # slice because it is a "circular" string


### Test Cases

In [81]:
print(kUniveralCircle(3))
print(kUniveralCircle(4))
print(kUniveralCircle(5))

01011100
0100111101011000
00100110101111101100010100000111


In [None]:
# Create a function for test suite
def TestSuite(function, cases):
    print("*"*50)
    print("TEST SUITE\n")
    passed = 0
    for i, case in enumerate(cases):
        k, pattern, answer = case
        result = function(k, pattern)
        if result == answer:
            print("- Test Case {} Passed. Expected: {}, Actual: {}"
                  .format(i+1, answer, result))
            passed += 1
        else:
            print("- Test Case {} Failed. Expected: {}, Actual: {}"
                  .format(i+1, answer, result))
    print("\n{} out of {} passed.".format(passed, len(cases)), end=" ")
    print("END OF TEST SUITE.")
    print("*"*50)

In [None]:
# Create test cases to pass into test suite
case1 = ()

cases = [case1]

TestSuite(kUniveralCircle, cases)