Problema de geração de contig: gerar os contigs a partir de uma coleção de leituras (com cobertura imperfeita).

* Entrada: Uma coleção de padrões k-mers.
* Saída: Todos os contigs em DeBruijn(Patterns).

In [None]:
from collections import defaultdict

def build_debruijn_graph(patterns):
    """Build the De Bruijn graph of a collection of k-mers.

    Every k-mer contributes one edge that goes from its prefix (the k-mer
    without its last character) to its suffix (the k-mer without its first
    character). A k-mer that occurs several times yields several edges.

    Args:
        patterns: Iterable of k-mer strings.

    Returns:
        A ``defaultdict(list)`` mapping each prefix to the list of suffixes
        it points to, in input order. Nodes that never occur as a prefix
        are not keys of the mapping.
    """
    adjacency = defaultdict(list)
    for kmer in patterns:
        adjacency[kmer[:-1]].append(kmer[1:])
    return adjacency

def is_1_in_1_out(node, graph, indegree, outdegree):
    """Tell whether ``node`` has exactly one incoming and one outgoing edge.

    Args:
        node: The node to test.
        graph: Unused; kept so the signature stays the same for callers.
        indegree: Mapping from node to its number of incoming edges.
        outdegree: Mapping from node to its number of outgoing edges.

    Returns:
        ``True`` when both degrees equal 1, ``False`` otherwise.
    """
    # Check the in-degree first and stop early, so ``outdegree`` is only
    # consulted when the in-degree already matches.
    if indegree[node] != 1:
        return False
    return outdegree[node] == 1

def calculate_degrees(graph):
    """Count incoming and outgoing edges for the nodes of ``graph``.

    Args:
        graph: Mapping from a node to the list of nodes it points to.
            Repeated entries in a list count as separate edges.

    Returns:
        A pair ``(indegree, outdegree)`` of ``defaultdict(int)`` objects.
        ``outdegree`` has an entry for every key of ``graph``; ``indegree``
        has an entry for every node that appears in some adjacency list.
        Any other node reads as 0 in either mapping.
    """
    incoming = defaultdict(int)
    outgoing = defaultdict(int)

    for source, targets in graph.items():
        outgoing[source] = len(targets)
        for target in targets:
            incoming[target] += 1

    return incoming, outgoing

def find_contigs(graph):
    """Return the contigs (maximal non-branching paths) of a De Bruijn graph.

    A contig is spelled from a path by taking the first node in full and
    then the last character of every following node. Two kinds of paths
    are reported:

    * paths that start at a node which is not 1-in-1-out and has at least
      one outgoing edge, extended through 1-in-1-out nodes until a node
      that is not 1-in-1-out is reached;
    * isolated cycles, i.e. cycles made only of 1-in-1-out nodes, which no
      path of the first kind can reach. Each cycle is reported once,
      starting and ending at the first of its nodes met while iterating
      ``graph``.

    Args:
        graph: Mapping from a node (non-empty string) to the list of nodes
            it points to, as produced by ``build_debruijn_graph``.

    Returns:
        A list of contig strings: first the paths that start at branching
        nodes, in graph iteration order, then the isolated cycles.
    """
    contigs = []
    indegree, outdegree = calculate_degrees(graph)
    # 1-in-1-out nodes already consumed by a contig; whatever 1-in-1-out
    # node is left over afterwards must lie on an isolated cycle.
    visited = set()

    def spell(path):
        return "".join([path[0]] + [p[-1] for p in path[1:]])

    # Paths that start at a node which is not 1-in-1-out.
    for node in graph:
        if is_1_in_1_out(node, graph, indegree, outdegree) or outdegree[node] == 0:
            continue
        for neighbor in graph[node]:
            path = [node, neighbor]
            while is_1_in_1_out(neighbor, graph, indegree, outdegree):
                visited.add(neighbor)
                neighbor = graph[neighbor][0]
                path.append(neighbor)
            contigs.append(spell(path))

    # Isolated cycles: every node on them is 1-in-1-out, so the loop above
    # never enters them.
    for node in graph:
        if node in visited or not is_1_in_1_out(node, graph, indegree, outdegree):
            continue
        visited.add(node)
        path = [node]
        current = graph[node][0]
        while current != node:
            visited.add(current)
            path.append(current)
            current = graph[current][0]
        path.append(node)  # close the cycle back on its starting node
        contigs.append(spell(path))

    return contigs

# Input: a string of k-mers separated by spaces
with open("dataset_30189_5.txt", "r") as file_1:
    linhas_1 = file_1.readlines()
    # Only the first line of the file is used as input.
    input_string = linhas_1[0].strip()

# Convert the input into a list of k-mers
patterns = input_string.split()

# Build the De Bruijn graph
graph = build_debruijn_graph(patterns)

# Find the contigs
contigs = find_contigs(graph)

# Print the contigs, one per line
for contig in contigs:
    print(contig)