## Imports

In [2]:
from collections import deque
import regex as re
import heapq

## Graphs


In [3]:
class Graph:
    def __init__(self, g=None):
        self.graph = {}
        if g:
            for v, neighbors in g.items():
                self.add_vertex(v)
                for d in neighbors:
                    self.add_edge(v, d)

    def add_vertex(self, v):
        if v not in self.graph:
            self.graph[v] = []

    def add_edge(self, o, d):
        self.add_vertex(o)
        self.add_vertex(d)
        if d not in self.graph[o]:
            self.graph[o].append(d)

    def print_graph(self):
        for v in self.graph:
            print(v, "->", self.graph[v])

    def size(self):
        return len(self.get_nodes()), len(self.get_edges())

    def get_nodes(self):
        return list(self.graph.keys())

    def get_edges(self):
        edges = []
        for v in self.graph:
            for d in self.graph[v]:
                edges.append((v, d))
        return edges

    def get_successors(self, v):
        return self.graph.get(v, [])

    def get_predecessors(self, v):
        return [u for u in self.graph if v in self.graph[u]]

    def get_adjacents(self, v):
        return list(set(self.get_successors(v)) | set(self.get_predecessors(v)))

    def in_degree(self, v):
        return len(self.get_predecessors(v))

    def out_degree(self, v):
        return len(self.get_successors(v))

    def degree(self, v):
        return len(self.get_adjacents(v))

    def reachable_bfs(self, v):
        queue = [v]
        visited = set()
        while queue:
            node = queue.pop(0)
            for neighbor in self.get_successors(node):
                if neighbor not in visited:
                    visited.add(neighbor)
                    queue.append(neighbor)
        return list(visited)

    def reachable_dfs(self, v):
        stack = [v]
        visited = set()
        while stack:
            node = stack.pop()
            for neighbor in reversed(self.get_successors(node)):
                if neighbor not in visited:
                    visited.add(neighbor)
                    stack.append(neighbor)
        return list(visited)

    def distance(self, s, d):
        if s == d:
            return 0
        queue = [(s, 0)]
        visited = set([s])
        while queue:
            node, dist = queue.pop(0)
            for neighbor in self.get_successors(node):
                if neighbor == d:
                    return dist + 1
                if neighbor not in visited:
                    visited.add(neighbor)
                    queue.append((neighbor, dist + 1))
        return None

    def shortest_path(self, s, d):
        if s == d:
            return [s]
        queue = [(s, [])]
        visited = set([s])
        while queue:
            node, path = queue.pop(0)
            for neighbor in self.get_successors(node):
                if neighbor == d:
                    return path + [node, neighbor]
                if neighbor not in visited:
                    visited.add(neighbor)
                    queue.append((neighbor, path + [node]))
        return None

    def node_has_cycle(self, v):
        queue = [v]
        visited = set([v])
        while queue:
            node = queue.pop(0)
            for neighbor in self.get_successors(node):
                if neighbor == v:
                    return True
                if neighbor not in visited:
                    visited.add(neighbor)
                    queue.append(neighbor)
        return False

    def has_cycle(self):
        return any(self.node_has_cycle(v) for v in self.graph)

    def visualize(self):
        dot = graphviz.Digraph(comment='Graph', format='png')
        for v in self.graph:
            dot.node(v)
        for v in self.graph:
            for d in self.graph[v]:
                dot.edge(v, d)
        dot.render('output/unweighted_graph', view=True)


class WeightedGraph(Graph):
    def __init__(self, g=None):
        self.graph = {}
        if g:
            for v, neighbors in g.items():
                self.add_vertex(v)
                for d, w in neighbors:
                    self.add_edge(v, d, w)

    def add_edge(self, o, d, w):
        self.add_vertex(o)
        self.add_vertex(d)
        self.graph[o].append((d, w))

    def get_edges(self):
        edges = []
        for v in self.graph:
            for d, w in self.graph[v]:
                edges.append((v, d, w))
        return edges

    def get_successors(self, v):
        return [d for d, _ in self.graph.get(v, [])]

    def dijkstra(self, start):
        distances = {v: float('inf') for v in self.graph}
        distances[start] = 0
        heap = [(0, start)]
        visited = set()

        while heap:
            dist_u, u = heapq.heappop(heap)
            if u in visited:
                continue
            visited.add(u)
            for v, weight in self.graph[u]:
                alt = dist_u + weight
                if alt < distances[v]:
                    distances[v] = alt
                    heapq.heappush(heap, (alt, v))
        return distances

    def visualize(self):
        dot = graphviz.Digraph(comment='Weighted Graph', format='png')
        for v in self.graph:
            dot.node(v)
        for v in self.graph:
            for d, w in self.graph[v]:
                dot.edge(v, d, label=str(w))
        dot.render('output/weighted_graph', view=True)

In [4]:
print("=== Unweighted Graph ===")
g1 = {
    'A': ['B', 'C'],
    'B': ['D'],
    'C': ['D'],
    'D': []
}

G1 = Graph(g1)
G1.print_graph()
print("Nodes:", G1.get_nodes())
print("Edges:", G1.get_edges())
print("Size:", G1.size())
print("Successors of A:", G1.get_successors("A"))
print("Predecessors of B:", G1.get_predecessors("B"))
print("Adjacents of C:", G1.get_adjacents("C"))
print("Out-degree of B:", G1.out_degree("B"))
print("In-degree of C:", G1.in_degree("C"))
print("Degree of A:", G1.degree("A"))
print("Shortest path from A to D:", G1.shortest_path('A', 'D'))
print("Distance from A to D:", G1.distance("A", "D"))
print("Reachable from A (BFS):", G1.reachable_bfs('A'))
print("Reachable from A (DFS):", G1.reachable_dfs('A'))
print("Graph has cycle:", G1.has_cycle())
print()

=== Unweighted Graph ===
A -> ['B', 'C']
B -> ['D']
C -> ['D']
D -> []
Nodes: ['A', 'B', 'C', 'D']
Edges: [('A', 'B'), ('A', 'C'), ('B', 'D'), ('C', 'D')]
Size: (4, 4)
Successors of A: ['B', 'C']
Predecessors of B: ['A']
Adjacents of C: ['A', 'D']
Out-degree of B: 1
In-degree of C: 1
Degree of A: 2
Shortest path from A to D: ['A', 'B', 'D']
Distance from A to D: 2
Reachable from A (BFS): ['B', 'C', 'D']
Reachable from A (DFS): ['B', 'C', 'D']
Graph has cycle: False



In [5]:
print("=== Weighted Graph ===")
g2 = {
    'A': [('B', 1), ('C', 4)],
    'B': [('C', 2), ('D', 5)],
    'C': [('D', 1)],
    'D': []
}

G2 = WeightedGraph(g2)
G2.print_graph()
print("Nodes:", G2.get_nodes())
print("Edges:", G2.get_edges())
print("Size:", G2.size())
print("Successors of A:", G2.get_successors("A"))
print("Predecessors of B:", G2.get_predecessors("B"))
print("Adjacents of C:", G2.get_adjacents("C"))
print("Out-degree of B:", G2.out_degree("B"))
print("In-degree of C:", G2.in_degree("C"))
print("Degree of A:", G2.degree("A"))
print("Dijkstra distances from A:", G2.dijkstra('A'))
print("Reachable from A (DFS):", G2.reachable_dfs('A'))
print("Graph has cycle:", G2.has_cycle())

=== Weighted Graph ===
A -> [('B', 1), ('C', 4)]
B -> [('C', 2), ('D', 5)]
C -> [('D', 1)]
D -> []
Nodes: ['A', 'B', 'C', 'D']
Edges: [('A', 'B', 1), ('A', 'C', 4), ('B', 'C', 2), ('B', 'D', 5), ('C', 'D', 1)]
Size: (4, 5)
Successors of A: ['B', 'C']
Predecessors of B: []
Adjacents of C: ['D']
Out-degree of B: 2
In-degree of C: 0
Degree of A: 2
Dijkstra distances from A: {'A': 0, 'B': 1, 'C': 3, 'D': 4}
Reachable from A (DFS): ['B', 'C', 'D']
Graph has cycle: False


# Redes Metabólicas

##  Projeto de Alto Nível

### Componentes Principais

1. **Entrada de Dados (`ecoli.txt`)**
   - Formato: uma reação por linha
   - Exemplo: `R1: A + B => C + D`

2. **Parser de Reações**
   - Extrai dados estruturados de substratos e produtos
   - Produz uma lista de dicionários:
     ```python
     {
       "id": "R1",
       "substrates": ["A", "B"],
       "products": ["C", "D"]
     }
     ```

3. **Construção do Grafo Metabólico**
   - Usa a classe `MyGraph`
   - Cada nó representa um metabolito
   - Arestas ligam metabolitos que participam da mesma reação

4. **Módulo de Centralidade (`CentralityAnalyzer`)**
   - Calcula:
     - Centralidade de grau (Degree)
     - Centralidade de proximidade (Closeness)
     - Centralidade de intermediação (Betweenness)

5. **Módulo de Propagação de Metabolitos**
   - Dado um conjunto inicial de metabolitos:
     - Identifica reações ativas (todos os substratos disponíveis)
     - Coleta produtos gerados
     - Repete o processo iterativamente

6. **Saída**
   - Lista dos metabolitos mais centrais por métrica
   - Metabolitos finais produzíveis a partir de um conjunto inicial

---
## Projeto de Baixo Nível

### Funções Principais

####  `parse_reactions(filepath)`
- Lê `ecoli.txt` e transforma em dicionários de reação

####  `build_metabolite_graph(reactions)`
- Constrói um grafo a partir da coocorrência de metabolitos

####  `CentralityAnalyzer`
- Classe para cálculo das centralidades
- Métodos:
  - `degree_centrality()`
  - `closeness_centrality()`
  - `betweenness_centrality()`
  - `top_nodes(centrality_dict, top_n)`

####  `get_active_reactions(metabolites, reactions)`
- Retorna reações cujos substratos estão disponíveis

####  `get_produced_metabolites(active_reactions)`
- Retorna os produtos das reações ativas

####  `compute_final_metabolites(initial_metabs, reactions)`
- Aplica a propagação metabólica iterativamente


In [6]:
def is_in_tuple_list(tl, val):
    for (x, _) in tl:
        if val == x:
            return True
    return False

class MN_Graph(Graph):
    def reachable_with_dist(self, s):
        res = []
        l = [(s, 0)]
        while l:
            node, dist = l.pop(0)
            if node != s:
                res.append((node, dist))
            for elem in self.get_successors(node):
                if not is_in_tuple_list(l, elem) and not is_in_tuple_list(res, elem):
                    l.append((elem, dist + 1))
        return res

    def mean_distances(self):
        tot = 0
        num_reachable = 0
        for k in self.get_nodes():
            distsk = self.reachable_with_dist(k)
            for _, dist in distsk:
                tot += dist
            num_reachable += len(distsk)
        meandist = float(tot) / num_reachable if num_reachable else 0
        n = len(self.get_nodes())
        density = float(num_reachable) / ((n - 1) * n) if n > 1 else 0
        return meandist, density

    def closeness_centrality(self, node):
        dist = self.reachable_with_dist(node)
        if len(dist) == 0:
            return 0.0
        s = sum(d[1] for d in dist)
        return len(dist) / s

    def highest_closeness(self, top=10):
        cc = {k: self.closeness_centrality(k) for k in self.get_nodes()}
        ord_cl = sorted(cc.items(), key=lambda x: x[1], reverse=True)
        return [x[0] for x in ord_cl[:top]]

    def betweenness_centrality(self, node):
        total_sp = 0
        sps_with_node = 0
        for s in self.get_nodes():
            for t in self.get_nodes():
                if s != t and s != node and t != node:
                    sp = self.shortest_path(s, t)
                    if sp:
                        total_sp += 1
                        if node in sp:
                            sps_with_node += 1
        return sps_with_node / total_sp if total_sp > 0 else 0.0

    def clustering_coef(self, v):
        adjs = self.get_adjacents(v)
        if len(adjs) <= 1:
            return 0.0
        ligs = 0
        for i in adjs:
            for j in adjs:
                if i != j and (j in self.get_successors(i) or i in self.get_successors(j)):
                    ligs += 1
        return float(ligs) / (len(adjs) * (len(adjs) - 1))

    def all_clustering_coefs(self):
        return {k: self.clustering_coef(k) for k in self.get_nodes()}

    def mean_clustering_coef(self):
        ccs = self.all_clustering_coefs()
        return sum(ccs.values()) / float(len(ccs)) if ccs else 0.0

    def mean_clustering_perdegree(self, deg_type="inout"):
        degs = self.all_degrees(deg_type)
        ccs = self.all_clustering_coefs()
        degs_k = {}
        for k in degs:
            degs_k.setdefault(degs[k], []).append(k)
        ck = {}
        for k in degs_k:
            tot = sum(ccs[v] for v in degs_k[k])
            ck[k] = tot / len(degs_k[k])
        return ck


class CentralityAnalyzer:
    def __init__(self, graph):
        self.graph = graph

    def degree_centrality(self):
        return {node: len(self.graph.get_successors(node)) for node in self.graph.get_nodes()}

    def closeness_centrality(self):
        centrality = {}
        for node in self.graph.get_nodes():
            total_dist, reachable_count = self._bfs_total_distance_and_reach_count(node)
            centrality[node] = (reachable_count / total_dist) if total_dist > 0 else 0.0
        return centrality

    def _bfs_total_distance_and_reach_count(self, start):
        visited = set()
        queue = deque([(start, 0)])
        total = 0
        reachable_count = 0

        while queue:
            node, dist = queue.popleft()
            if node not in visited:
                visited.add(node)
                if node != start:
                    total += dist
                    reachable_count += 1
                for neighbor in self.graph.get_successors(node):
                    if neighbor not in visited:
                        queue.append((neighbor, dist + 1))

        return total, reachable_count


    def betweenness_centrality(self):
        centrality = dict.fromkeys(self.graph.get_nodes(), 0.0)
        for s in self.graph.get_nodes():
            stack = []
            pred = {w: [] for w in self.graph.get_nodes()}
            sigma = dict.fromkeys(self.graph.get_nodes(), 0)
            dist = dict.fromkeys(self.graph.get_nodes(), -1)
            sigma[s] = 1
            dist[s] = 0
            queue = deque([s])

            while queue:
                v = queue.popleft()
                stack.append(v)
                for w in self.graph.get_successors(v):
                    if dist[w] < 0:
                        dist[w] = dist[v] + 1
                        queue.append(w)
                    if dist[w] == dist[v] + 1:
                        sigma[w] += sigma[v]
                        pred[w].append(v)

            delta = dict.fromkeys(self.graph.get_nodes(), 0)
            while stack:
                w = stack.pop()
                for v in pred[w]:
                    delta[v] += (sigma[v] / sigma[w]) * (1 + delta[w])
                if w != s:
                    centrality[w] += delta[w]
        return centrality

    def top_nodes(self, centrality_dict, top_n=5):
        return heapq.nlargest(top_n, centrality_dict.items(), key=lambda x: x[1])

# REACTION PARSER
def parse_reactions(file_path):
    """Parses ecoli.txt into a list of reaction dicts"""
    reactions = []
    with open(file_path, 'r') as f:
        for line in f:
            if ':' not in line: continue
            parts = re.split(r':\s*', line.strip(), maxsplit=1)
            if len(parts) != 2: continue
            reaction_id, formula = parts
            match = re.search(r"^(.*?)\s*(<=>|=>)\s*(.*?)$", formula)
            if not match: continue
            substrates = [m.strip() for m in match.group(1).split('+')]
            products = [m.strip() for m in match.group(3).split('+')]
            reactions.append({
                'id': reaction_id,
                'substrates': substrates,
                'products': products
            })
    return reactions

def build_metabolite_graph(reactions):
    g = MN_Graph()
    for r in reactions:
        metabolites = r['substrates'] + r['products']
        for i in range(len(metabolites)):
            for j in range(i + 1, len(metabolites)):
                g.add_edge(metabolites[i], metabolites[j])
    return g

# ASSESSMENT FUNCTIONS
def get_active_reactions(metabolites_set, reactions):
    return [r for r in reactions if all(sub in metabolites_set for sub in r['substrates'])]

def get_produced_metabolites(active_reactions):
    produced = set()
    for r in active_reactions:
        produced.update(r['products'])
    return produced

def compute_final_metabolites(initial_metabolites, reactions):
    known_metabolites = set(initial_metabolites)
    while True:
        active = get_active_reactions(known_metabolites, reactions)
        new = get_produced_metabolites(active)
        if new.issubset(known_metabolites):
            break
        known_metabolites.update(new)
    return known_metabolites




In [7]:
file_path = "ecoli.txt"
reactions = parse_reactions(file_path)

print(" Reactions parsed:", len(reactions))

# Centrality Analysis
g = build_metabolite_graph(reactions)
analyzer = CentralityAnalyzer(g)

print("\n--- Degree Centrality ---")
for node, val in analyzer.top_nodes(analyzer.degree_centrality()):
    print(f"{node}: {val}")

print("\n--- Closeness Centrality ---")
for node, val in analyzer.top_nodes(analyzer.closeness_centrality()):
    print(f"{node}: {val:.4f}")

print("\n--- Betweenness Centrality ---")
for node, val in analyzer.top_nodes(analyzer.betweenness_centrality()):
    print(f"{node}: {val:.4f}")

#  Metabolite Propagation 
initial_metabs = ["M_glc_DASH_D_c", "M_h2o_c", "M_nad_c", "M_atp_c"]
final_metabs = compute_final_metabolites(initial_metabs, reactions)

print("\n--- Initial Metabolites ---")
print(initial_metabs)

print("\n--- Final Reachable Metabolites ---")
print(sorted(final_metabs))

 Reactions parsed: 931

--- Degree Centrality ---
M_atp_c: 234
M_h2o_c: 211
M_h_c: 196
M_pi_c: 132
M_h_e: 98

--- Closeness Centrality ---
M_12ppd_DASH_S_e: 1.0000
M_h2o_c: 0.5682
M_h_c: 0.5672
M_atp_c: 0.5637
M_pi_c: 0.5192

--- Betweenness Centrality ---
M_h_c: 216934.6170
M_h2o_c: 130880.9425
M_atp_c: 73977.9868
M_pi_c: 41413.1577
M_h_e: 38528.6860

--- Initial Metabolites ---
['M_glc_DASH_D_c', 'M_h2o_c', 'M_nad_c', 'M_atp_c']

--- Final Reachable Metabolites ---
['M_13dpg_c', 'M_23ddhb_c', 'M_23dhb_c', 'M_23dhba_c', 'M_23dhmb_c', 'M_2dda7p_c', 'M_2ddg6p_c', 'M_2me4p_c', 'M_34hpp_c', 'M_3dhq_c', 'M_3dhsk_c', 'M_3mob_c', 'M_3psme_c', 'M_4hbz_c', 'M_4per_c', 'M_6pgc_c', 'M_6pgl_c', 'M_ade_c', 'M_adn_c', 'M_adp_c', 'M_adphep_DASH_DD_c', 'M_adphep_DASH_LD_c', 'M_alac_DASH_S_c', 'M_amp_c', 'M_ara5p_c', 'M_atp_c', 'M_camp_c', 'M_cbp_c', 'M_chor_c', 'M_co2_c', 'M_db4p_c', 'M_dha_c', 'M_dhap_c', 'M_dnad_c', 'M_dxyl5p_c', 'M_e4p_c', 'M_f6p_c', 'M_fdp_c', 'M_for_c', 'M_fprica_c', 'M_g3p_c', 

## Genome Assembly

### High-Level Description (Conceptual)

**Goal**: Reconstruct the original DNA sequence from a list of overlapping fragments (k-mers).

**Approach**:

1. **De Bruijn Graph**:

   * Represent k-mers as edges.
   * Nodes are (k-1)-mers (prefixes/suffixes).
   * Find an **Eulerian path**: a path that visits every edge exactly once.
   * Rebuild the sequence by following this path.

2. **Overlap Graph**:

   * Represent k-mers as nodes.
   * Add an edge from node A to node B if the suffix of A matches the prefix of B (length k-1).
   * Find a **Hamiltonian path**: a path that visits every node exactly once.
   * Reconstruct the sequence by joining overlapping fragments along the path.

---

###  Low-Level Description (Implementation)

**De Bruijn Algorithm**:

* For each k-mer, add an edge from `prefix(k-mer)` to `suffix(k-mer)`.
* Ensure the graph is *nearly balanced* (1 start, 1 end node).
* Add a temporary edge from end to start.
* Use **Hierholzer’s algorithm** to find an Eulerian cycle.
* Remove the temporary edge to get the Eulerian path.
* Reconstruct the sequence by concatenating characters from each node.

**Overlap Graph Algorithm**:

* Label each fragment uniquely (e.g., `"ATG-1"`).
* For every pair of fragments, add an edge if `suffix(A) == prefix(B)`.
* Use **backtracking** to search for a Hamiltonian path.
* Rebuild the sequence using the first full fragment and last characters from the rest.


In [8]:
def prefix(seq): return seq[:-1]
def suffix(seq): return seq[1:]

class DeBruijnGraph(Graph):
    def __init__(self, frags):
        super().__init__()
        # Add edges where each k-mer contributes an edge from its prefix to its suffix
        for seq in frags:
            self.add_edge(prefix(seq), suffix(seq))

    def check_nearly_balanced_graph(self):
        # Identify if the graph is nearly balanced:
        # One node with out-degree = in-degree + 1 (start)
        # One node with in-degree = out-degree + 1 (end)
        res = None, None  # (start, end)
        for n in self.graph:
            indeg = self.in_degree(n)
            outdeg = self.out_degree(n)
            if indeg - outdeg == 1:
                res = res[0], n  # candidate to be end node
            elif outdeg - indeg == 1:
                res = n, res[1]  # candidate to be start node
            elif indeg != outdeg:
                return None, None  # not balanced or nearly balanced
        return res

    def eulerian_path(self):
        # Find a Eulerian path using Hierholzer's algorithm
        start, end = self.check_nearly_balanced_graph()
        if not start or not end:
            return None

        # Add a temporary edge to make the graph Eulerian
        self.add_edge(end, start)

        path = []
        stack = [start]
        # Copy of graph edges to allow mutation during traversal
        edges = {u: list(vs) for u, vs in self.graph.items()}

        while stack:
            u = stack[-1]
            if edges.get(u):
                stack.append(edges[u].pop())
            else:
                path.append(stack.pop())

        path.reverse()

        # Remove the temporary edge to recover the original path
        for i in range(len(path) - 1):
            if path[i] == end and path[i + 1] == start:
                return path[i + 1:] + path[1:i + 1]

        return None

    def seq_from_path(self, path):
        # Reconstruct the original sequence from a Eulerian path
        if not path:
            return None
        return path[0] + ''.join(n[-1] for n in path[1:])

class OverlapGraph(Graph):
    def __init__(self, frags):
        super().__init__()

        # Add unique suffix to each fragment to handle duplicates
        self.frags = [f"{f}-{i}" for i, f in enumerate(frags, 1)]

        # Add all vertices to the graph
        for f in self.frags:
            self.add_vertex(f)

        # Add edges based on overlap: suffix of f1 matches prefix of f2
        for f1 in self.frags:
            s1 = suffix(f1.split('-')[0])  # suffix of the sequence
            for f2 in self.frags:
                if prefix(f2.split('-')[0]) == s1:
                    self.add_edge(f1, f2)

    def search_hamiltonian_path(self):
        # Try to find a Hamiltonian path using backtracking
        def bt(path):
            if len(path) == len(self.graph):
                return path
            for neighbor in self.graph[path[-1]]:
                if neighbor not in path:
                    res = bt(path + [neighbor])
                    if res:
                        return res
            return None

        # Attempt to start from every vertex
        for start in self.graph:
            res = bt([start])
            if res:
                return res
        return None

    def get_seq(self, node):
        # Extract the original sequence from node label (e.g., 'ATG-3' -> 'ATG')
        return node.split('-')[0]

    def seq_from_path(self, path):
        # Reconstruct sequence from Hamiltonian path
        if not path:
            return None
        return self.get_seq(path[0]) + ''.join(self.get_seq(n)[-1] for n in path[1:])

In [9]:
def composition(k, seq):
    return sorted([seq[i:i+k] for i in range(len(seq)-k+1)])

def run_all(seq, k):
    print("Original sequence:", seq)
    frags = composition(k, seq)
    print("k-mers:", frags)

    # De Bruijn Graph Method
    print("\n--- De Bruijn ---")
    dbg = DeBruijnGraph(frags)
    dbg.print_graph()
    path = dbg.eulerian_path()
    if path:
        print("Eulerian path found:", path)
        print("Reconstructed sequence:", dbg.seq_from_path(path))
    else:
        print("Eulerian path not found.")

    # Overlap Graph Method with repetitions
    print("\n--- Overlap with repetitions ---")
    og = OverlapGraph(frags)
    og.print_graph()
    path = og.search_hamiltonian_path()
    if path:
        print("Hamiltonian path found:", path)
        print("Reconstructed sequence:", og.seq_from_path(path))
    else:
        print("Hamiltonian path not found.")

run_all('ATGCAATGGTCTG', 3)

Original sequence: ATGCAATGGTCTG
k-mers: ['AAT', 'ATG', 'ATG', 'CAA', 'CTG', 'GCA', 'GGT', 'GTC', 'TCT', 'TGC', 'TGG']

--- De Bruijn ---
AA -> ['AT']
AT -> ['TG']
TG -> ['GC', 'GG']
CA -> ['AA']
CT -> ['TG']
GC -> ['CA']
GG -> ['GT']
GT -> ['TC']
TC -> ['CT']
Eulerian path not found.

--- Overlap with repetitions ---
AAT-1 -> ['ATG-2', 'ATG-3']
ATG-2 -> ['TGC-10', 'TGG-11']
ATG-3 -> ['TGC-10', 'TGG-11']
CAA-4 -> ['AAT-1']
CTG-5 -> ['TGC-10', 'TGG-11']
GCA-6 -> ['CAA-4']
GGT-7 -> ['GTC-8']
GTC-8 -> ['TCT-9']
TCT-9 -> ['CTG-5']
TGC-10 -> ['GCA-6']
TGG-11 -> ['GGT-7']
Hamiltonian path found: ['ATG-2', 'TGC-10', 'GCA-6', 'CAA-4', 'AAT-1', 'ATG-3', 'TGG-11', 'GGT-7', 'GTC-8', 'TCT-9', 'CTG-5']
Reconstructed sequence: ATGCAATGGTCTG
