In [2]:
import numpy as np
import csv

def create_overlap_graph(reads, k):
    num_reads = len(reads)
    overlap_graph = np.zeros((num_reads, num_reads))

    for i in range(num_reads):
        for j in range(num_reads):
            if i != j:
                overlap_graph[i, j] = calculate_overlap(reads[i], reads[j], k)

    return overlap_graph

def calculate_overlap(read1, read2, k):
    overlap = 0
    for i in range(k, 0, -1):
        if read1[-i:] == read2[:i]:
            overlap = i
            break
    return overlap

def find_hamiltonian_path(overlap_graph, genome_length):
    num_reads = overlap_graph.shape[0]
    path = []

    def backtrack(current_node, genome_length):
        if len(path) == num_reads:
            return True

        for next_node in range(num_reads):
            if overlap_graph[current_node, next_node] > 0:
                overlap_graph[current_node, next_node] = 0
                path.append(next_node)
                if backtrack(next_node, genome_length):
                    return True
                overlap_graph[current_node, next_node] = 1
                path.pop()

        return False

    for start_node in range(num_reads):
        path.append(start_node)
        if backtrack(start_node, genome_length):
            return path
        path.pop()

    return []

def reconstruct_genome(reads, path, genome_length):
    genome = reads[path[0]]

    for i in range(1, len(path)):
        overlap = calculate_overlap(reads[path[i - 1]], reads[path[i]], k)
        genome += reads[path[i]][overlap:]

    # Trim the reconstructed genome to the desired length
    genome = genome[:genome_length]

    return genome


reads = []
with open('reads.fasta', 'r') as file:
   reader = csv.reader(file)
   i = 0
   for row in reader:
       if i%2 == 1:
         reads.append(row[0])
       i += 1 

k = 10  # Length of overlap between reads
genome_length = 1000  # length of the original genome


# Reconstructing the genome using Hamiltonian path approach
overlap_graph = create_overlap_graph(reads, k)
path = find_hamiltonian_path(overlap_graph, genome_length)
reconstructed_genome = reconstruct_genome(reads, path, genome_length)

print("Reconstructed Genome:")
print(reconstructed_genome)

Reconstructed Genome:
AGCCAATAGCAGATATGCCCATACCGCTGTATTCATAGCTTTCTCTACAGCCAATAGCAGATATGCCCATACCGGTCGACTGCACGCAGTCTTCCGGAGCCAATAGCAGATATGCCCATACGCTGTATTCATAGCTTTCTCTACACGGCCTAAAAGCGGTCGACTGCACGCTGTATTCATAGCTTTCTCTACAAGGTAGGGGTACGTACATGTTTCGCTGTATTCATAGCTTTCTCTACAAATTCGACAGATGTGGTTTAATGATTCCGCCTCCTATTACAACAGCCAATAGCAGATATGCCCATACCTTATCACGTACGTAGATTCGAATAGCCAATAGCAGATATGCCCATACCAAGGTAGGGGTACGTACATGTTTCGGTCGACTGCACGCAGTCTTCCGGGTAGGTCATGGGAGGGGTGATGTGATTCCGCCTCCTATTACAACAGCGCTGTATTCATAGCTTTCTCTACAATACCAGTTTTTATTGTGTCCATGATTCCGCCTCCTATTACAACAGCGGTCGACTGCACGCAGTCTTCCGGCCCCGAGGATCCTGCACTGAGTCTGATTCCGCCTCCTATTACAACAGCTTATCACGTACGTAGATTCGAATACGGCCTAAAAGCGGTCGACTGCACGGTCGACTGCACGCAGTCTTCCGGATCCGTCCTGTCTGCGACACTGAGCCAATAGCAGATATGCCCATACCGACGCCCCTAATTGGATCAATCAGCCAATAGCAGATATGCCCATACCAATACCAGTTTTTATTGTGTCCATGAGGAGCTGGGGTGCAAATTAGACGCTGTATTCATAGCTTTCTCTACAGGGGTGATGTTCTGTTATTAGACCGCTGTATTCATAGCTTTCTCTACAGTACAAATTCGACAGATGTGGTTTGATTCCGCCTCCTATTACAACAGCCCCGAGGATCCTGCACTGAGTCTTATCACGTACGTAGATTCGAATACAAA