# String Spelled by a Genome Path Problem
Find the string spelled by a genome path.

Given: A sequence of k-mers $Pattern_1, \ldots, Pattern_n$ such that the last $k - 1$ symbols of $Pattern_i$ are equal to the first $k - 1$ symbols of $Pattern_{i+1}$ for $i$ from $1$ to $n - 1$.

Return: A string $Text$ of length $k + n - 1$ where the $i$-th k-mer in $Text$ is equal to $Pattern_i$ for all $i$.

# Sample Dataset
ACCGA
CCGAA
CGAAG
GAAGC
AAGCT
# Sample Output
ACCGAAGCT
# Extra Dataset
[GenomePathString.txt](GenomePathString.txt)

In [55]:
import pyperclip

In [44]:
def genome_path_string(kmers):
    """Spell the string described by a genome path.

    Consecutive k-mers on a genome path overlap in k - 1 symbols, so the
    text is the first k-mer followed by the last symbol of every later one.
    The overlap itself is not verified here.

    Args:
        kmers: Sequence of k-mer strings in path order.

    Returns:
        The reconstructed string, or "" when ``kmers`` is empty.
    """
    # An empty path spells the empty string (previously an IndexError).
    if not kmers:
        return ""

    # Join once rather than growing the string one character per iteration.
    return kmers[0] + "".join(kmer[-1] for kmer in kmers[1:])


In [31]:
# Demo: rebuild the sample dataset from the problem statement
kmers = ["ACCGA", "CCGAA", "CGAAG", "GAAGC", "AAGCT"]

result = genome_path_string(kmers)
print(result)  # expected output: ACCGAAGCT

ACCGAAGCT


In [47]:
def genome_path_string_from_file(file_path):
    """Read a genome path from a text file and spell its string.

    The file is expected to hold one k-mer per line, in path order.
    Surrounding whitespace is stripped and blank lines are ignored, so a
    trailing empty line no longer raises an IndexError.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The first k-mer followed by the last symbol of every later k-mer,
        or "" when the file contains no k-mers.
    """
    # Read the k-mers, dropping surrounding whitespace and blank lines.
    with open(file_path, 'r') as file:
        stripped_lines = (line.strip() for line in file)
        kmers = [kmer for kmer in stripped_lines if kmer]

    # Nothing to spell when the file holds no k-mers.
    if not kmers:
        return ""

    # First k-mer in full, then only the final symbol of each later one.
    return kmers[0] + "".join(kmer[-1] for kmer in kmers[1:])


In [58]:
#input = "02_Reconstruct a String from its Genome Path/rosalind_ba3b (2).txt"
# Solve the downloaded Rosalind BA3B dataset
result = genome_path_string_from_file(
    "02_Reconstruct a String from its Genome Path/rosalind_ba3b (2).txt"
)
result

# Copy the result to the clipboard
pyperclip.copy(result)


#output = 
#GCCGGGATCGGGATCAGGCCGTCCTGAATTCTCGGGAAACTTTTTGAGAACAGCATAGGCCGGGGTGCACACTAGGCCGGTCTAGTGTGACCTCGGGCACAAAATAATACTTGGCCAGCCTAACATCTAGATTGGATGGCGTCACCGGATCCTCCTGCAGGTAGGGGAAACTTATTACCATGTACTATTCGAAGATGCTAGTCGCTTCGAATTTGAGGCGGCAGTTCCGCAATCATATCTCGAACAGCTCAAGCCTCACAAGCTCCATGCCGGCAATTTCACTCCGACGTGCGCTCTATCGTATAGCTGGGTACTTGTTGAAGGCACAAAACGCCCGATAGACTTTCAGAATGAGGCAACAAATGGACCTTGTACCTGCTGGCGAGCCTGAATAACCTGATTCCCACTTCGTTAAGGAGTACATGATAGACGGCCCGCAGCATGGCCAGCAGCGAGCTGGCGGTGAGACTTACCCTATTTGAGCCATGAAAGCTTTGTCTTGTGCAAGTTTTCTAGGCCACGTGCGTTGGAGGATAATGTGCTGAGTATAAGTGACCCTATATCGTCCATCTTTTATTTCGGAGCTCATTATCCCTTGAAGTAAAGTACACGGCTGTCGGCTACCTCACAAATCTCCGGTTTCAATCGCCTATATGTTGCATTGTCGTTGGGGGCGTCCGCCGTACGCGTTCGGATAGATAGCAGGGGTGAGGTCCTCCGACGCGCTCACGCTTTGTCACTTTGGTCCTCTGCTCGTGTGTACGAATAGACAGTCCCGTTGAGTGGCTCGCGCAGCCCCTTCCGAGACTCCCCATCCGTTGGAGCCAGCGAGTGACGCAGCTGGAGTTGCTGCTGAACGCCTCCTATGGCGGTGCTTGACCAAACGGCTCAACAAGAATTCGAGTCGCCTTCAAAGCAGACTCGGCGAGCACATTGAGTGTGCAACGGGGCGTGGCATCCACAGTTTGCCGCCATCACCTCTGAAGACAATACAAATGGGCCACATCAAATCAACACACCGTGAAAGAATTTCCGTGGAAGTGCGTCATTGCACACACGAGCATTCTGTGCGGGTTCTAGTTGCAAGGCCCTATCCCTACGAAAAGAAAACATATGGAGGAAGACTTAGGTTCACGTGAGAAGAAAAATCGAGGGCACCAAGCATCAGACCTACCGTTACTCTCTGGGTCCTTGGGCAAGCGAACTTAGTGTTCTATATATAAACGATCTAAGACAGGAAAACAGTAAGACTTGTATTTCAGTCGTTGCCGGCGAAGCAGTATCATGGGAGTGTCTGCTTTGTCTACTCCACTTTATGCCAAACAATTCGAGTACCAGATAGCCCTGCTTCGTCTTATACGAAGCAGTACCCATATCAAATCCCGAAGGTCGACCGTCCGGGGATCGCAAAACGAAAGAGGTTCAAGGGCCTATGCGCATTCAAATAAGGTCGCTAAGTAGCAAACCCATAAAGTCTGCTGCTATTCAGACTGGGAGATTGGACGTTCTGCTGCCACAGAATCCGGGAGGAGTTGGCAGGCCTAGTGAATTTATGCCCGCTTAGGCAGAGTCTCTCTAGGATCGAATATGAGCTACTGTCGTCATTGTGGGCAAATGTTTCCCCATCGCCGAAGGAGCTTGCCCAAATCTGCTCACACATTTAAGACCCGTCACTACTCAAATGTTTATTGCGAGTCTTTCCTGGGGCGAAATACTATTTTCGACCTCTTAAATGTTAGATTCCCACGTTGTGGCCCTCATCCGCCGGGTCCTGCAAGCGTGATCTCTGCACTTTTTCATCCGCGCATCGTAGCAAGCTTACATATGGCAAAATCGTATCTGATTTTCTAACCTCGATTTCACGTGCATATTCATCATAGATTCTGTAGCCCAGGTAACGTCCCGGACCCTACCCTCCCCGGAATGGATTCCAGTCAGCAGACATCGCTAACGCCCGGGCGTATAATGAGACTTAATATGCAGCCGAAACGCGGGTCCT
TAGTGATGGGAAAATGTAGACTAGGCAACAAAGGCCCATAACTGCTGAATCCGCAAGTCTGATACTATATTCGAACCGGGCTACAATTCACAGCGTCCTTCACAATGGTCCTGCTCTCCCATTTAGAAATATACATCCATTGGATTACTCTACGCGCGATTGGCGGCGGGACAGGATCTACAACAAGACAGACTAGATAAACCTGATCCAGGGAACGCGGTACTTAGGCCCGACACTTACAGGACGTCTGTGCAGAAGGCAGGTTTAGCTAGTTTTCCATCATTAGTCGACACGCTCGGATAAGAGCGGAAGCAAGGTTCTGGGCGGGCGACGGCACGCGGTGCAATTTATAAGGTGGATCCTATGTCCAGATTAGCAGTGCTTTCATGCACCACTTATAGCATAGCGAACTCTTACAACCCGTTTGAGATCGTATCGGGTATCAAAATTTTCACAGAGGTGCGATACCAACCGTTGTCCGCCCCACCCAAGAATACAACCATTGTGTAATAGTGTGCCATAGGTGTGGCACGCCCCAGAGTAGGTTCGCCGTGAATTTCTGCCTCGGATACCAATAATGAGACTTTCAGGATGACACTAGCTGTTGCTTTCCACTGCCAGAATATCCGGCCTCTTGCATACGCCCGTGGAACGGCAAGCTGTCCTCGCGGTAATTGTTCCATATTGGCTCTACACAGAGTAGGCTACTGTTTAGGGATCAGAATTTCGCCGATGCTGAGTTGATCCTTTGTAGTCAAGGGTCATCATTTCCCGCTAGTTTTCCATCGGTCCTTGCGCGTGGCTAAGCGACGCATGCAAGGTCCCAGCGAATAAGTATTCGATCATCGCCCCTAAGGCCATAGCTACGCTGGTGTGTAGCTTTAATCTATAGGGGGTAAATCTCGAGGGATAGATGACCATACAGAGTTCTTTTGTTTGACTCAGCAGTTGACGGCCGAATACCCTCGGTCTACCGAGCCCCTTGAAATCTAAAGCATATCCGTACTAAAATAACTCGAACCCTTGCTCCACATAAGCCGAGTGTAACGAGGTGCCTGCGCATGTGTGTAAGAAACAGGTCTAGTAATAACTCTGATCTCTACTGAAGGGAGTTTGACCTAAACGGGTGCGTGTTGCGCGGTCCATGCTGCAGAGTGTTCGAAGAATCATGAGGGTACGACGGGTCTAGTCGATTCTGCATTTATTTGATACATGCGAGAGAGCTGGAAATCCTCCCAGGCGTTATATAGGAATGTGTACGTGCCAGCTCATGGATGGGTCCCATAACTATTGTAGTTGAGGGAATCTCAATTCGATTTCAACCCTACACAGACCGAAAGACTTGTAACAACGTCTGCTTGTACCCGCATCGAGCCCAACAGATTAGTCCAATCTTACTTGGAAACACCATGCCCAGACCCAGGGCCCGGGTAAAATTCGACCTCGAGGCGCAGTAAGTCATGGTATAGGGTCAGACATGGAAAGTACAACCGTGAGGTTGCTCCTTTTCATATGGCAATAAGCATGGTGAATACGGTGTGGGTATCATCTTGGACCTATGCATTACCACAGCCGCTCTTCGCGCTGGATCCCAACAGTCTTATTTATTGTGGTTAGACACGGAGATACGCTTTGAGAATCCCGTGACATCCTATCCGCCCTAAATTGCGGCATGGCAGGGCTCAGCCATTCTCGACATTCTGTGCGCGACCTAGGCCTTCGCAACGTCTTCAATGAATACTTTTTGACTAGGGCTCAGGAGGAGAAGCGGCAGCGCACCTATAAACTGTTCCAGCGAAACTATCTGTACTCTCGAAGGGTACCTGGTTTGAGTAGAAACTCTCATACTCCATTATTGCCGGTTGTTGAGTCTCATAGAAGACGAGAGACTCTTCGTAGTGTCGGTGGGCGACAACTTAACCTTGAATCCCCGGCCCAAGGCCGCTTGCGTGGGTTCACGGAATTGAGATCCGCTCAGTGTAACGAAATCCTGTGACAAT
GGCACAGTTCGCCGCCTTTGCACGTAACTACGACTTGCTTACCGGCCCCCTAGGCAAGCTAAGGGAAGGGTGAAAAGAATATTATCAAAGAAGCACGGGAGTGCCGTATATGGCAACCGGGCTCCTTGACTGAGCGACGACGCCGGGTATTGCCCATATAGTATTATATTGTTGGCCAGCAAAAAACAACTGCGACAAGCAGATGATTGAGCGTTCCTAAGCGCTTTCGTGGTCCAAATGTTTGTTTCGGGGTGCAAGGAGCATCAACTCTAGCCTCATATAGGATGACGACGTAGGCACTCGATCAGTTGGAGCTCACACACTATGTTTTGTTAAATCAACTAGGCAGGCCGGACGTAGGCTGCCGAACCAGATTAGTCTGTGCTATGAATCGTCTTTGATGATTAACCGTCAATTGTTAATAATTCAAGCTACTGAGTCGTGAAGCTTTCCGCCTGACAACGCGGGAAAATCCGGCCTCTTAGTCCTGAACTAGACGAGTGTTCGTCGCGTTGTTCCTGCTCGCGTCCCCCAGATGTTGGGGTCGTGAGTCTCCGAAGCGTTCCGTCACCTGTGTTCCCCTCAACGTGACCTTTCCTTAACTTGTTCAGGCAGTGTTAGCGGCCGCTCACTCGTTATTTAGTTTTGCAGCTGAATAGATGCATATTCAATACCCAATGGAGTTCGCCATAGTGTATAACGGCTAATGAGTAGTAGACCGAAAAGCTGGCCTAGTTCAGGGGCAGGATATGCTGCGTGCGATATCCAGTTGGCCCGCCATGTCTGAGTGTCCTGCTTCGTACTTGAGGACCTCAGCCTGACTCTGTTTACCTGGAACAGGGTTTAAGCAAGAGCCGTACTATCTACTGCGGAGTACCAGGCTAGCGTTCCATTGCTTCCGGAATTATTCCAGGACCAAAAGTATCCCGGTTCGACGCGTTGGCCGATAGACGTCCGATGCTTCTCCGTATGTTTAACTGGAACAGTAAGATGAAACACAC

#https://rosalind.info/problems/ba3b/solutions/