In [1]:
# Load important packages
import Bio
from Bio import SeqIO, SearchIO, Entrez
from Bio.Seq import Seq
from Bio.SeqUtils import GC
from Bio.Blast import NCBIWWW
from Bio.Data import CodonTable

In [2]:
# Sanity check: confirm that Biopython can parse a FASTA file by listing record IDs.
# A FASTA file is plain text, so one can be made in any text editor by saving
# the sequences and renaming the ".txt" extension to ".fasta".
sample_records = SeqIO.parse("sample_GC.fasta", "fasta")
for record in sample_records:
    # record.id is the identifier printed for each entry (e.g. Rosalind_6404)
    print(record.id)

Rosalind_6404
Rosalind_5959
Rosalind_0808


In [7]:
# SeqIO.parse() hands back an iterator object rather than a list of records;
# printing it shows only the object's repr, not the sequences themselves.
file = SeqIO.parse(handle="sample_GC.fasta", format="fasta")
print(file)

<Bio.SeqIO.FastaIO.FastaIterator object at 0x000002E8982943A0>


In [8]:
# Goal: find the record with the highest GC content and report (ID, GC %).
# Each record is turned into a tuple (r.id, GC(r.seq)).
# key=lambda x: x[1] tells max() to compare tuples by their SECOND element,
# i.e. the GC percentage. Without the key, max() would compare the tuples
# starting from the first element (the ID string), which is not what we want.
#
# The FASTA file is parsed right here instead of reusing an iterator created
# in another cell: a SeqIO.parse() iterator can be consumed only once, so
# re-running this cell against an already-used iterator would make max() fail
# with "ValueError: max() arg is an empty sequence".
sample = max(
    ((r.id, GC(r.seq)) for r in SeqIO.parse("sample_GC.fasta", "fasta")),
    key=lambda x: x[1],
)
print(sample)

('Rosalind_0808', 60.91954022988506)


In [5]:
# Testing writing capabilities: write each record's ID and GC % on separate lines.
# The "with" block guarantees the file is closed even if parsing or GC() raises
# part-way through the loop (a bare open()/close() pair would leak the handle).
# NOTE: mode "a" appends, so every re-run of this cell adds the same lines again.
with open("GC_o_sample.txt", "a") as o_s:
    for r in SeqIO.parse("sample_GC.fasta", "fasta"):
        o_s.write(str(r.id) + "\n" + str(GC(r.seq)) + "\n")

In [18]:
## Actual code for output
# Pick the record with the highest GC content; the result is an (ID, GC %) tuple.
# NOTE(review): GC() appears to be deprecated in newer Biopython releases in
# favour of gc_fraction() (which returns a 0-1 fraction) -- confirm against the
# installed version before upgrading.
real = max(((r.id, GC(r.seq)) for r in SeqIO.parse("rosalind_GC.fasta", "fasta")), key=lambda x: x[1])
print(real)
# Notice that the output is a tuple: an ordered sequence that cannot be changed.
# write() only accepts strings, so passing the tuple directly would raise a
# TypeError; the tuple has to be turned into text first.
print(type(real))
# format() builds that text in the required layout:
# - the asterisk unpacks the tuple so its values fill the placeholders in order
# - the first placeholder {} is substituted by the first value (the ID)
# - the second placeholder {:.3f} is substituted by the next value (the GC %),
#   rendered with three digits after the decimal point
# The "with" block closes the file automatically, even if write() raises.
with open("GC_o.txt", "w") as o:
    o.write("{}\n{:.3f}".format(*real))

('Rosalind_8662', 51.4218009478673)
<class 'tuple'>
