In [1]:
#Ihsan Muchsin

def _read_fasta(path):
    '''Parse a FASTA file into a dict mapping each header (without the leading ">") to its full sequence.'''
    records = {}
    header = None
    chunks = []
    with open(path, 'r') as fin:
        for line in fin:
            if line.startswith(">"): # a new record starts with ">"
                if header is not None:
                    records[header] = "".join(chunks) # store the record that just ended
                header = line.lstrip(">").rstrip("\n")
                chunks = []
            elif header is not None:
                # lines before the first header belong to no record and are ignored
                chunks.append(line.strip())
    if header is not None:
        records[header] = "".join(chunks) # store the last record
    return records


def _gc_percentage(seq):
    '''Return the percentage of G and C characters in seq (case-insensitive); 0.0 for an empty sequence.'''
    if not seq:
        return 0.0 # avoid dividing by zero for a header that has no sequence lines
    bases = seq.upper() # count soft-masked (lower-case) bases as well
    return (bases.count('G') + bases.count('C')) / len(seq) * 100


def rankGC(infile, outfile):
    '''
    Find the FASTA record with the highest GC-content.

    Given: 1) A file containing DNA strings in FASTA format (the original exercise uses at most 10 strings
              of length at most 1 kbp each; this limit is not enforced here). 2) Name for the output file
    Return: A tuple (ID, GC-content) for the string having the highest GC-content, where GC-content is a
            percentage (float). The same two values are written to the output file, ID on the first line
            and the percentage on the second.

    Notes:
        - G/C counting is case-insensitive.
        - A record without any sequence lines has a GC-content of 0.0.
        - If several records share the highest GC-content, the first one in the file is returned.
        - If a header occurs more than once, the last sequence read for it is used.

    Raises:
        ValueError: if the input file contains no FASTA records (no line starting with ">").
    '''

    all_seq = _read_fasta(infile)
    if not all_seq:
        raise ValueError("no FASTA records found in " + repr(infile))

    # GC percentage per record, keyed by header
    countGC = {header: _gc_percentage(seq) for header, seq in all_seq.items()}

    max_seq = max(countGC, key=countGC.get) # header of the record with the highest GC percentage
    max_val = countGC[max_seq]

    with open(outfile, 'w') as fout:
        fout.write(max_seq + '\n')
        fout.write(str(max_val))

    return max_seq, max_val

In [2]:
# Input FASTA file and the file the answer is written to
infile = "rosalind_gc.txt"
outfile = 'gc_sol.txt'

# res is (ID of the record with the highest GC-content, its GC percentage)
res = rankGC(infile, outfile)
print(*res, sep="\n") # one value per line: ID first, then the percentage

Rosalind_2121
52.29468599033817
