In [1]:
from collections import Counter
import re

def read_fasta(filename):
    """Read a FASTA file and return its sequence data as one string.

    Every header line (a line starting with ``>``) is skipped, not just the
    first one, so the text of later record headers in a multi-record file
    never leaks into the sequence. The sequence lines of all records are
    concatenated in file order, upper-cased, and stripped of every
    character other than A, C, G and T.

    Args:
        filename: Path of the FASTA file to read.

    Returns:
        The cleaned sequence. An empty or blank-only file yields ``""``
        instead of raising ``IndexError``.
    """
    chunks = []
    with open(filename, 'r') as f:
        for line in f:
            line = line.strip()
            # Drop blank lines and *all* record headers; header text often
            # contains A/C/G/T letters that would otherwise be counted.
            if not line or line.startswith(">"):
                continue
            chunks.append(line)
    seq = ''.join(chunks).upper()
    return re.sub(r'[^ACGT]', '', seq)  # keep only A,C,G,T

def count_codons(seq):
    """Tally every complete frame-0 triplet of ``seq``.

    The sequence is cut into consecutive, non-overlapping 3-character
    pieces starting at index 0; one or two leftover characters at the end
    are ignored. All 64 A/C/G/T triplets are guaranteed to be present in
    the result, with a count of 0 when they never occur.

    Args:
        seq: The sequence string to split into codons.

    Returns:
        A ``Counter`` mapping each codon to its number of occurrences.
    """
    # Stopping at len(seq) - 2 means every slice is a full triplet.
    tally = Counter(seq[start:start + 3] for start in range(0, len(seq) - 2, 3))

    # Backfill codons that were never seen so callers get a complete table.
    alphabet = "ACGT"
    for triplet in (x + y + z for x in alphabet for y in alphabet for z in alphabet):
        tally.setdefault(triplet, 0)
    return tally

def main(fasta_file="sequence (1).fasta", output_file="codon_counts_output.txt"):
    """Count codons in a FASTA file, print the table and save it to disk.

    Args:
        fasta_file: Path of the FASTA file to analyse. Defaults to the
            file name the script was originally hard-wired to.
        output_file: Path of the text file that receives the table.
            Defaults to the originally hard-wired output name.

    The table has one ``CODON   COUNT`` line per codon, sorted
    alphabetically by codon, and is identical on screen and in the file.
    """
    seq = read_fasta(fasta_file)
    counts = count_codons(seq)

    # Format once so the screen output and the saved file cannot diverge.
    lines = [f"{codon}   {counts[codon]}" for codon in sorted(counts)]

    # Print to screen in required format
    for line in lines:
        print(line)

    # Also save to file
    with open(output_file, "w") as out:
        out.writelines(f"{line}\n" for line in lines)

    print(f"\nCodon counts saved to {output_file}")

# Run the analysis only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()


AAA   37539
AAC   26349
AAG   20454
AAT   27522
ACA   18535
ACC   24472
ACG   27244
ACT   15015
AGA   18433
AGC   29696
AGG   18679
AGT   14964
ATA   24069
ATC   29562
ATG   26335
ATT   27975
CAA   23363
CAC   19401
CAG   35517
CAT   25647
CCA   28619
CCC   17808
CCG   34569
CCT   18521
CGA   25823
CGC   48735
CGG   34240
CGT   27185
CTA   9869
CTC   14923
CTG   35051
CTT   21272
GAA   27786
GAC   20901
GAG   15490
GAT   29516
GCA   30122
GCC   36763
GCG   49644
GCT   29014
GGA   21062
GGC   37594
GGG   17900
GGT   24124
GTA   18903
GTC   20605
GTG   19652
GTT   26209
TAA   24092
TAC   19168
TAG   10363
TAT   24411
TCA   27491
TCC   21256
TCG   25783
TCT   18708
TGA   26880
TGC   29233
TGG   27737
TGT   18737
TTA   23956
TTC   27835
TTG   23251
TTT   37572

Codon counts saved to codon_counts_output.txt
