In [4]:
from collections import Counter
def read_fasta(fasta_file):
    """Parse a FASTA file into a dictionary of sequences.

    Each header line (a line starting with ">") opens a new record. The
    key is the header text after the ">" with surrounding whitespace
    stripped, so it includes any description that follows the identifier.
    The value is the concatenation of the stripped lines that follow,
    up to the next header or the end of the file.

    Lines that appear before the first header are discarded. If the same
    header occurs more than once, the later record replaces the earlier.

    Args:
        fasta_file: Path of the FASTA file to read (opened in text mode).

    Returns:
        dict mapping header text to the joined sequence string.
    """
    records = {}
    current_header = None
    chunks = []

    with open(fasta_file, 'r') as handle:
        for raw_line in handle:
            if not raw_line.startswith(">"):
                # sequence data: collect it for the record being built
                chunks.append(raw_line.strip())
                continue

            # a new header closes the record in progress, if there is one
            if current_header is not None:
                records[current_header] = ''.join(chunks)
            current_header = raw_line[1:].strip()
            chunks = []

    # the final record has no following header to trigger its storage
    if current_header is not None:
        records[current_header] = ''.join(chunks)
    return records

def calculate_nucleotide_frequency(sequence):
    """Print the relative frequency of A, C, G and T in a sequence.

    Counting is case-insensitive: lowercase (soft-masked) bases such as
    "a" are credited to their uppercase nucleotide. The denominator is the
    total number of symbols in the sequence, so any other characters
    (for example "N") lower the reported frequencies rather than being
    ignored.

    Args:
        sequence: The sequence to analyse, as a string (or any iterable
            of single-character strings).

    Returns:
        None. The results are written to standard output, one nucleotide
        per line with four decimal places, or a notice if the sequence
        is empty.
    """
    # using Counter to count every symbol in a single pass
    nucleotide_counts = Counter(sequence)

    total_nucleotides = sum(nucleotide_counts.values())
    if total_nucleotides > 0:
        print("Frequency of nucleotides:")
        for nucleotide in "ACGT":
            # fold the lowercase count in so soft-masked bases are not
            # counted in the total yet missing from every nucleotide
            count = (
                nucleotide_counts[nucleotide]
                + nucleotide_counts[nucleotide.lower()]
            )
            frequency = count / total_nucleotides
            print(f"{nucleotide}: {frequency:.4f}")
    else:
        print("No nucleotides found.")

# replace 'example_fasta_file.fasta' with the path to your actual FASTA file
fasta_file_path = 'example_fasta_file.fasta'
sequences = read_fasta(fasta_file_path)

# Report on the first record only. Guard the empty case: calling next() on
# an empty dictionary's items would otherwise raise a bare StopIteration.
if sequences:
    first_sequence_id, first_sequence = next(iter(sequences.items()))
    print(f"Stats for sequence ID: {first_sequence_id}")
    calculate_nucleotide_frequency(first_sequence)
else:
    print(f"No sequences found in {fasta_file_path}")


Stats for sequence ID: NC_001416.1 Enterobacteria phage lambda, complete genome
Frequency of nucleotides:
A: 0.2543
C: 0.2343
G: 0.2643
T: 0.2471
