In [1]:
from Bio import SeqIO

def read_fasta(file_path):
    """Return every sequence in a FASTA file as a list of plain strings.

    Args:
        file_path: Location of the FASTA file; handed to ``SeqIO.parse``
            with the ``"fasta"`` format.

    Returns:
        A list containing ``str(record.seq)`` for each parsed record, in
        the order the parser yields them.
    """
    return [str(entry.seq) for entry in SeqIO.parse(file_path, "fasta")]

def read_wild_type(file_path):
    """Return the first sequence of a FASTA file as a plain string.

    Only the first record is consumed; any further records in the file are
    ignored.

    Args:
        file_path: Location of the FASTA file; handed to ``SeqIO.parse``
            with the ``"fasta"`` format.

    Returns:
        ``str(record.seq)`` for the first record, or ``None`` when the
        parser yields no records at all.
    """
    first_record = next(iter(SeqIO.parse(file_path, "fasta")), None)
    if first_record is None:
        return None
    return str(first_record.seq)

def count_mutations(sequences, wild_type):
    """Tally, per position, the residues that differ from the wild type.

    Each sequence is compared with ``wild_type`` position by position. The
    comparison uses ``zip``, so it stops at the end of the shorter of the
    two; trailing residues of the longer one are not examined.

    Args:
        sequences: Iterable of sequences to compare against the wild type.
        wild_type: The reference sequence.

    Returns:
        A dict mapping a 0-based position to a dict of
        ``{differing residue: number of sequences carrying it}``. Positions
        at which no sequence differs are absent.
    """
    tallies = {}
    for candidate in sequences:
        for index, (reference, observed) in enumerate(zip(wild_type, candidate)):
            if reference == observed:
                continue
            # Create the per-position table on first use, then bump the count.
            per_position = tallies.setdefault(index, {})
            per_position[observed] = per_position.get(observed, 0) + 1
    return tallies

def main():
    """Print every substitution relative to the wild type, most frequent first.

    Reads the sequences and the wild-type reference from two hard-coded
    FASTA paths, counts the differing residues per position, and prints one
    line per (position, residue) pair with 1-based positions, ordered by
    descending count.
    """
    # NOTE(review): judging by the file names alone, these two paths may be
    # swapped ("input" vs. "MPNN") — confirm which file holds the reference.
    fasta_file = '/home/iwe54/Sophia/pipeline/MACV/MPNN/analysis/MACV_input.fasta'  # sequences to analyse
    wild_type_file = '/home/iwe54/Sophia/pipeline/MACV/MPNN/analysis/MACV_MPNN.fasta'  # wild-type reference

    sequences = read_fasta(fasta_file)
    wild_type = read_wild_type(wild_type_file)
    mutation_counts = count_mutations(sequences, wild_type)

    # Flatten the nested tallies into (1-based position, wt, mutant, count) rows.
    rows = [
        (index + 1, wild_type[index], residue, occurrences)
        for index, per_residue in mutation_counts.items()
        for residue, occurrences in per_residue.items()
    ]

    # Highest count first; the sort is stable, so ties keep discovery order.
    ranked = sorted(rows, key=lambda row: row[3], reverse=True)

    print("Mutation Counts (sorted by frequency):")
    for position, wt_residue, mutation, count in ranked:
        print(f"Position {position}: {wt_residue}->{mutation} occurs {count} times")

# Run the analysis only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()

Mutation Counts (sorted by frequency):
Position 9: I->T occurs 1 times
Position 15: E->T occurs 1 times
Position 35: Y->L occurs 1 times
Position 44: K->G occurs 1 times
Position 79: T->K occurs 1 times
Position 88: R->W occurs 1 times
Position 111: P->K occurs 1 times
Position 120: T->I occurs 1 times
Position 134: L->R occurs 1 times
Position 199: P->E occurs 1 times
Position 200: I->R occurs 1 times
Position 231: S->A occurs 1 times
Position 263: I->N occurs 1 times
Position 266: M->A occurs 1 times
Position 293: N->L occurs 1 times
Position 318: D->H occurs 1 times
Position 347: Y->W occurs 1 times
Position 351: L->S occurs 1 times
Position 360: K->S occurs 1 times
