In [35]:
import numpy as np
from Bio import SeqIO

In [36]:
# Input FASTA files: the reference alignment and the alignment of the
# steered ("mutant") sequences that will be compared against it.
aligned_fasta_path = "c1_aligned.fasta"
mutant_fasta_path = "c1_all_2.0_aligned.fasta"

# Conservation codes loaded from a NumPy .npy file.
# NOTE(review): presumably one code per alignment column — confirm.
consensus_mask = np.load("mask.npy")

In [37]:
# Read both FASTA files into {record id -> sequence string} mappings.
# If a record id occurs more than once in a file, the last occurrence wins.
aligned_sequences = dict(
    (entry.id, str(entry.seq))
    for entry in SeqIO.parse(aligned_fasta_path, "fasta")
)
mutant_sequences = dict(
    (entry.id, str(entry.seq))
    for entry in SeqIO.parse(mutant_fasta_path, "fasta")
)

In [38]:
# Display label for each integer conservation code; the character in
# parentheses is the marker symbol shown for that category.
# NOTE(review): the symbols look like Clustal-style consensus markers — confirm.
mask_to_sym = dict([
    (1, 'conserved (*)'),
    (0, 'not conserved ( )'),
    (2, 'somewhat conservation (.)'),
    (3, 'conserved substitution (:)'),
])

In [39]:
from prettytable import PrettyTable

# Summarise, per conservation category, how often a position is left unchanged
# in the steered (mutant) sequence relative to the original aligned sequence.
table = PrettyTable()
table.field_names = ["Conservation", "Count", "Unchanged when steered (%)"]

mask_length = len(consensus_mask)

# by_sym[code] collects one boolean per (sequence, position) pair:
# True when the character at that position is identical in both sequences.
by_sym = {0: [], 1: [], 2: [], 3: []}
for mutant_id, mutant_seq in mutant_sequences.items():
    aligned_seq = aligned_sequences[mutant_id]

    # zip() stops at its shortest input, so a length mismatch would silently
    # drop positions (or pair them with the wrong mask entry). Fail loudly.
    if not (len(aligned_seq) == len(mutant_seq) == mask_length):
        raise ValueError(
            f"Length mismatch for {mutant_id!r}: aligned={len(aligned_seq)}, "
            f"mutant={len(mutant_seq)}, mask={mask_length}"
        )

    # Plain character equality: identical characters of any kind (including a
    # shared gap character, if the alignment has them) count as "unchanged".
    seqid = [a == m for a, m in zip(aligned_seq, mutant_seq)]
    for mask_code, is_unchanged in zip(consensus_mask, seqid):
        by_sym[mask_code].append(is_unchanged)

# Rows are emitted in a fixed display order of mask codes.
for mask_code in [1, 3, 2, 0]:
    matches = by_sym[mask_code]
    # A category with no positions has no meaningful percentage; report "n/a"
    # instead of raising ZeroDivisionError.
    pct_unchanged = round(100 * sum(matches) / len(matches)) if matches else "n/a"
    table.add_row([mask_to_sym[mask_code], len(matches), pct_unchanged])

In [40]:
# Render the summary table as plain ASCII text.
print(table.get_string())

+----------------------------+-------+----------------------------+
|        Conservation        | Count | Unchanged when steered (%) |
+----------------------------+-------+----------------------------+
|       conserved (*)        |  400  |            100             |
| conserved substitution (:) |  450  |             98             |
| somewhat conservation (.)  |  150  |             98             |
|     not conserved ( )      | 40150 |             95             |
+----------------------------+-------+----------------------------+
