In [37]:
# check that k3l_test contains appropriate variants for k3l
import pandas as pd
from Bio.Seq import Seq

# read script output
input_file = 'k3l_test.tsv'
# Missing cells become '' so they contribute nothing when the per-position
# iupac_aa strings are concatenated below.
df = pd.read_csv(input_file, sep='\t').fillna('')

# gather iupac-encoded aa missense variants by position:
# concatenate every iupac_aa string seen at a position, then reduce the
# result to the set of distinct single-character codes.
df1 = df.groupby('position')['iupac_aa'].apply(''.join).reset_index()
df1['iupac_aa'] = df1['iupac_aa'].apply(set)

# wt codon for each position (if a position has several rows, the last one wins)
map_dict = dict(zip(df.position, df.wt_codon))
df1['wt_codon'] = df1.position.map(map_dict)

# wild-type amino acid encoded by the wt codon
df1['wt_aa'] = df1.wt_codon.apply(lambda x: str(Seq(x).translate()))

# get the missense variants for the wt codon
def aa_missense_variants(codon):
    """Return the amino acids reachable from `codon` by one base substitution.

    Each of the three codon positions is replaced in turn by A, C, G and T;
    every resulting codon is translated, and translations that equal the
    wild-type translation are discarded. Nothing else is filtered, so any
    non-wild-type translation (stop codons included) ends up in the result.

    Parameters
    ----------
    codon : str
        Three-letter nucleotide codon.

    Returns
    -------
    set of str
        Distinct translations that differ from the wild-type translation.
    """
    wt_aa = str(Seq(codon).translate())
    # lazily translate every single-substitution neighbour of the codon
    neighbour_aas = (
        str(Seq(codon[:idx] + base + codon[idx + 1:]).translate())
        for idx in range(3)
        for base in 'ACGT'
    )
    return {aa for aa in neighbour_aas if aa != wt_aa}
# expected single-substitution amino acids for each wt codon
df1['wt_missense'] = df1['wt_codon'].apply(aa_missense_variants)

observed_sets = df1['iupac_aa']
expected_sets = df1['wt_missense']

# expected but not observed: should just be stop codons removed
df1['missing_from_wt'] = expected_sets - observed_sets
# observed but not expected: should just be synonymous variants
df1['added_from_wt'] = observed_sets - expected_sets
# in exactly one of the two sets: should just be stops and wt/synonymous variants
df1['sym_diff'] = [
    observed ^ expected
    for observed, expected in zip(observed_sets, expected_sets)
]

def check_sym_diff(row):
    """Report whether a row's symmetric difference holds an unexpected code.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row passed by ``apply(axis=1)``)
        Must provide ``'sym_diff'`` (an iterable of amino-acid codes) and
        ``'wt_aa'`` (the wild-type amino acid; coerced with ``str()``).

    Returns
    -------
    bool
        True if any element of ``'sym_diff'`` is neither ``"*"`` nor the
        wild-type amino acid; False otherwise, including when ``'sym_diff'``
        is empty.
    """
    # the only differences that are tolerated: a stop ("*") or the wt residue
    allowed = ("*", str(row['wt_aa']))
    return any(code not in allowed for code in row['sym_diff'])
        
        
# flag each position whose symmetric difference contains anything
# other than a stop or the wild-type residue
df1['check_sym'] = df1.apply(check_sym_diff, axis=1)
# number of flagged positions
df1['check_sym'].sum()

0

In [38]:
# preview the first rows of the per-position comparison table
df1.head()

Unnamed: 0,position,iupac_aa,wt_codon,wt_aa,wt_missense,missing_from_wt,added_from_wt,sym_diff,check_sym
0,11,"{E, P, T, G, V, S}",GCG,A,"{E, P, T, G, V, S}",{},{},{},False
1,12,"{A, D, R, V, S, C}",GGT,G,"{A, D, R, V, S, C}",{},{},{},False
2,13,"{A, N, H, G, E, Y, V}",GAT,D,"{A, N, G, H, E, Y, V}",{},{},{},False
3,14,"{L, A, E, G, V, I}",GTA,V,"{L, A, E, G, I}",{},{V},{V},False
4,15,"{L, M, K, R, T, V}",ATA,I,"{L, M, K, R, T, V}",{},{},{},False
