# Analyze mutations

In [1]:
import altair as alt

import pandas as pd

Get DMS data on effects on cell entry in different cells, then create an object that can take a mutation and return the measured effects allowing for a different wildtype:

In [2]:
dms_mut_diffs = pd.read_csv("../../results/compare_cell_entry/mut_diffs.csv")

class MutEffects:
    """Return effect of a given mutation."""
    
    def __init__(
        self,
        df=dms_mut_diffs,
        phenotypes=[
            '293T_TIM1',
            '293T_Mxra8 minus 293T_TIM1',
            '293T_Mxra8',
            'C636 minus 293T_TIM1',
            'C636',
            '293T_Mxra8 minus C636',
        ],
    ):
        self.phenotypes=phenotypes
        self.effect_d = df.set_index(["site", "mutant"])[phenotypes].to_dict("index")
        pass

    def effect(self, site, wt, mutant):
        if ((site, wt) in self.effect_d) and ((site, mutant) in self.effect_d):
            return {
                p: self.effect_d[(site, mutant)][p] - self.effect_d[(site, wt)][p]
                for p in self.phenotypes
            }
        else:
            return {pheno: pd.NA for pheno in self.phenotypes}

muteffects = MutEffects()

Get mosquito passaging mutations, drop those that are not nonsynonymous amino acid mutations, add mutation effects:

In [3]:
mosquito_passaging_mutations = (
    pd.read_csv("mosquito_passaging_mutations.csv")
    .assign(
        mut_type=lambda x: x.apply(
            lambda row: (
                "deletion"
                if pd.isnull(row["AA_ALT"])
                else (
                    "synonymous" if row["AA_ALT"] == "-"
                    else "nonsynonymous"
                )
            ),
            axis=1
        ),
        protein=lambda x: x["reference_site"].str[-3: -1],
        mutation=lambda x: x["AA_REF"] + x["reference_site"] + x["AA_ALT"],
    )
)

print("Total mutation counts:")
display(
    mosquito_passaging_mutations.groupby(["mut_type", "protein"]).aggregate(
        unique_mutations=pd.NamedAgg("mutation", "nunique"),
        total_mutations=pd.NamedAgg("mutation", "count"),
    )
)

print("\nKeeping only nonsynonymous mutations")
mosquito_passaging_mutations = (
    mosquito_passaging_mutations
    .query("mut_type == 'nonsynonymous'")
    .reset_index(drop=True)
)

for pheno in muteffects.phenotypes:
    mosquito_passaging_mutations[pheno] = mosquito_passaging_mutations.apply(
        lambda row: muteffects.effect(
            row["reference_site"], row["AA_REF"], row["AA_ALT"]
        )[pheno],
        axis=1
    )

Total mutation counts:


Unnamed: 0_level_0,Unnamed: 1_level_0,unique_mutations,total_mutations
mut_type,protein,Unnamed: 2_level_1,Unnamed: 3_level_1
deletion,E2,0,0
deletion,E3,0,0
nonsynonymous,E1,12,39
nonsynonymous,E2,25,43
synonymous,6K,1,1
synonymous,E1,7,10
synonymous,E2,6,9



Keeping only nonsynonymous mutations


In [4]:
mosquito_passaging_mutations.sort_values("FREQ")

Unnamed: 0,reference_site,AA_REF,AA_ALT,FREQ,Species,Passage,Sample,mut_type,protein,mutation,293T_TIM1,293T_Mxra8 minus 293T_TIM1,293T_Mxra8,C636 minus 293T_TIM1,C636,293T_Mxra8 minus C636
23,167(E2),I,T,0.02,albopictus,15,U4-1,nonsynonymous,E2,I167(E2)T,-5.355,0.0,-7.381,0.0,-7.132,0.0
35,252(E2),K,N,0.02,albopictus,25,C6-2,nonsynonymous,E2,K252(E2)N,-2.909,-0.699,-3.608,-2.091,-5.514,1.392
37,257(E2),I,S,0.02,albopictus,15,U4-1,nonsynonymous,E2,I257(E2)S,-0.151,-0.331,-0.482,-2.596,-2.747,2.265
29,206(E2),S,A,0.02,albopictus,25,C6-2,nonsynonymous,E2,S206(E2)A,-0.236,-0.032,-0.268,-0.182,-0.419,0.151
76,258(E1),G,R,0.02,aegypti,38,AE-2,nonsynonymous,E1,G258(E1)R,-4.612,0.947,-3.665,-0.388,-5.468,1.335
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56,226(E1),V,A,1.00,aegypti,48,AE-1,nonsynonymous,E1,V226(E1)A,0.347,-0.006,0.342,0.394,0.741,-0.4
71,226(E1),V,A,1.00,aegypti,51,A20-1,nonsynonymous,E1,V226(E1)A,0.347,-0.006,0.342,0.394,0.741,-0.4
21,160(E2),T,S,1.00,aegypti,48,AE-1,nonsynonymous,E2,T160(E2)S,-0.43,0.077,-0.354,-2.301,-2.731,2.377
10,54(E2),I,T,1.00,aegypti,41,A20-2,nonsynonymous,E2,I54(E2)T,-1.014,0.039,-0.975,-0.545,-1.559,0.584


In [13]:
mutation_selection = alt.selection_point(on="mouseover", empty=False, fields=["mutation"])

min_frequency = alt.param(
    value=0.1,
    bind=alt.binding_range(name="min mutation frequency", min=0, max=1),
)

mosquito_passaging_chart = (
    alt.Chart(mosquito_passaging_mutations)
    .add_params(mutation_selection, min_frequency)
    .transform_filter(alt.datum["FREQ"] >= min_frequency)
    .transform_fold(muteffects.phenotypes, ["phenotype", "effect"])
    .encode(
        alt.X("effect:Q", title=None),
        alt.Y("Species:N", title=None),
        alt.Facet(
            "phenotype:N",
            columns=2,
            title=None,
            sort=muteffects.phenotypes,
            header=alt.Header(orient="bottom", labelPadding=0),
        ),
        alt.Color("protein"),
        strokeWidth=alt.condition(mutation_selection, alt.value(2), alt.value(.5)),
        stroke=alt.condition(mutation_selection, alt.value("red"), alt.value("gray")),
        tooltip=["mutation", alt.Tooltip("FREQ", format=".2f", title="frequency")],
    )
    .mark_point(filled=True, fillOpacity=0.4, size=40, strokeOpacity=1)
    .resolve_scale(x="independent")
)

mosquito_passaging_chart

Notes:
 - `T160(E2)S`: this mutation is at site where mutations are generally tolerated OK in 293T-Mxra8 and 293T-TIM1, but are deleterious in C6/36, including this specific mutation. This mutation fixed in some _A. aegypti_ passaging.
 - `Y9(E2)H`: measured to be really bad in all cells, as are virtually all mutations at this site. This site is also strictly conserved at `Y` among all natural CHIKV strains.

These do not make sense, and I wonder if something is off with the sequencing the paper?