# Design SARS-CoV-2 spike RBD mutants
This notebook chooses/designs spikes with mutant RBDs.

## Setup
Import Python modules:

In [1]:
import collections
import itertools
import json
import os
import subprocess
import tempfile
import urllib
import urllib.request

import altair as alt

import Bio.Entrez
import Bio.SeqIO

import numpy

import pandas as pd

import ruamel.yaml as yaml

Read configuration:

In [2]:
# Parse the top-level settings file; the cells below read their inputs and outputs from `config`.
yaml_parser = yaml.YAML()
with open("config.yaml") as config_file:
    config = yaml_parser.load(config_file)

## Get the reference RBD

In [3]:
# Contact e-mail configured on Bio.Entrez (placeholder value).
Bio.Entrez.email = "example@example.com"

# Download the reference spike protein record in GenBank format.
print(f"Getting reference spike from accession {config['ref_spike']}")
with Bio.Entrez.efetch(
    db="protein",
    id=config["ref_spike"],
    rettype="gb",
    retmode="text",
) as genbank_handle:
    ref_spike = Bio.SeqIO.read(genbank_handle, "gb")
print(f"Got spike of length {len(ref_spike)}")

rbd_coords = config["rbd_coords"]

# Map each RBD site (1-based, both ends inclusive) to the reference amino acid there.
first_rbd_site = rbd_coords[0]
last_rbd_site = rbd_coords[1]
ref_rbd = {
    site: ref_spike[site - 1]
    for site in range(first_rbd_site, last_rbd_site + 1)
}

Getting reference spike from accession YP_009724390
Got spike of length 1273


## Get RBD mutations in each Pango lineage

In [4]:
# Download the JSON file of Pango lineage definitions named in the configuration.
pango_json = config["pango_json"]
print(f"Reading Pango clade definitions from {pango_json}")
with urllib.request.urlopen(pango_json) as response:
    pango_lineages = json.loads(response.read())
print(f"Read definitions for {len(pango_lineages)} lineages")


def parse_spike_rbd_muts(lineage_d, coords=None, reference=None):
    """Parse spike RBD mutations from dict for a lineage.

    Parameters
    ----------
    lineage_d : dict
        Lineage entry with "aaSubstitutions" and "aaDeletions" lists of strings of
        the form "<gene>:<wildtype><site><mutant>" (for instance "S:N501Y").
    coords : sequence of two ints, optional
        First and last RBD site (inclusive). Defaults to the notebook-level
        `rbd_coords`.
    reference : dict, optional
        Maps site number to reference amino acid. Defaults to the notebook-level
        `ref_rbd`.

    Returns
    -------
    list of tuple
        `(wildtype, site, mutant)` for each spike mutation inside the RBD, with
        substitutions listed before deletions.

    Raises
    ------
    AssertionError
        If the wildtype residue of an RBD mutation differs from the reference.
    """
    # fall back on the notebook globals so existing one-argument calls keep working
    coords = rbd_coords if coords is None else coords
    reference = ref_rbd if reference is None else reference

    rbd_muts = []
    for mut in lineage_d["aaSubstitutions"] + lineage_d["aaDeletions"]:
        if not mut.startswith("S:"):
            continue  # not a spike mutation
        mut = mut.split(":")[1]
        wt = mut[0]
        r = int(mut[1: -1])
        m = mut[-1]
        if coords[0] <= r <= coords[1]:
            # say which mutation disagrees with the reference rather than failing silently
            assert wt == reference[r], (
                f"wildtype in spike mutation {mut} does not match reference "
                f"amino acid {reference[r]} at site {r}"
            )
            rbd_muts.append((wt, r, m))
    return rbd_muts
            
# Collect the spike RBD mutations of every entry in `pango_lineages`, keyed the same way.
pango_rbd_muts = {}
for pango, lineage_d in pango_lineages.items():
    pango_rbd_muts[pango] = parse_spike_rbd_muts(lineage_d)

Reading Pango clade definitions from https://raw.githubusercontent.com/corneliusroemer/pango-sequences/main/data/pango-consensus-sequences_summary.json
Read definitions for 3916 lineages


## Get and import the escape calculator

In [5]:
# Download the escape calculator module into the working directory, then import it.
# NOTE(review): this executes code fetched from the configured URL, so the URL should
# point at a trusted (ideally pinned) source.
_ = urllib.request.urlretrieve(
    url=config["escape_calculator_module_url"],
    filename="escapecalculator.py",
)

# the import has to follow the download above because the file may not exist before it
import escapecalculator

## Now make designs for each parent

In [6]:
# Global design settings read from the top-level configuration.
repetition_downweight = config["repetition_downweight"]
allow_reversions_to_ref = config["allow_reversions_to_ref"]
categories = config["categories"]  # category name -> number of mutations per mutant

all_mutants = []  # one (name, parent, mutations, mutations from reference) tuple per mutant
for parent, parent_d in config["parent_specs"].items():
    nmutants = parent_d["nmutants"]
    with open(parent_d["specs"]) as f:
        parent_config = yaml.YAML().load(f)

    # Everything from here down to `parent_phenotypes` depends only on the parent, so it
    # is computed once per parent rather than once per (parent, category) combination.

    # make a data frame that includes parent and ref amino acid at each site
    parent_aas = (
        # first get reference and parent amino acid for each site
        pd.Series(ref_rbd).rename_axis("site").rename("ref_amino_acid").reset_index()
        .merge(
            pd.DataFrame(
                pango_rbd_muts[parent],
                columns=["ref_amino_acid", "site", "parent_amino_acid"],
            ),
            how="outer",
            on=["ref_amino_acid", "site"],
        )
        # sites not mutated in the parent keep the reference amino acid
        .assign(
            parent_amino_acid=lambda x: x["parent_amino_acid"].where(
                x["parent_amino_acid"].notnull(), x["ref_amino_acid"])
        )
    )
    assert len(parent_aas) == parent_aas["site"].nunique() == len(ref_rbd)

    # site -> parent amino acid, for sites where the parent differs from the reference
    parent_muts_from_ref = (
        parent_aas
        .query("ref_amino_acid != parent_amino_acid")
        .set_index("site")
        ["parent_amino_acid"]
        .to_dict()
    )

    # set up escape calculator
    escape_calc = escapecalculator.EscapeCalculator(virus=parent_config["escapecalculator"]["virus"])
    # get sites that differ between parent and virus used to initialize escape calculator
    parent_escape_calc_virus_diff_sites = list(set([
        tup[1]
        for tup in set(pango_rbd_muts[parent]).symmetric_difference(
            pango_rbd_muts[parent_config["escapecalculator"]["virus"]]
        )
    ]))

    # get RBD deep mutational scanning data
    rbd_dms = (
        pd.read_csv(parent_config["rbd_dms"]["data"])
        .query("target == @parent_config['rbd_dms']['target']")
        .rename(columns={"position": "site", "mutant": "amino_acid"})
        [["site", "amino_acid", "delta_bind", "delta_expr"]]
        .assign(
            delta_bind=lambda x: x["delta_bind"].clip(upper=parent_config["rbd_dms"]["clip"]),
            delta_expr=lambda x: x["delta_expr"].clip(upper=parent_config["rbd_dms"]["clip"]),
        )
    )
    # normalize values to parent amino acids
    rbd_dms = (
        rbd_dms
        .merge(parent_aas[["site", "parent_amino_acid"]], on="site", validate="many_to_one")
        .query("parent_amino_acid == amino_acid")
        .rename(columns={"delta_expr": "parent_expr", "delta_bind": "parent_bind"})
        .drop(columns=["amino_acid", "parent_amino_acid"])
        .merge(rbd_dms, on="site", validate="one_to_many")
        .assign(
            delta_bind=lambda x: x["delta_bind"] - x["parent_bind"],
            delta_expr=lambda x: x["delta_expr"] - x["parent_expr"],
        )
        .drop(columns=["parent_expr", "parent_bind"])
        .rename(columns={"delta_bind": "rbd_delta_bind", "delta_expr": "rbd_delta_expr"})
    )
    assert len(rbd_dms)

    # get all-clade fitness estimates
    fitness = (
        pd.read_csv(parent_config["fitness_estimates"]["fitness"])
        .query("gene == 'S'")
        .query("expected_count >= @parent_config['fitness_estimates']['fitness_min_count']")
        .rename(columns={"aa_site": "site", "aa": "amino_acid"})
        .assign(fitness=lambda x: x["fitness"].clip(upper=parent_config["fitness_estimates"]["clip"]))
        [["site", "amino_acid", "fitness"]]
    )
    assert len(fitness)

    # get clade fitness effects
    by_clade_fitness = (
        pd.read_csv(parent_config["fitness_estimates"]["by_clade"])
        .query("gene == 'S'")
        .query("clade == @parent_config['fitness_estimates']['clade']")
        .query(
            "(expected_count >= @parent_config['fitness_estimates']['clade_min_count'])"
            "or (actual_count >= @parent_config['fitness_estimates']['clade_min_count'])"
        )
        .rename(columns={"aa_site": "site", "mutant_aa": "amino_acid", "delta_fitness": "clade_fitness"})
        # clip the column that is actually kept; writing the clipped values to a column
        # named "fitness" would be discarded by the column selection on the next line
        .assign(
            clade_fitness=lambda x: x["clade_fitness"].clip(
                upper=parent_config["fitness_estimates"]["clip"]
            )
        )
        [["site", "amino_acid", "clade_fitness"]]
    )
    assert len(by_clade_fitness)

    # get spike DMS
    spike_dms = (
        pd.read_csv(parent_config["spike_dms"]["csv"])
        .rename(columns={"mutant": "amino_acid"})
        [["site", "amino_acid", "human sera escape", "spike mediated entry", "ACE2 binding"]]
    )
    # normalize values to parent amino acids
    spike_dms = (
        spike_dms
        .merge(parent_aas[["site", "parent_amino_acid"]], on="site", validate="many_to_one")
        .query("parent_amino_acid == amino_acid")
        .rename(
            columns={
                "human sera escape": "parent_human sera escape",
                "spike mediated entry": "parent_spike mediated entry",
                "ACE2 binding": "parent_ACE2 binding",
            }
        )
        .drop(columns=["amino_acid", "parent_amino_acid"])
        .merge(spike_dms, on="site", validate="one_to_many")
        .assign(
            spike_escape=lambda x: x["human sera escape"] - x["parent_human sera escape"],
            spike_entry=lambda x: x["spike mediated entry"] - x["parent_spike mediated entry"],
            spike_ACE2_binding=lambda x: x["ACE2 binding"] - x["parent_ACE2 binding"],
        )
        [["site", "amino_acid", "spike_escape", "spike_entry", "spike_ACE2_binding"]]
    )
    assert len(spike_dms)

    # now add each phenotype and its weight; every phenotype column `p` gets a matching
    # `p_weight` column, which the scoring step below relies on
    parent_phenotypes = (
        parent_aas
        # add fitness estimates, only keeping mutations with estimates
        .merge(fitness, on="site", validate="one_to_many", how="inner")
        .assign(fitness_weight=parent_config["fitness_estimates"]["weights"]["fitness"])
        # add clade fitness estimates, clipping lower at zero and also setting missing to zero
        .merge(by_clade_fitness, on=["site", "amino_acid"], validate="one_to_one", how="left")
        .assign(
            clade_fitness=lambda x: x["clade_fitness"].where(x["clade_fitness"] > 0, 0),
            clade_fitness_weight=parent_config["fitness_estimates"]["weights"]["by_clade_effect"],
        )
        # add RBD DMS, only keeping mutations with measurements
        .merge(rbd_dms, on=["site", "amino_acid"], validate="one_to_one", how="inner")
        .assign(
            rbd_delta_bind_weight=parent_config["rbd_dms"]["weights"]["delta_bind"],
            rbd_delta_expr_weight=parent_config["rbd_dms"]["weights"]["delta_expr"],
        )
        # add spike DMS, only keeping mutations with measurements
        .merge(spike_dms, on=["site", "amino_acid"], validate="one_to_one", how="inner")
        .assign(
            spike_escape_weight=parent_config["spike_dms"]["weights"]["sera escape"],
            spike_entry_weight=parent_config["spike_dms"]["weights"]["spike mediated entry"],
            spike_ACE2_binding_weight=parent_config["spike_dms"]["weights"]["ACE2 binding"],
        )
    )

    for category in categories:
        nmutations = categories[category]

        # Design each mutant
        # count how many times each mutation is already in a mutant of this parent and category
        mut_counts = collections.defaultdict(int)
        for imutant in range(nmutants):
            mutant_name = f"{parent}-{category}-mutant-{imutant + 1}"
            print(f"\nDesigning {mutant_name}")

            mutated_sites = []  # sites mutated in this variant
            mutation_rows = []  # one single-row data frame per chosen mutation
            mutations = []  # chosen mutations, named relative to the parent
            muts_from_ref = parent_muts_from_ref.copy()  # site -> amino acid differing from ref
            for imutation in range(nmutations):
                # we re-do escape calculator for each new mutated site
                mutant_phenotypes = parent_phenotypes.merge(
                    escape_calc.escape_per_site(parent_escape_calc_virus_diff_sites + mutated_sites)
                    .rename(columns={"retained_escape": "escape_calc"})
                    [["site", "escape_calc"]]
                    .assign(escape_calc_weight=parent_config["escapecalculator"]["weight"]),
                    on="site",
                    validate="many_to_one",
                )

                # ignore parent to parent mutations
                mutant_phenotypes = mutant_phenotypes.query("parent_amino_acid != amino_acid")

                # potentially ignore mutations to reference
                if not allow_reversions_to_ref:
                    mutant_phenotypes = mutant_phenotypes.query("ref_amino_acid != amino_acid")

                # get the phenotypes
                phenos = [
                    c for c in mutant_phenotypes.columns
                    if not c.endswith("_weight")
                    and c not in {"site", "amino_acid", "parent_amino_acid", "ref_amino_acid"}
                ]
                assert all(f"{p}_weight" in set(mutant_phenotypes.columns) for p in phenos)

                # score = escape_calc**weight times exp(phenotype * weight) for every other
                # phenotype; kept as a separate Series so that no column is assigned into
                # the result of `.query` (which can trigger SettingWithCopyWarning)
                raw_score = mutant_phenotypes["escape_calc"] ** mutant_phenotypes["escape_calc_weight"]
                for pheno in phenos:
                    if pheno != "escape_calc":
                        raw_score = raw_score * numpy.exp(
                            mutant_phenotypes[pheno] * mutant_phenotypes[f"{pheno}_weight"]
                        )

                # re-weight mutations already added to variants
                mutant_phenotypes = (
                    mutant_phenotypes
                    .assign(
                        mutation=lambda x: x["parent_amino_acid"] + x["site"].astype(str) + x["amino_acid"],
                        reweight=lambda x: repetition_downweight**x["mutation"].map(mut_counts),
                        score=lambda x: raw_score * x["reweight"],
                    )
                )

                # get top scoring mutation not in already mutated sites
                candidates = (
                    mutant_phenotypes
                    .query("site not in @mutated_sites")
                    .query("score.notnull()")
                )
                if candidates.empty:
                    raise ValueError(
                        f"No candidate mutations left for {mutant_name} after "
                        f"choosing {len(mutations)} of {nmutations} mutations"
                    )
                mutation_row = (
                    candidates
                    .sort_values("score")
                    [["site", "mutation", "ref_amino_acid", "parent_amino_acid", "amino_acid", *phenos]]
                    .tail(1)
                )
                site = int(mutation_row["site"].iloc[0])
                amino_acid = mutation_row["amino_acid"].iloc[0]
                mutation = mutation_row["mutation"].iloc[0]
                mutated_sites.append(site)
                mutation_rows.append(mutation_row)
                mutations.append(mutation)
                mut_counts[mutation] += 1
                if amino_acid == ref_rbd[site]:
                    # a reversion to the reference is not a mutation from the reference
                    muts_from_ref.pop(site, None)
                else:
                    muts_from_ref[site] = amino_acid

            muts_from_ref_str = " ".join(
                [f"{ref_rbd[site]}{site}{mut}" for (site, mut) in sorted(muts_from_ref.items())]
            )
            mutations_str = " ".join(mutations)
            all_mutants.append((mutant_name, parent, mutations_str, muts_from_ref_str))
            mutated_df = pd.concat(mutation_rows, ignore_index=True)
            print(f"Contains the following mutations relative to {parent}: {mutations_str}")
            display(mutated_df)

all_mutants = pd.DataFrame(
    all_mutants,
    columns=["name", "parent_strain", "mutations_from_parent", "mutations_from_reference"],
)


Designing KQ.1-moderate-mutant-1
Contains the following mutations relative to KQ.1: S494P F456L K478R K440R Q493L E471Q


Unnamed: 0,site,mutation,ref_amino_acid,parent_amino_acid,amino_acid,fitness,clade_fitness,rbd_delta_bind,rbd_delta_expr,spike_escape,spike_entry,spike_ACE2_binding,escape_calc
0,494,S494P,S,S,P,3.083,3.4873,0.26,0.07,0.1206,0.06785,0.2635,0.006304
1,456,F456L,F,F,L,1.5207,3.0509,0.29,0.05,0.2001,-0.02328,-0.3013,0.042399
2,478,K478R,T,K,R,2.1611,3.912,-0.24,-0.02,0.02994,0.03784,-0.09573,0.007199
3,440,K440R,N,K,R,3.0158,0.41552,-0.07,0.06,-0.02675,-0.07078,-0.2379,0.049314
4,493,Q493L,Q,Q,L,2.2219,2.6793,0.34,-0.07,0.0355,0.1395,0.9326,0.001324
5,471,E471Q,E,E,Q,2.3908,2.7945,-0.14,-0.03,0.3125,-0.04541,-0.3085,0.004082



Designing KQ.1-moderate-mutant-2
Contains the following mutations relative to KQ.1: S494P F456L K478R E340Q K444T K440R


Unnamed: 0,site,mutation,ref_amino_acid,parent_amino_acid,amino_acid,fitness,clade_fitness,rbd_delta_bind,rbd_delta_expr,spike_escape,spike_entry,spike_ACE2_binding,escape_calc
0,494,S494P,S,S,P,3.083,3.4873,0.26,0.07,0.1206,0.06785,0.2635,0.006304
1,456,F456L,F,F,L,1.5207,3.0509,0.29,0.05,0.2001,-0.02328,-0.3013,0.042399
2,478,K478R,T,K,R,2.1611,3.912,-0.24,-0.02,0.02994,0.03784,-0.09573,0.007199
3,340,E340Q,E,E,Q,2.0132,2.9106,-0.05,0.08,-0.1424,-0.1069,0.1744,0.00638
4,444,K444T,K,K,T,1.8265,2.1179,-0.18,0.19,0.09751,0.1281,-0.3462,0.012748
5,440,K440R,N,K,R,3.0158,0.41552,-0.07,0.06,-0.02675,-0.07078,-0.2379,0.037727



Designing KQ.1-moderate-mutant-3
Contains the following mutations relative to KQ.1: S494P F456L K478I E340Q E516Q Y453F


Unnamed: 0,site,mutation,ref_amino_acid,parent_amino_acid,amino_acid,fitness,clade_fitness,rbd_delta_bind,rbd_delta_expr,spike_escape,spike_entry,spike_ACE2_binding,escape_calc
0,494,S494P,S,S,P,3.083,3.4873,0.26,0.07,0.1206,0.06785,0.2635,0.006304
1,456,F456L,F,F,L,1.5207,3.0509,0.29,0.05,0.2001,-0.02328,-0.3013,0.042399
2,478,K478I,T,K,I,1.0225,3.8328,-0.09,0.16,-0.02297,-0.00556,-0.1833,0.007199
3,340,E340Q,E,E,Q,2.0132,2.9106,-0.05,0.08,-0.1424,-0.1069,0.1744,0.00638
4,516,E516Q,E,E,Q,2.1417,1.916,-0.02,0.07,-0.02956,-0.1835,0.1909,0.006185
5,453,Y453F,Y,Y,F,1.0374,0.98161,0.85,-0.03,0.02805,-0.0848,1.387,0.005554



Designing KQ.1-moderate-mutant-4
Contains the following mutations relative to KQ.1: S494P Q493L K478R F456L E471Q K444R


Unnamed: 0,site,mutation,ref_amino_acid,parent_amino_acid,amino_acid,fitness,clade_fitness,rbd_delta_bind,rbd_delta_expr,spike_escape,spike_entry,spike_ACE2_binding,escape_calc
0,494,S494P,S,S,P,3.083,3.4873,0.26,0.07,0.1206,0.06785,0.2635,0.006304
1,493,Q493L,Q,Q,L,2.2219,2.6793,0.34,-0.07,0.0355,0.1395,0.9326,0.004091
2,478,K478R,T,K,R,2.1611,3.912,-0.24,-0.02,0.02994,0.03784,-0.09573,0.007954
3,456,F456L,F,F,L,1.5207,3.0509,0.29,0.05,0.2001,-0.02328,-0.3013,0.037844
4,471,E471Q,E,E,Q,2.3908,2.7945,-0.14,-0.03,0.3125,-0.04541,-0.3085,0.004537
5,444,K444R,K,K,R,2.3704,0.94358,-0.09,-0.05,-0.02104,-0.05194,0.04488,0.013152



Designing KQ.1-moderate-mutant-5
Contains the following mutations relative to KQ.1: S494P Q493L K478I W452R K444T F456L


Unnamed: 0,site,mutation,ref_amino_acid,parent_amino_acid,amino_acid,fitness,clade_fitness,rbd_delta_bind,rbd_delta_expr,spike_escape,spike_entry,spike_ACE2_binding,escape_calc
0,494,S494P,S,S,P,3.083,3.4873,0.26,0.07,0.1206,0.06785,0.2635,0.006304
1,493,Q493L,Q,Q,L,2.2219,2.6793,0.34,-0.07,0.0355,0.1395,0.9326,0.004091
2,478,K478I,T,K,I,1.0225,3.8328,-0.09,0.16,-0.02297,-0.00556,-0.1833,0.007954
3,452,W452R,L,W,R,0.0,3.2065,0.15,-0.03,0.27777,0.08751,0.148502,0.008457
4,444,K444T,K,K,T,1.8265,2.1179,-0.18,0.19,0.09751,0.1281,-0.3462,0.014532
5,456,F456L,F,F,L,1.5207,3.0509,0.29,0.05,0.2001,-0.02328,-0.3013,0.036703



Designing KQ.1-aggressive-mutant-1
Contains the following mutations relative to KQ.1: S494P F456L K478R K440R Q493L E471Q E340Q E516Q K444T Y453F N354K E406Q


Unnamed: 0,site,mutation,ref_amino_acid,parent_amino_acid,amino_acid,fitness,clade_fitness,rbd_delta_bind,rbd_delta_expr,spike_escape,spike_entry,spike_ACE2_binding,escape_calc
0,494,S494P,S,S,P,3.083,3.4873,0.26,0.07,0.1206,0.06785,0.2635,0.006304
1,456,F456L,F,F,L,1.5207,3.0509,0.29,0.05,0.2001,-0.02328,-0.3013,0.042399
2,478,K478R,T,K,R,2.1611,3.912,-0.24,-0.02,0.02994,0.03784,-0.09573,0.007199
3,440,K440R,N,K,R,3.0158,0.41552,-0.07,0.06,-0.02675,-0.07078,-0.2379,0.049314
4,493,Q493L,Q,Q,L,2.2219,2.6793,0.34,-0.07,0.0355,0.1395,0.9326,0.001324
5,471,E471Q,E,E,Q,2.3908,2.7945,-0.14,-0.03,0.3125,-0.04541,-0.3085,0.004082
6,340,E340Q,E,E,Q,2.0132,2.9106,-0.05,0.08,-0.1424,-0.1069,0.1744,0.004215
7,516,E516Q,E,E,Q,2.1417,1.916,-0.02,0.07,-0.02956,-0.1835,0.1909,0.005926
8,444,K444T,K,K,T,1.8265,2.1179,-0.18,0.19,0.09751,0.1281,-0.3462,0.006198
9,453,Y453F,Y,Y,F,1.0374,0.98161,0.85,-0.03,0.02805,-0.0848,1.387,0.003884



Designing KQ.1-aggressive-mutant-2
Contains the following mutations relative to KQ.1: S494P F456L K478R K440R Q493L E471Q E340Q W452R E516Q K444R Y453F N405G


Unnamed: 0,site,mutation,ref_amino_acid,parent_amino_acid,amino_acid,fitness,clade_fitness,rbd_delta_bind,rbd_delta_expr,spike_escape,spike_entry,spike_ACE2_binding,escape_calc
0,494,S494P,S,S,P,3.083,3.4873,0.26,0.07,0.1206,0.06785,0.2635,0.006304
1,456,F456L,F,F,L,1.5207,3.0509,0.29,0.05,0.2001,-0.02328,-0.3013,0.042399
2,478,K478R,T,K,R,2.1611,3.912,-0.24,-0.02,0.02994,0.03784,-0.09573,0.007199
3,440,K440R,N,K,R,3.0158,0.41552,-0.07,0.06,-0.02675,-0.07078,-0.2379,0.049314
4,493,Q493L,Q,Q,L,2.2219,2.6793,0.34,-0.07,0.0355,0.1395,0.9326,0.001324
5,471,E471Q,E,E,Q,2.3908,2.7945,-0.14,-0.03,0.3125,-0.04541,-0.3085,0.004082
6,340,E340Q,E,E,Q,2.0132,2.9106,-0.05,0.08,-0.1424,-0.1069,0.1744,0.004215
7,452,W452R,L,W,R,0.0,3.2065,0.15,-0.03,0.27777,0.08751,0.148502,0.003806
8,516,E516Q,E,E,Q,2.1417,1.916,-0.02,0.07,-0.02956,-0.1835,0.1909,0.005926
9,444,K444R,K,K,R,2.3704,0.94358,-0.09,-0.05,-0.02104,-0.05194,0.04488,0.006198



Designing KQ.1-aggressive-mutant-3
Contains the following mutations relative to KQ.1: S494P F456L K478I K444T K440R Q493L E471Q E340Q H519L N354K G482R A522P


Unnamed: 0,site,mutation,ref_amino_acid,parent_amino_acid,amino_acid,fitness,clade_fitness,rbd_delta_bind,rbd_delta_expr,spike_escape,spike_entry,spike_ACE2_binding,escape_calc
0,494,S494P,S,S,P,3.083,3.4873,0.26,0.07,0.1206,0.06785,0.2635,0.006304
1,456,F456L,F,F,L,1.5207,3.0509,0.29,0.05,0.2001,-0.02328,-0.3013,0.042399
2,478,K478I,T,K,I,1.0225,3.8328,-0.09,0.16,-0.02297,-0.00556,-0.1833,0.007199
3,444,K444T,K,K,T,1.8265,2.1179,-0.18,0.19,0.09751,0.1281,-0.3462,0.013526
4,440,K440R,N,K,R,3.0158,0.41552,-0.07,0.06,-0.02675,-0.07078,-0.2379,0.039905
5,493,Q493L,Q,Q,L,2.2219,2.6793,0.34,-0.07,0.0355,0.1395,0.9326,0.001231
6,471,E471Q,E,E,Q,2.3908,2.7945,-0.14,-0.03,0.3125,-0.04541,-0.3085,0.004019
7,340,E340Q,E,E,Q,2.0132,2.9106,-0.05,0.08,-0.1424,-0.1069,0.1744,0.003982
8,519,H519L,H,H,L,2.1759,1.8506,-0.1,-0.21,-0.09808,-0.2163,0.1494,0.002889
9,354,N354K,N,N,K,1.8031,2.6735,0.08,-0.12,0.1111,0.0256,-0.2343,0.002586



Designing KQ.1-aggressive-mutant-4
Contains the following mutations relative to KQ.1: S494P K478R F456L K444R W452R K440R E516Q E406Q Q493L N405A T346S S446I


Unnamed: 0,site,mutation,ref_amino_acid,parent_amino_acid,amino_acid,fitness,clade_fitness,rbd_delta_bind,rbd_delta_expr,spike_escape,spike_entry,spike_ACE2_binding,escape_calc
0,494,S494P,S,S,P,3.083,3.4873,0.26,0.07,0.1206,0.06785,0.2635,0.006304
1,478,K478R,T,K,R,2.1611,3.912,-0.24,-0.02,0.02994,0.03784,-0.09573,0.007982
2,456,F456L,F,F,L,1.5207,3.0509,0.29,0.05,0.2001,-0.02328,-0.3013,0.041591
3,444,K444R,K,K,R,2.3704,0.94358,-0.09,-0.05,-0.02104,-0.05194,0.04488,0.013526
4,452,W452R,L,W,R,0.0,3.2065,0.15,-0.03,0.27777,0.08751,0.148502,0.005473
5,440,K440R,N,K,R,3.0158,0.41552,-0.07,0.06,-0.02675,-0.07078,-0.2379,0.039905
6,516,E516Q,E,E,Q,2.1417,1.916,-0.02,0.07,-0.02956,-0.1835,0.1909,0.006128
7,406,E406Q,E,E,Q,1.8261,2.231,0.24,0.12,0.05102,-0.2641,0.157,0.002246
8,493,Q493L,Q,Q,L,2.2219,2.6793,0.34,-0.07,0.0355,0.1395,0.9326,0.001164
9,405,N405A,D,N,A,-0.003964,0.0,0.54,-0.14,-0.4994,-0.1801,1.559,0.020067



Designing KQ.1-aggressive-mutant-5
Contains the following mutations relative to KQ.1: S494P K478I F456L K444T H445A E340Q Y453F K481Y E471Q H519L T346K N405G


Unnamed: 0,site,mutation,ref_amino_acid,parent_amino_acid,amino_acid,fitness,clade_fitness,rbd_delta_bind,rbd_delta_expr,spike_escape,spike_entry,spike_ACE2_binding,escape_calc
0,494,S494P,S,S,P,3.083,3.4873,0.26,0.07,0.1206,0.06785,0.2635,0.006304
1,478,K478I,T,K,I,1.0225,3.8328,-0.09,0.16,-0.02297,-0.00556,-0.1833,0.007982
2,456,F456L,F,F,L,1.5207,3.0509,0.29,0.05,0.2001,-0.02328,-0.3013,0.041591
3,444,K444T,K,K,T,1.8265,2.1179,-0.18,0.19,0.09751,0.1281,-0.3462,0.013526
4,445,H445A,V,H,A,2.1033,0.0,-0.01,0.29,0.2597,-0.10922,-0.22555,0.010787
5,340,E340Q,E,E,Q,2.0132,2.9106,-0.05,0.08,-0.1424,-0.1069,0.1744,0.005771
6,453,Y453F,Y,Y,F,1.0374,0.98161,0.85,-0.03,0.02805,-0.0848,1.387,0.005331
7,481,K481Y,N,K,Y,1.0208,2.6214,0.03,0.0,0.02822,-0.1747,-0.32765,0.003015
8,471,E471Q,E,E,Q,2.3908,2.7945,-0.14,-0.03,0.3125,-0.04541,-0.3085,0.00399
9,519,H519L,H,H,L,2.1759,1.8506,-0.1,-0.21,-0.09808,-0.2163,0.1494,0.003846


Write designed mutants to file:

In [7]:
# Save the table of designed mutants as CSV (without the row index) to the configured path.
print(f"Writing designed mutants to {config['designed_mutants']}")
designed_mutants_csv = config["designed_mutants"]
all_mutants.to_csv(designed_mutants_csv, index=False)

Writing designed mutants to designed_mutants.csv
