# Design SARS-CoV-2 spike RBD mutants
This notebook chooses/designs spikes with mutant RBDs.

## Setup
Import Python modules:

In [1]:
import collections
import itertools
import json
import os
import subprocess
import tempfile
import urllib
import urllib.request

import altair as alt
import Bio.Entrez
import Bio.SeqIO
import numpy
import pandas as pd
import ruamel.yaml as yaml

Read configuration:

In [2]:
# Parse the YAML configuration; `config` is read by the later cells.
config_parser = yaml.YAML()
with open("config.yaml") as config_handle:
    config = config_parser.load(config_handle)

## Get the reference RBD

In [3]:
# Entrez requests carry a contact e-mail address.
Bio.Entrez.email = "example@example.com"

# Download the reference spike as a GenBank protein record.
ref_spike_accession = config["ref_spike"]
print(f"Getting reference spike from accession {ref_spike_accession}")
with Bio.Entrez.efetch(
    db="protein",
    id=ref_spike_accession,
    rettype="gb",
    retmode="text",
) as genbank_handle:
    ref_spike = Bio.SeqIO.read(genbank_handle, "gb")
print(f"Got spike of length {len(ref_spike)}")

# `rbd_coords` is an inclusive (first, last) pair of 1-based spike sites.
rbd_coords = config["rbd_coords"]

# Map each 1-based RBD site to the reference residue at that site
# (the record itself is indexed from zero, hence the `- 1`).
ref_rbd = {
    site: ref_spike[site - 1]
    for site in range(rbd_coords[0], rbd_coords[1] + 1)
}

Getting reference spike from accession YP_009724390
Got spike of length 1273


## Get RBD mutations in each Pango lineage

In [4]:
# Download the JSON file of per-lineage definitions named in the config.
pango_json = config["pango_json"]
print(f"Reading Pango clade definitions from {pango_json}")
with urllib.request.urlopen(pango_json) as response:
    pango_lineages = json.loads(response.read())
print(f"Read definitions for {len(pango_lineages)} lineages")


def parse_spike_rbd_muts(lineage_d):
    """Collect the spike RBD mutations listed for one lineage.

    Parameters
    ----------
    lineage_d : dict
        Lineage definition with list-valued keys ``"aaSubstitutions"`` and
        ``"aaDeletions"``; entries are strings such as ``"S:N501Y"``.

    Returns
    -------
    list of tuple
        ``(wildtype, site, mutant)`` for every spike (``"S:"``) entry whose
        site lies within the inclusive range given by the module-level
        ``rbd_coords``, in the order encountered (substitutions first, then
        deletions).

    Raises
    ------
    AssertionError
        If the wildtype residue of an RBD entry differs from the module-level
        ``ref_rbd`` at that site.
    """
    parsed = []
    for entry in lineage_d["aaSubstitutions"] + lineage_d["aaDeletions"]:
        # only entries for the spike gene are of interest
        if not entry.startswith("S:"):
            continue
        spike_mut = entry.split(":")[1]
        # first character is wildtype, last is mutant, digits between are the site
        wildtype, site, mutant = spike_mut[0], int(spike_mut[1: -1]), spike_mut[-1]
        if not (rbd_coords[0] <= site <= rbd_coords[1]):
            continue
        # the lineage definition must agree with the reference RBD sequence
        assert wildtype == ref_rbd[site]
        parsed.append((wildtype, site, mutant))
    return parsed
            
# RBD mutations (relative to the reference) keyed by Pango lineage name.
pango_rbd_muts = {
    lineage_name: parse_spike_rbd_muts(lineage_def)
    for lineage_name, lineage_def in pango_lineages.items()
}

Reading Pango clade definitions from https://raw.githubusercontent.com/corneliusroemer/pango-sequences/main/data/pango-consensus-sequences_summary.json
Read definitions for 3916 lineages


## Get and import the escape calculator

In [5]:
# Download the escape-calculator module into the working directory so that the
# `import` below can find it.
# NOTE(review): this executes code fetched at run time; consider pinning the
# configured URL to a specific commit -- confirm with the config owner.
escape_calculator_module_url = config["escape_calculator_module_url"]
_ = urllib.request.urlretrieve(escape_calculator_module_url, "escapecalculator.py")

import escapecalculator

## Now make designs for each parent

In [6]:
# Global design settings, read once here and used inside the loops below.
repetition_downweight = config["repetition_downweight"]
allow_reversions_to_ref = config["allow_reversions_to_ref"]
categories = config["categories"]  # maps category name -> number of mutations per mutant

# Accumulates one (name, parent, mutations from parent, mutations from reference)
# tuple per designed mutant; converted to a data frame after the loops.
all_mutants = []
for (parent, parent_d), category in itertools.product(config["parent_specs"].items(), categories):
    nmutants = parent_d["nmutants"]
    nmutations = categories[category]
    with open(parent_d["specs"]) as f:
        parent_config = yaml.YAML().load(f)

    # make a data frame that includes parent and ref amino acid at each site
    parent_aas = (
        # first get reference and parent amino acid for each site
        pd.Series(ref_rbd).rename_axis("site").rename("ref_amino_acid").reset_index()
        .merge(
            pd.DataFrame(
                pango_rbd_muts[parent],
                columns=["ref_amino_acid", "site", "parent_amino_acid"],
            ),
            how="outer",
            on=["ref_amino_acid", "site"],
        )
        # sites not mutated in the parent keep the reference amino acid
        .assign(
            parent_amino_acid=lambda x: x["parent_amino_acid"].where(
                x["parent_amino_acid"].notnull(), x["ref_amino_acid"])
        )
    )
    # exactly one row per RBD site
    assert len(parent_aas) == parent_aas["site"].nunique() == len(ref_rbd)

    # site -> parent amino acid, for sites where the parent differs from the reference
    parent_muts_from_ref = (
        parent_aas
        .query("ref_amino_acid != parent_amino_acid")
        .set_index("site")
        ["parent_amino_acid"]
        .to_dict()
    )

    # set up escape calculator
    escape_calc = escapecalculator.EscapeCalculator(virus=parent_config["escapecalculator"]["virus"])
    # get sites that differ between parent and virus used to initialize escape calculator
    parent_escape_calc_virus_diff_sites = list(set([
        tup[1]
        for tup in set(pango_rbd_muts[parent]).symmetric_difference(
            pango_rbd_muts[parent_config["escapecalculator"]["virus"]]
        )
    ]))

    # get RBD deep mutational scanning data
    rbd_dms = (
        pd.read_csv(parent_config["rbd_dms"]["data"])
        .query("target == @parent_config['rbd_dms']['target']")
        .rename(columns={"position": "site", "mutant": "amino_acid"})
        [["site", "amino_acid", "delta_bind", "delta_expr"]]
        .assign(
            delta_bind=lambda x: x["delta_bind"].clip(upper=parent_config["rbd_dms"]["clip"]),
            delta_expr=lambda x: x["delta_expr"].clip(upper=parent_config["rbd_dms"]["clip"]),
        )
    )
    # normalize values to parent amino acids
    rbd_dms = (
        rbd_dms
        .merge(parent_aas[["site", "parent_amino_acid"]], on="site", validate="many_to_one")
        .query("parent_amino_acid == amino_acid")
        .rename(columns={"delta_expr": "parent_expr", "delta_bind": "parent_bind"})
        .drop(columns=["amino_acid", "parent_amino_acid"])
        .merge(rbd_dms, on="site", validate="one_to_many")
        .assign(
            delta_bind=lambda x: x["delta_bind"] - x["parent_bind"],
            delta_expr=lambda x: x["delta_expr"] - x["parent_expr"],
        )
        .drop(columns=["parent_expr", "parent_bind"])
        .rename(columns={"delta_bind": "rbd_delta_bind", "delta_expr": "rbd_delta_expr"})
    )
    assert len(rbd_dms)

    # get all-clade fitness estimates
    fitness = (
        pd.read_csv(parent_config["fitness_estimates"]["fitness"])
        .query("gene == 'S'")
        .query("expected_count >= @parent_config['fitness_estimates']['fitness_min_count']")
        .rename(columns={"aa_site": "site", "aa": "amino_acid"})
        .assign(fitness=lambda x: x["fitness"].clip(upper=parent_config["fitness_estimates"]["clip"]))
        [["site", "amino_acid", "fitness"]]
    )
    assert len(fitness)

    # get clade fitness effects
    by_clade_fitness = (
        pd.read_csv(parent_config["fitness_estimates"]["by_clade"])
        .query("gene == 'S'")
        .query("clade == @parent_config['fitness_estimates']['clade']")
        .query(
            "(expected_count >= @parent_config['fitness_estimates']['clade_min_count'])"
            "or (actual_count >= @parent_config['fitness_estimates']['clade_min_count'])"
        )
        .rename(columns={"aa_site": "site", "mutant_aa": "amino_acid", "delta_fitness": "clade_fitness"})
        .assign(clade_fitness=lambda x: x["clade_fitness"].clip(upper=parent_config["fitness_estimates"]["clip"]))
        [["site", "amino_acid", "clade_fitness"]]
    )
    assert len(by_clade_fitness)

    # get spike DMS
    spike_dms = (
        pd.read_csv(parent_config["spike_dms"]["csv"])
        .rename(columns={"mutant": "amino_acid"})
        [["site", "amino_acid", "human sera escape", "spike mediated entry", "ACE2 binding"]]
    )
    # normalize values to parent amino acids
    spike_dms = (
        spike_dms
        .merge(parent_aas[["site", "parent_amino_acid"]], on="site", validate="many_to_one")
        .query("parent_amino_acid == amino_acid")
        .rename(
            columns={
                "human sera escape": "parent_human sera escape",
                "spike mediated entry": "parent_spike mediated entry",
                "ACE2 binding": "parent_ACE2 binding",
            }
        )
        .drop(columns=["amino_acid", "parent_amino_acid"])
        .merge(spike_dms, on="site", validate="one_to_many")
        .assign(
            spike_escape=lambda x: x["human sera escape"] - x["parent_human sera escape"],
            spike_entry=lambda x: x["spike mediated entry"] - x["parent_spike mediated entry"],
            spike_ACE2_binding=lambda x: x["ACE2 binding"] - x["parent_ACE2 binding"],
        )
        [["site", "amino_acid", "spike_escape", "spike_entry", "spike_ACE2_binding"]]
    )
    assert len(spike_dms)

    # Per-mutation multiplicative weights given in the parent specs; mutations not
    # listed get weight 1. An empty or absent section is treated as "no manual weights".
    manual_weights = parent_config.get("manual_weights") or {}

    # now add each phenotype and its weight
    parent_phenotypes = (
        parent_aas
        # add fitness estimates, only keeping mutations with estimates
        .merge(fitness, on="site", validate="one_to_many", how="inner")
        .assign(fitness_weight=parent_config["fitness_estimates"]["weights"]["fitness"])
        # add clade fitness estimates, clipping lower at zero and also setting missing to zero
        .merge(by_clade_fitness, on=["site", "amino_acid"], validate="one_to_one", how="left")
        .assign(
            clade_fitness=lambda x: x["clade_fitness"].where(x["clade_fitness"] > 0, 0),
            clade_fitness_weight=parent_config["fitness_estimates"]["weights"]["by_clade_effect"],
        )
        # add RBD DMS, only keeping mutations with measurements
        .merge(rbd_dms, on=["site", "amino_acid"], validate="one_to_one", how="inner")
        .assign(
            rbd_delta_bind_weight=parent_config["rbd_dms"]["weights"]["delta_bind"],
            rbd_delta_expr_weight=parent_config["rbd_dms"]["weights"]["delta_expr"],
        )
        # add spike DMS, only keeping mutations with measurements
        .merge(spike_dms, on=["site", "amino_acid"], validate="one_to_one", how="inner")
        .assign(
            spike_escape_weight=parent_config["spike_dms"]["weights"]["sera escape"],
            spike_entry_weight=parent_config["spike_dms"]["weights"]["spike mediated entry"],
            spike_ACE2_binding_weight=parent_config["spike_dms"]["weights"]["ACE2 binding"],
        )
        # add mutations, then manual weights
        .assign(
            mutation=lambda x: x["parent_amino_acid"] + x["site"].astype(str) + x["amino_acid"],
            manual_weight=lambda x: x["mutation"].map(lambda m: manual_weights.get(m, 1)),
        )
    )

    # Design each mutant
    # Count how many times each mutation is already in a mutant; this is reset for
    # every (parent, category) combination because it is created inside the outer loop.
    mut_counts = collections.defaultdict(int)
    for imutant in range(nmutants):
        mutant_name = f"{parent}-{category}-mutant-{imutant + 1}"
        print(f"\nDesigning {mutant_name}")

        mutated_sites = []  # sites mutated in this variant
        mutated_df = []  # one single-row data frame per chosen mutation
        mutations = []  # chosen mutations, named relative to the parent
        muts_from_ref = parent_muts_from_ref.copy()  # site -> amino acid differing from reference
        for imutation in range(nmutations):
            # re-run the escape calculator each time a new site has been mutated
            mutant_phenotypes = parent_phenotypes.merge(
                escape_calc.escape_per_site(parent_escape_calc_virus_diff_sites + mutated_sites)
                .rename(columns={"retained_escape": "escape_calc"})
                [["site", "escape_calc"]]
                .assign(escape_calc_weight=parent_config["escapecalculator"]["weight"]),
                on="site",
                validate="many_to_one",
            )

            # ignore parent to parent mutations
            mutant_phenotypes = mutant_phenotypes.query("parent_amino_acid != amino_acid")

            # potentially ignore mutations to reference
            if not allow_reversions_to_ref:
                mutant_phenotypes = mutant_phenotypes.query("ref_amino_acid != amino_acid")

            # get the phenotypes: every column that is neither a weight nor an identifier
            phenotype_columns = set(mutant_phenotypes.columns)
            phenos = [
                c for c in mutant_phenotypes.columns
                if not c.endswith("_weight") and c not in {
                    "site", "amino_acid", "parent_amino_acid", "ref_amino_acid", "mutation", "manual_weight",
                }
            ]
            assert all(f"{p}_weight" in phenotype_columns for p in phenos)

            # Assign score to each mutation. The escape-calculator value enters as a
            # power of its weight; every other phenotype enters as exp(value * weight).
            # The score is built as a separate Series and attached with `.assign` so we
            # never write into the frame returned by `.query` above.
            score = (
                mutant_phenotypes["manual_weight"]
                * mutant_phenotypes["escape_calc"] ** mutant_phenotypes["escape_calc_weight"]
            )
            for pheno in phenos:
                if pheno != "escape_calc":
                    score = score * numpy.exp(
                        mutant_phenotypes[pheno] * mutant_phenotypes[f"{pheno}_weight"]
                    )

            # re-weight mutations already added to variants
            reweight = repetition_downweight ** mutant_phenotypes["mutation"].map(mut_counts)
            mutant_phenotypes = mutant_phenotypes.assign(reweight=reweight, score=score * reweight)

            # get top scoring mutation not in already mutated sites
            candidates = (
                mutant_phenotypes
                .query("site not in @mutated_sites")
                .query("score.notnull()")
            )
            if candidates.empty:
                raise ValueError(
                    f"No candidate mutations remain for {mutant_name} after choosing "
                    f"{len(mutations)} of {nmutations} mutations"
                )
            mutation_row = (
                candidates
                .sort_values("score")
                [["site", "mutation", "ref_amino_acid", "parent_amino_acid", "amino_acid", *phenos, "manual_weight", "score"]]
                .tail(1)
            )
            site = mutation_row["site"].iloc[0]
            amino_acid = mutation_row["amino_acid"].iloc[0]
            mutation = mutation_row["mutation"].iloc[0]
            mutated_sites.append(site)
            mutated_df.append(mutation_row)
            mutations.append(mutation)
            mut_counts[mutation] += 1
            if amino_acid == ref_rbd[site]:
                # a reversion to the reference is not a mutation relative to the reference
                muts_from_ref.pop(site, None)
            else:
                muts_from_ref[site] = amino_acid

        muts_from_ref = " ".join([f"{ref_rbd[site]}{site}{mut}" for (site, mut) in sorted(muts_from_ref.items())])
        mutations = " ".join(mutations)
        all_mutants.append((mutant_name, parent, mutations, muts_from_ref))
        mutated_df = pd.concat(mutated_df, ignore_index=True)
        print(f"Contains the following mutations relative to {parent}: {mutations}")
        display(mutated_df)

all_mutants = pd.DataFrame(
    all_mutants,
    columns=["name", "parent_strain", "mutations_from_parent", "mutations_from_reference"],
)


Designing KP.2-moderate-mutant-1
Contains the following mutations relative to KP.2: K444T F490S A348P K440R Q493E


Unnamed: 0,site,mutation,ref_amino_acid,parent_amino_acid,amino_acid,fitness,clade_fitness,rbd_delta_bind,rbd_delta_expr,spike_escape,spike_entry,spike_ACE2_binding,escape_calc,manual_weight,score
0,444,K444T,K,K,T,1.8265,2.0,-0.1,-0.19,0.09751,0.1281,-0.3462,0.042782,1,0.277796
1,490,F490S,F,F,S,1.7816,1.7269,-0.03,0.14,0.1984,0.03862,-0.3511,0.023553,1,0.240268
2,348,A348P,A,A,P,0.26923,0.0,0.15,0.25,0.2214,0.02453,-0.2102,0.04941,1,0.15117
3,440,K440R,N,K,R,2.0,1.3768,-0.1,-0.18,-0.02675,-0.07078,-0.2379,0.035212,1,0.109925
4,493,Q493E,Q,Q,E,1.2333,0.0,-1.36,-0.07,0.008081,0.08247,-1.362,0.001624,200,0.081079



Designing KP.2-moderate-mutant-2
Contains the following mutations relative to KP.2: K444R F490V K478R A348S L456V


Unnamed: 0,site,mutation,ref_amino_acid,parent_amino_acid,amino_acid,fitness,clade_fitness,rbd_delta_bind,rbd_delta_expr,spike_escape,spike_entry,spike_ACE2_binding,escape_calc,manual_weight,score
0,444,K444R,K,K,R,2.0,1.9911,-0.1,-0.04,-0.02104,-0.05194,0.04488,0.042782,1,0.249395
1,490,F490V,F,F,V,2.0,0.0,0.06,-0.06,0.3323,-0.08735,-0.4202,0.023553,1,0.128855
2,478,K478R,T,K,R,2.0,1.9233,-0.15,-0.05,0.02994,0.03784,-0.09573,0.010986,1,0.069274
3,348,A348S,A,A,S,0.22659,0.0,-0.12,-0.21,0.2003,0.005438,-0.1663,0.049364,1,0.067068
4,456,L456V,F,L,V,0.62052,2.0,0.06,0.4,0.0574,0.07415,-0.0898,0.008873,1,0.063732



Designing KP.2-moderate-mutant-3
Contains the following mutations relative to KP.2: K444M F490Y N417T Y453F A348P


Unnamed: 0,site,mutation,ref_amino_acid,parent_amino_acid,amino_acid,fitness,clade_fitness,rbd_delta_bind,rbd_delta_expr,spike_escape,spike_entry,spike_ACE2_binding,escape_calc,manual_weight,score
0,444,K444M,K,K,M,2.0,0.0,-0.19,-0.15,0.1568,0.0321,-0.03545,0.042782,1,0.134408
1,490,F490Y,F,F,Y,1.2315,0.0,0.0,0.01,0.2721,0.07004,-0.08187,0.023553,1,0.102554
2,417,N417T,K,N,T,1.7919,1.9565,0.23,0.01,-0.2398,0.004675,0.3772,0.011005,1,0.053861
3,453,Y453F,Y,Y,F,1.0374,0.0,1.28,-0.41,0.02805,-0.0848,1.387,0.005651,1,0.045307
4,348,A348P,A,A,P,0.26923,0.0,0.15,0.25,0.2214,0.02453,-0.2102,0.048912,1,0.044894



Designing KP.2-moderate-mutant-4
Contains the following mutations relative to KP.2: K444T F490S D420N T346S I468L


Unnamed: 0,site,mutation,ref_amino_acid,parent_amino_acid,amino_acid,fitness,clade_fitness,rbd_delta_bind,rbd_delta_expr,spike_escape,spike_entry,spike_ACE2_binding,escape_calc,manual_weight,score
0,444,K444T,K,K,T,1.8265,2.0,-0.1,-0.19,0.09751,0.1281,-0.3462,0.042782,1,0.083339
1,490,F490S,F,F,S,1.7816,1.7269,-0.03,0.14,0.1984,0.03862,-0.3511,0.023553,1,0.07208
2,420,D420N,D,D,N,0.066556,0.54259,-0.04,-0.18,0.5724,-0.05154,0.174,0.008488,1,0.0533
3,346,T346S,R,T,S,2.0,2.0,-0.12,-0.19,0.03927,-0.04703,0.15,0.007107,1,0.044558
4,468,I468L,I,I,L,-0.11917,0.0,0.03,-0.05,-0.05109,-0.0195,0.08824,0.054728,1,0.04444



Designing KP.2-aggressive-mutant-1
Contains the following mutations relative to KP.2: K444T F490S A348P K440R Q493E K478R L456V N417T


Unnamed: 0,site,mutation,ref_amino_acid,parent_amino_acid,amino_acid,fitness,clade_fitness,rbd_delta_bind,rbd_delta_expr,spike_escape,spike_entry,spike_ACE2_binding,escape_calc,manual_weight,score
0,444,K444T,K,K,T,1.8265,2.0,-0.1,-0.19,0.09751,0.1281,-0.3462,0.042782,1,0.277796
1,490,F490S,F,F,S,1.7816,1.7269,-0.03,0.14,0.1984,0.03862,-0.3511,0.023553,1,0.240268
2,348,A348P,A,A,P,0.26923,0.0,0.15,0.25,0.2214,0.02453,-0.2102,0.04941,1,0.15117
3,440,K440R,N,K,R,2.0,1.3768,-0.1,-0.18,-0.02675,-0.07078,-0.2379,0.035212,1,0.109925
4,493,Q493E,Q,Q,E,1.2333,0.0,-1.36,-0.07,0.008081,0.08247,-1.362,0.001624,200,0.081079
5,478,K478R,T,K,R,2.0,1.9233,-0.15,-0.05,0.02994,0.03784,-0.09573,0.010447,1,0.065872
6,456,L456V,F,L,V,0.62052,2.0,0.06,0.4,0.0574,0.07415,-0.0898,0.007826,1,0.05621
7,417,N417T,K,N,T,1.7919,1.9565,0.23,0.01,-0.2398,0.004675,0.3772,0.010494,1,0.051362



Designing KP.2-aggressive-mutant-2
Contains the following mutations relative to KP.2: K444R F490V A348S D420N Y453F K478E I468L H445A


Unnamed: 0,site,mutation,ref_amino_acid,parent_amino_acid,amino_acid,fitness,clade_fitness,rbd_delta_bind,rbd_delta_expr,spike_escape,spike_entry,spike_ACE2_binding,escape_calc,manual_weight,score
0,444,K444R,K,K,R,2.0,1.9911,-0.1,-0.04,-0.02104,-0.05194,0.04488,0.042782,1,0.249395
1,490,F490V,F,F,V,2.0,0.0,0.06,-0.06,0.3323,-0.08735,-0.4202,0.023553,1,0.128855
2,348,A348S,A,A,S,0.22659,0.0,-0.12,-0.21,0.2003,0.005438,-0.1663,0.04941,1,0.067131
3,420,D420N,D,D,N,0.066556,0.54259,-0.04,-0.18,0.5724,-0.05154,0.174,0.008382,1,0.052631
4,453,Y453F,Y,Y,F,1.0374,0.0,1.28,-0.41,0.02805,-0.0848,1.387,0.005517,1,0.04423
5,478,K478E,T,K,E,0.90843,2.0,-0.1,0.1,0.0102,0.08829,-0.4404,0.010554,1,0.04083
6,468,I468L,I,I,L,-0.11917,0.0,0.03,-0.05,-0.05109,-0.0195,0.08824,0.047804,1,0.038818
7,445,H445A,V,H,A,2.0,0.0,-0.03,-0.03,0.2597,-0.10922,-0.22555,0.007383,1,0.032994



Designing KP.2-aggressive-mutant-3
Contains the following mutations relative to KP.2: K444M F490Y A348P S494P L441R A475S I468M K478I


Unnamed: 0,site,mutation,ref_amino_acid,parent_amino_acid,amino_acid,fitness,clade_fitness,rbd_delta_bind,rbd_delta_expr,spike_escape,spike_entry,spike_ACE2_binding,escape_calc,manual_weight,score
0,444,K444M,K,K,M,2.0,0.0,-0.19,-0.15,0.1568,0.0321,-0.03545,0.042782,1,0.134408
1,490,F490Y,F,F,Y,1.2315,0.0,0.0,0.01,0.2721,0.07004,-0.08187,0.023553,1,0.102554
2,348,A348P,A,A,P,0.26923,0.0,0.15,0.25,0.2214,0.02453,-0.2102,0.04941,1,0.045351
3,494,S494P,S,S,P,2.0,1.3904,-0.05,-0.15,0.1206,0.06785,0.2635,0.004867,1,0.038057
4,441,L441R,L,L,R,1.1699,2.0,-0.08,-0.31,-0.03251,0.02405,0.06132,0.011039,1,0.034938
5,475,A475S,A,A,S,-0.7456,1.3679,-0.4,-0.09,0.1848,-0.08265,-0.4831,0.032335,1,0.034038
6,468,I468M,I,I,M,-0.80286,0.0,-0.05,-0.02,0.006988,-0.001435,0.09491,0.04787,1,0.031946
7,478,K478I,T,K,I,1.0225,2.0,-0.06,-0.14,-0.02297,-0.00556,-0.1833,0.009163,1,0.028798



Designing KP.2-aggressive-mutant-4
Contains the following mutations relative to KP.2: K444T F490S T346S E516Q K440R K462R K478N N354K


Unnamed: 0,site,mutation,ref_amino_acid,parent_amino_acid,amino_acid,fitness,clade_fitness,rbd_delta_bind,rbd_delta_expr,spike_escape,spike_entry,spike_ACE2_binding,escape_calc,manual_weight,score
0,444,K444T,K,K,T,1.8265,2.0,-0.1,-0.19,0.09751,0.1281,-0.3462,0.042782,1,0.083339
1,490,F490S,F,F,S,1.7816,1.7269,-0.03,0.14,0.1984,0.03862,-0.3511,0.023553,1,0.07208
2,346,T346S,R,T,S,2.0,2.0,-0.12,-0.19,0.03927,-0.04703,0.15,0.00718,1,0.045018
3,516,E516Q,E,E,Q,2.0,1.7669,-0.09,-0.3,-0.02956,-0.1835,0.1909,0.009233,1,0.034451
4,440,K440R,N,K,R,2.0,1.3768,-0.1,-0.18,-0.02675,-0.07078,-0.2379,0.03565,1,0.033388
5,462,K462R,K,K,R,-1.0934,0.27815,-0.05,-0.17,0.2519,0.06153,-0.6189,0.033521,1,0.029735
6,478,K478N,T,K,N,0.74466,1.0815,-0.11,-0.12,0.1104,0.08826,-0.1795,0.01013,1,0.027891
7,354,N354K,N,N,K,1.8031,0.0,0.04,0.11,0.1111,0.0256,-0.2343,0.007604,1,0.027716



Designing HK.3.2-moderate-mutant-1
Contains the following mutations relative to HK.3.2: K356T K444T S494P K440R Y453F


Unnamed: 0,site,mutation,ref_amino_acid,parent_amino_acid,amino_acid,fitness,clade_fitness,rbd_delta_bind,rbd_delta_expr,spike_escape,spike_entry,spike_ACE2_binding,escape_calc,manual_weight,score
0,356,K356T,K,K,T,2.0,2.0,-0.06,0.05,0.3649,-0.1362,-0.3785,0.044102,1,0.696276
1,444,K444T,K,K,T,1.8265,2.0,-0.18,0.19,0.09751,0.1281,-0.3462,0.059286,1,0.51965
2,494,S494P,S,S,P,2.0,2.0,0.26,0.07,0.1206,0.06785,0.2635,0.018805,1,0.338834
3,440,K440R,N,K,R,2.0,0.41552,-0.07,0.06,-0.02675,-0.07078,-0.2379,0.102592,1,0.259445
4,453,Y453F,Y,Y,F,1.0374,0.98161,0.85,-0.03,0.02805,-0.0848,1.387,0.010968,1,0.136637



Designing HK.3.2-moderate-mutant-2
Contains the following mutations relative to HK.3.2: P445A K356T N450D S490Y L452R


Unnamed: 0,site,mutation,ref_amino_acid,parent_amino_acid,amino_acid,fitness,clade_fitness,rbd_delta_bind,rbd_delta_expr,spike_escape,spike_entry,spike_ACE2_binding,escape_calc,manual_weight,score
0,445,P445A,V,P,A,2.0,0.0,0.03,0.0,0.129,-0.1237,-0.05195,0.077691,1,0.275906
1,356,K356T,K,K,T,2.0,2.0,-0.06,0.05,0.3649,-0.1362,-0.3785,0.042954,1,0.203449
2,450,N450D,N,N,D,1.5604,0.99359,0.04,0.01,0.2437,0.008255,-0.08877,0.021425,1,0.161823
3,490,S490Y,F,S,Y,1.2315,0.73112,0.23,0.05,0.0737,0.03142,0.26923,0.024935,1,0.129633
4,452,L452R,L,L,R,0.0,2.0,0.17,-0.01,0.1816,0.1286,0.1571,0.013411,1,0.090742



Designing HK.3.2-aggressive-mutant-1
Contains the following mutations relative to HK.3.2: K356T K444T S494P K440R Y453F L452R E471Q K478R


Unnamed: 0,site,mutation,ref_amino_acid,parent_amino_acid,amino_acid,fitness,clade_fitness,rbd_delta_bind,rbd_delta_expr,spike_escape,spike_entry,spike_ACE2_binding,escape_calc,manual_weight,score
0,356,K356T,K,K,T,2.0,2.0,-0.06,0.05,0.3649,-0.1362,-0.3785,0.044102,1,0.696276
1,444,K444T,K,K,T,1.8265,2.0,-0.18,0.19,0.09751,0.1281,-0.3462,0.059286,1,0.51965
2,494,S494P,S,S,P,2.0,2.0,0.26,0.07,0.1206,0.06785,0.2635,0.018805,1,0.338834
3,440,K440R,N,K,R,2.0,0.41552,-0.07,0.06,-0.02675,-0.07078,-0.2379,0.102592,1,0.259445
4,453,Y453F,Y,Y,F,1.0374,0.98161,0.85,-0.03,0.02805,-0.0848,1.387,0.010968,1,0.136637
5,452,L452R,L,L,R,0.0,2.0,0.17,-0.01,0.1816,0.1286,0.1571,0.013835,1,0.093609
6,471,E471Q,E,E,Q,2.0,2.0,-0.14,-0.03,0.3125,-0.04541,-0.3085,0.00606,1,0.079005
7,478,K478R,T,K,R,2.0,2.0,-0.24,-0.02,0.02994,0.03784,-0.09573,0.010601,1,0.065413



Designing HK.3.2-aggressive-mutant-2
Contains the following mutations relative to HK.3.2: P445A K356T N450D S490Y Q493L K440T L452Q N405Y


Unnamed: 0,site,mutation,ref_amino_acid,parent_amino_acid,amino_acid,fitness,clade_fitness,rbd_delta_bind,rbd_delta_expr,spike_escape,spike_entry,spike_ACE2_binding,escape_calc,manual_weight,score
0,445,P445A,V,P,A,2.0,0.0,0.03,0.0,0.129,-0.1237,-0.05195,0.077691,1,0.275906
1,356,K356T,K,K,T,2.0,2.0,-0.06,0.05,0.3649,-0.1362,-0.3785,0.042954,1,0.203449
2,450,N450D,N,N,D,1.5604,0.99359,0.04,0.01,0.2437,0.008255,-0.08877,0.021425,1,0.161823
3,490,S490Y,F,S,Y,1.2315,0.73112,0.23,0.05,0.0737,0.03142,0.26923,0.024935,1,0.129633
4,493,Q493L,Q,Q,L,2.0,2.0,0.34,-0.07,0.0355,0.1395,0.9326,0.004313,1,0.085118
5,440,K440T,N,K,T,0.9548,0.0,-0.35,0.11,0.1273,0.00435,-0.909,0.060977,1,0.072214
6,452,L452Q,L,L,Q,0.51019,1.388,0.18,-0.02,0.2096,-0.1389,0.197,0.010059,1,0.054924
7,405,N405Y,D,N,Y,-0.96129,0.0,0.19,-0.17,0.2114,0.05109,-0.1583,0.042665,1,0.049347


Write designed mutants to file:

In [7]:
# Save the table of designed mutants to the CSV path named in the config.
designed_mutants_csv = config["designed_mutants"]
print(f"Writing designed mutants to {designed_mutants_csv}")
all_mutants.to_csv(designed_mutants_csv, index=False)

Writing designed mutants to designed_mutants.csv
