# Configure structure-based analysis for `dms-viz`

In [None]:
import os
import subprocess
import textwrap
import pandas as pd

Input/output paths and filtering parameters:

In [None]:
# this cell is tagged as `parameters` for papermill parameterization
# Every name below is only a `None` placeholder here; real values must be
# supplied before the later cells run (via papermill, or by uncommenting the
# interactive cell that follows).

# Passed to `configure-dms-viz format` as `--sitemap`.
site_map = None
# CSV read with `pd.read_csv` as the functional-effects table; its `effect`
# column is renamed to `cell_entry`.
func_scores = None
# Location of the per-antibody `<antibody>_mut_effect.csv` files.
antibody_escape_dir = None

# Output directory: created if missing; receives one JSON per phenotype and
# `description.md`.
dms_viz_subdir = None
# Path the merged phenotypes table is written to as CSV; also used as the
# `--input` of `configure-dms-viz format`.
dms_viz_phenotypes = None
# `--output` of `configure-dms-viz join` (the combined JSON).
dms_viz_json = None

# Passed to `configure-dms-viz format` as `--structure`.
pdb_file = None
# Minimum `times_seen` required in both the functional and the escape data.
times_seen = None
# Minimum `n_selections` required in the functional data.
n_selections = None
# Exact `frac_models` value required in the antibody-escape data.
frac_models = None
# Middle value of the `cell_entry` filter limits `[min, cell_entry_default, 0]`.
cell_entry_default = None

In [None]:
# # Uncomment to run this notebook interactively
# site_map = "../data/site_numbering_map.csv"
# func_scores = "../results/func_effects/averages/293T_entry_func_effects.csv"
# antibody_escape_dir = "../results/antibody_escape/averages/"

# dms_viz_subdir = "../results/dms_viz/"
# dms_viz_phenotypes = "../results/dms_viz/phenotypes.csv" 
# dms_viz_json = "../results/dms_viz/dms_viz.json"

# pdb_file = "7puy"
# times_seen = 2
# n_selections = 8
# frac_models = 1
# cell_entry_default = -3

Process data by filtering functional scores and antibody escape data

In [None]:
# Process functional scores.
# Load the functional effects, expose `effect` under the name `cell_entry`,
# and keep only rows passing the filters: `mutant` is not '-' (presumably a
# gap/deletion character -- confirm), and `n_selections` / `times_seen` are at
# or above the corresponding notebook parameters. The `mutation` label
# (wildtype + site + mutant, e.g. "A123G") is the key used to merge in the
# antibody-escape data below.
phenotypes = (
    pd.read_csv(func_scores)
    .rename(columns={"effect": "cell_entry"})
    .query("mutant != '-' & n_selections >=@n_selections & times_seen >= @times_seen")
    # `assign` adds the column inside the chain instead of writing onto the
    # filtered frame afterwards
    .assign(
        mutation=lambda df: df["wildtype"] + df["site"].astype(str) + df["mutant"]
    )
)

# Process antibody escape data.
# One `<antibody>_mut_effect.csv` is expected per entry in this list.
antibodies = [
    "S4378",
    "S43711",
    "S43720",
    "S43727",
    "S43742",
    "S44428",
    "S44433",
    "S44446",
    "S44478",
    "S43752",
]

for antibody in antibodies:
    # `os.path.join` works whether or not `antibody_escape_dir` ends with a
    # path separator (plain string concatenation silently built a wrong path
    # when the trailing slash was missing).
    escape_csv = os.path.join(antibody_escape_dir, f"{antibody}_mut_effect.csv")
    curr_df = (
        pd.read_csv(escape_csv)
        .query("times_seen >= @times_seen & frac_models == @frac_models")
        # Explicit selection raises a KeyError right here if either column is
        # absent, rather than silently producing no `<antibody>_escape` column.
        .loc[:, ["mutation", "escape_median"]]
        .rename(columns={"escape_median": f"{antibody}_escape"})
    )
    # Left merge keeps every functional-score row; mutations without a passing
    # escape measurement get NaN. `validate` makes pandas raise if `mutation`
    # is duplicated on either side.
    phenotypes = phenotypes.merge(
        curr_df,
        how="left",
        on="mutation",
        validate="one_to_one",
    )

# Make the output dir if it doesn't exist. `makedirs(..., exist_ok=True)` also
# creates missing parent directories and has no check-then-create race.
os.makedirs(dms_viz_subdir, exist_ok=True)

phenotypes.to_csv(dms_viz_phenotypes, index=False)

In [None]:
# `--floor` setting for each antibody's escape phenotype, in the order the
# datasets are written out. S43752 is the only antibody with the floor off.
escape_floor = {
    "S4378": "True",
    "S43711": "True",
    "S43720": "True",
    "S43727": "True",
    "S43742": "True",
    "S43752": "False",
    "S44428": "True",
    "S44433": "True",
    "S44446": "True",
    "S44478": "True",
}

# Map each phenotype column to the extra arguments handed to
# `configure-dms-viz`: escape columns are summarized by sum, cell entry by mean.
phenotype_cols = {
    f"{antibody}_escape": ["--floor", floor, "--summary-stat", "sum"]
    for antibody, floor in escape_floor.items()
}
phenotype_cols["cell_entry"] = ["--floor", "False", "--summary-stat", "mean"]

# Tooltip columns: every phenotype plus `mutation`, labelled with spaces
# in place of underscores.
tooltip_cols = {
    col: col.replace("_", " ") for col in [*phenotype_cols, "mutation"]
}

# All tooltip columns must be present in the phenotypes table.
assert set(tooltip_cols) <= set(phenotypes.columns)

# Only `cell_entry` is offered as a filter; its limits are
# [observed minimum, default cutoff, 0].
filter_cols = ["cell_entry"]
# `.item()` turns the NumPy scalar into a plain Python number
min_value = phenotypes["cell_entry"].min().item()
filter_limits = {"cell_entry": [min_value, cell_entry_default, 0]}

print(filter_limits)

In [None]:
# Run `configure-dms-viz format` once per phenotype, collecting the paths of
# the per-phenotype JSON files so they can be joined afterwards.
pheno_jsons = []
for pheno_col, pheno_args in phenotype_cols.items():
    display_name = pheno_col.replace("_", " ")
    pheno_json = os.path.join(dms_viz_subdir, f"{pheno_col}.json")
    print(f"Writing phenotype {pheno_col} to {pheno_json}")

    # The phenotype being plotted is left out of its own tooltips.
    other_tooltips = {k: v for k, v in tooltip_cols.items() if k != pheno_col}

    cmds = [
        "configure-dms-viz",
        "format",
        "--name",
        display_name,
        "--input",
        dms_viz_phenotypes,
        "--metric",
        pheno_col,
        "--structure",
        pdb_file,
        "--sitemap",
        site_map,
        "--tooltip-cols",
        str(other_tooltips),
        "--alphabet",
        "RKHDEQNSTYWFAILMVGPC*",
        "--output",
        pheno_json,
        "--title",
        f"Effects of mutations to Lassa virus GPC (Josiah strain) on {display_name}",
        "--description",
        pheno_col,
    ]
    cmds.extend(pheno_args)

    # A phenotype is never used as a filter on itself; the filter options are
    # added only when at least one other filter column remains.
    pheno_filter_cols = {c: c for c in filter_cols if c != pheno_col}
    if pheno_filter_cols:
        limits = {c: filter_limits[c] for c in pheno_filter_cols}
        cmds.extend(["--filter-cols", str(pheno_filter_cols)])
        cmds.extend(["--filter-limits", str(limits)])

    # check=True: a failing `configure-dms-viz` call raises and stops here.
    subprocess.run(cmds, check=True)
    pheno_jsons.append(pheno_json)

# Markdown description attached to the joined visualization.
description_text = textwrap.dedent(
    """\
    # Effects of mutations to Lassa virus GPC as measured by deep mutational scanning
    This is an interactive structure-based visualization of the effects of mutations to
    a Josiah strain GPC on cell entry and antibody escape.

    Use the *Dataset* dropdown in the left toolbar to choose which phenotype to show.
    You can click on points on the site lineplot to highlight them on the structure
    and show the effects of individual mutations.
    """
)
markdown_description = os.path.join(dms_viz_subdir, "description.md")
with open(markdown_description, "w") as md_file:
    md_file.write(description_text)

# Combine the per-phenotype JSONs into the single output file.
print(f"Concatenating phenotype JSONs to {dms_viz_json}")
join_cmd = [
    "configure-dms-viz",
    "join",
    "--input",
    ", ".join(pheno_jsons),
    "--output",
    dms_viz_json,
    "--description",
    markdown_description,
]
subprocess.run(join_cmd, check=True)