# Map escape scores onto a PDB structure

In [1]:
# Imports
import os
import pandas as pd
import polyclonal

File paths for data:

In [2]:
# this cell is tagged as `parameters` for papermill parameterization
# The `None` values are placeholders; real values must be supplied before the
# cells below are run (via papermill or the interactive cell that follows).

# Input PDB file; passed to `escape_scores_to_b_factors` as `input_pdb_file`.
pdb_file = None

# Directory holding the `<antibody>_filtered_mut_effects.csv` files that are read.
filtered_csv_dir = None
# Output directory for the `<antibody>_mapped_pdb.pdb` files; created if missing.
pdb_dir = None

In [3]:
# Uncomment the assignments below to run this notebook interactively (without papermill)
# pdb_file = "../data/7puy.pdb"

# filtered_csv_dir = "../results/filtered_CSVs/"
# pdb_dir = "../results/mapped_scores_onto_pdb/"

In [4]:
def escape_scores_to_b_factors(input_pdb_file, output_pdb_file_dir, filtered_file_dir, antibody_name, chain_split_site=259):
    """
    Map per-site summed escape scores onto a PDB structure as B factors.

    Reads ``<antibody_name>_filtered_mut_effects.csv`` from ``filtered_file_dir``,
    sums ``floored_escape`` over all rows sharing a ``site``, prints summary
    statistics of those sums, and calls ``polyclonal.pdb_utils.reassign_b_factor``
    to write ``<antibody_name>_mapped_pdb.pdb`` into ``output_pdb_file_dir``.

    Parameters
    ----------
    input_pdb_file : str or path-like
        PDB file handed to ``reassign_b_factor`` as ``input_pdbfile``.
    output_pdb_file_dir : str or path-like
        Directory for the output PDB file. A trailing separator is optional.
    filtered_file_dir : str or path-like
        Directory containing the filtered CSV. A trailing separator is optional.
        The CSV must provide numeric ``site`` and ``floored_escape`` columns.
    antibody_name : str
        Antibody identifier used as the prefix of both the input and output file names.
    chain_split_site : int, optional
        Sites ``<= chain_split_site`` are assigned to the upper-case chains
        (A, B, C); all other sites go to the lower-case chains (a, b, c).

    Returns
    -------
    None
    """
    csv_path = os.path.join(filtered_file_dir, antibody_name + "_filtered_mut_effects.csv")
    mapped_pdb_path = os.path.join(output_pdb_file_dir, antibody_name + "_mapped_pdb.pdb")

    # Calculate site sums (only `site` and `floored_escape` survive the aggregation)
    site_sums = (
        pd.read_csv(csv_path)
        .groupby(["site"])
        .aggregate({"floored_escape": "sum"})
        .reset_index()
    )

    summed = site_sums["floored_escape"]
    print(antibody_name)
    print(f"Max summed escape: {summed.max()}") # Verify max matches altair plots
    for pct, q in ((50, 0.50), (75, 0.75), (90, 0.90), (95, 0.95), (99, 0.99)):
        print(f"{pct}th percentile of summed escape scores: {summed.quantile(q)}")
    print()

    # Every site is written once per chain pair so that each copy in the
    # structure receives the same value.
    # NOTE(review): presumably A/a, B/b, C/c are the two chains of each of three
    # identical protomers in the input PDB -- confirm against the structure file.
    in_upper_chain = site_sums["site"] <= chain_split_site
    per_chain_frames = [
        site_sums.assign(
            chain=[upper if is_upper else lower for is_upper in in_upper_chain]
        )
        for upper, lower in (("A", "a"), ("B", "b"), ("C", "c"))
    ]
    escape_scores = (
        pd.concat(per_chain_frames, ignore_index=True)
        .astype({"site": "int"})
    )

    polyclonal.pdb_utils.reassign_b_factor(
        input_pdbfile=input_pdb_file,
        output_pdbfile=mapped_pdb_path,
        df=escape_scores,
        metric_col="floored_escape",
        site_col="site",
        chain_col="chain",
    )

In [5]:
# Process antibody escape data: one mapped PDB file is written per antibody.
antibodies = [
    "S4378",
    "S43711",
    "S43720",
    "S43727",
    "S43742",
    "S44428",
    "S44433",
    "S44446",
    "S44478",
    "S43752",
]

# Make the output dir (including any missing parent dirs) if it doesn't exist.
# `exist_ok=True` replaces the separate existence check, so there is no window
# between checking and creating.
os.makedirs(pdb_dir, exist_ok=True)

for antibody in antibodies:
    escape_scores_to_b_factors(pdb_file, pdb_dir, filtered_csv_dir, antibody)

S4378
Max summed escape: 83.2097
50th percentile of summed escape scores: 1.447135
75th percentile of summed escape scores: 2.88186
90th percentile of summed escape scores: 14.849520000000002
95th percentile of summed escape scores: 32.51816949999989
99th percentile of summed escape scores: 63.73507200000001

S43711
Max summed escape: 115.727
50th percentile of summed escape scores: 1.1658
75th percentile of summed escape scores: 2.0886500000000003
90th percentile of summed escape scores: 3.2391039999999998
95th percentile of summed escape scores: 3.9858431999999993
99th percentile of summed escape scores: 24.311042400000023

S43720
Max summed escape: 106.922
50th percentile of summed escape scores: 1.9300499999999998
75th percentile of summed escape scores: 4.287744999999999
90th percentile of summed escape scores: 10.912341
95th percentile of summed escape scores: 22.876552
99th percentile of summed escape scores: 60.82993000000006

S43727
Max summed escape: 19.19153
50th percentile 