# Create filtered antibody escape CSVs based on configuration for plotting

This notebook filters the antibody escape data using the same filters that are applied when plotting the data, and writes a filtered escape CSV for each antibody.

In [None]:
# Imports 
import os
import pandas as pd
import seaborn as sns

File paths for data and filter thresholds:

In [None]:
# This cell is tagged as `parameters` for papermill parameterization.
# Every value is a `None` placeholder here; presumably papermill supplies the
# real values at run time (see the commented-out interactive cell for examples).

# Path to the functional-effects CSV; read with `pd.read_csv` in `get_filtered_csv`
func_scores = None

# Paths to the per-antibody escape CSVs that are read and filtered
escape_377H = None
escape_89F = None
escape_2510C = None
escape_121F = None
escape_256A = None
escape_372D = None

# Filter thresholds (all applied as `>=` comparisons in `get_filtered_csv`)
# min_times_seen: minimum `times_seen`, applied to both functional and escape data
# min_func_score: minimum functional `effect` for a mutation to be retained
# n_selections: minimum `n_selections` in the functional data
# frac_models: minimum `frac_models` in the escape data
min_times_seen = None
min_func_score = None
n_selections = None
frac_models = None

# Directory that is created (if missing) before the filtered CSVs are written
out_dir = None

# Paths the filtered escape CSVs are written to, one per antibody
filtered_escape_377H = None
filtered_escape_89F = None
filtered_escape_2510C = None
filtered_escape_121F = None
filtered_escape_256A = None
filtered_escape_372D = None

In [None]:
# # Uncomment the lines below to run this notebook interactively
# func_scores = "../results/func_effects/averages/293T_entry_func_effects.csv"

# escape_377H = "../results/antibody_escape/averages/377H_mut_effect.csv"
# escape_89F = "../results/antibody_escape/averages/89F_mut_effect.csv"
# escape_2510C = "../results/antibody_escape/averages/2510C_mut_effect.csv"
# escape_121F = "../results/antibody_escape/averages/121F_mut_effect.csv"
# escape_256A = "../results/antibody_escape/averages/256A_mut_effect.csv"
# escape_372D = "../results/antibody_escape/averages/372D_mut_effect.csv"

# min_times_seen = 2
# min_func_score = -1.5
# n_selections = 8
# frac_models = 1

# out_dir = "../results/filtered_antibody_escape_CSVs/"

# filtered_escape_377H = "../results/filtered_antibody_escape_CSVs/377H_filtered_mut_effect.csv"
# filtered_escape_89F = "../results/filtered_antibody_escape_CSVs/89F_filtered_mut_effect.csv"
# filtered_escape_2510C = "../results/filtered_antibody_escape_CSVs/2510C_filtered_mut_effect.csv"
# filtered_escape_121F = "../results/filtered_antibody_escape_CSVs/121F_filtered_mut_effect.csv"
# filtered_escape_256A = "../results/filtered_antibody_escape_CSVs/256A_filtered_mut_effect.csv"
# filtered_escape_372D = "../results/filtered_antibody_escape_CSVs/372D_filtered_mut_effect.csv"

In [None]:
def get_filtered_csv(escape_file, output_file, func_scores, min_times_seen, min_func_score, n_selections, frac_models):
    """Filter an antibody escape CSV and write the result to ``output_file``.

    A row of the escape data is kept only if all of the following hold:

    * its ``mutation`` label matches a functional-score row whose
      ``times_seen``, ``n_selections`` and ``effect`` are each at or above
      ``min_times_seen``, ``n_selections`` and ``min_func_score``;
    * its own ``times_seen`` is at least ``min_times_seen``;
    * its own ``frac_models`` is at least ``frac_models``.

    Parameters
    ----------
    escape_file
        Path to the escape CSV. Must contain the columns ``mutation``,
        ``times_seen`` and ``frac_models``.
    output_file
        Path the filtered escape CSV is written to (without the index).
    func_scores
        Path to the functional-effects CSV. Must contain the columns
        ``site``, ``wildtype``, ``mutant``, ``effect``, ``times_seen`` and
        ``n_selections``.
    min_times_seen
        Minimum ``times_seen`` (applied to both tables).
    min_func_score
        Minimum functional ``effect`` for a mutation to be retained.
    n_selections
        Minimum ``n_selections`` in the functional-effects table.
    frac_models
        Minimum ``frac_models`` in the escape table.

    Returns
    -------
    None
        The filtered table is written to ``output_file``.
    """

    # Read data
    escape_df = pd.read_csv(escape_file)
    func_df = pd.read_csv(func_scores)

    # Functional-score rows that pass every functional threshold
    passes_func_filters = (
        (func_df["times_seen"] >= min_times_seen)
        & (func_df["n_selections"] >= n_selections)
        & (func_df["effect"] >= min_func_score)
    )
    retained = func_df.loc[passes_func_filters]

    # Build mutation labels (wildtype + site + mutant) matching the escape
    # table's `mutation` column. This is computed as a standalone Series so
    # that no columns are assigned onto a filtered slice of the dataframe,
    # which is what triggers pandas' SettingWithCopyWarning.
    allowed_mutations = (
        retained["wildtype"] + retained["site"].astype(str) + retained["mutant"]
    )

    # Filter escape df
    keep_rows = (
        escape_df["mutation"].isin(allowed_mutations)
        & (escape_df["times_seen"] >= min_times_seen)
        & (escape_df["frac_models"] >= frac_models)
    )
    filtered_df = escape_df.loc[keep_rows].reset_index(drop=True)

    filtered_df.to_csv(output_file, index=False)

In [None]:
# Escape CSVs to filter, one per antibody (order must match the outputs below)
antibody_input_files = [
    escape_377H,
    escape_89F,
    escape_2510C,
    escape_121F,
    escape_256A,
    escape_372D,
]

# Destination paths for the filtered CSVs, in the same antibody order
antibody_output_files = [
    filtered_escape_377H,
    filtered_escape_89F,
    filtered_escape_2510C,
    filtered_escape_121F,
    filtered_escape_256A,
    filtered_escape_372D,
]

# Make output dir if it doesn't exist. `makedirs(..., exist_ok=True)` also
# creates missing parent directories and avoids the check-then-create race.
os.makedirs(out_dir, exist_ok=True)

# Filter each antibody's escape file and write it to its paired output path
for escape_file, filtered_file in zip(antibody_input_files, antibody_output_files):
    get_filtered_csv(
        escape_file,
        filtered_file,
        func_scores,
        min_times_seen,
        min_func_score,
        n_selections,
        frac_models,
    )