# Convert escape and functional scores to B factors (using polyclonal) for PDB viewing

In [None]:
# Imports
import pandas as pd
import polyclonal

In [None]:
# PDB structure passed as the input structure to the B-factor mapping below
input_pdb_file = "../data/7puy.pdb"
# CSV of functional effects that is read and mapped onto the structure
func_scores_data_file = "../results/func_effects/averages/293T_entry_func_effects.csv"

In [None]:
def functional_scores_to_b_factors(
    input_pdb_file,
    output_pdb_file,
    score_metric,
    n_models,
    times_seen,
    func_scores_file,
    *,
    chain_boundary_site=259,
    chain_pairs=(("A", "a"), ("B", "b"), ("C", "c")),
):
    """
    Map per-site functional scores onto the B factors of a PDB structure
    using ``polyclonal.pdb_utils.reassign_b_factor``.

    The functional scores CSV is filtered, the ``effect`` column is
    aggregated to one value per ``site``, and that per-site table is
    replicated once for every pair of chain IDs before being handed to
    polyclonal. The median of the resulting ``effect`` values is printed.

    Parameters
    ----------
    input_pdb_file : str
        Path of the PDB file passed to polyclonal as ``input_pdbfile``.
    output_pdb_file : str
        Path passed to polyclonal as ``output_pdbfile``.
    score_metric : str or callable
        Aggregation applied to ``effect`` within each site (anything
        accepted by pandas ``aggregate``, e.g. ``"sum"``).
    n_models : int
        Rows are kept only if ``n_selections >= n_models``.
    times_seen : int
        Rows are kept only if their ``times_seen`` column is at least
        this value.
    func_scores_file : str
        CSV with at least the columns ``site``, ``effect``,
        ``n_selections`` and ``times_seen``.
    chain_boundary_site : int, optional
        Sites ``<=`` this value get the first chain ID of each pair, all
        other sites get the second one. Defaults to 259.
    chain_pairs : sequence of (str, str), optional
        Chain ID pairs; the per-site table is emitted once per pair.
        Must not be empty. Defaults to A/a, B/b and C/c.

    Returns
    -------
    None
    """

    functional_scores = pd.read_csv(func_scores_file)

    # Keep only sufficiently supported measurements, then collapse the
    # per-mutation effects to a single value per site. The dict-keyed
    # aggregate selects just the "effect" column, so no other columns
    # need to be dropped beforehand.
    per_site = (
        functional_scores.query(
            "n_selections >= @n_models and times_seen >= @times_seen"
        )
        .groupby(["site"])
        .aggregate({"effect": score_metric})
        .reset_index()
    )

    # Vectorised chain assignment. Unlike a row-wise apply(axis=1), this
    # yields a Series even when no site survives the filter, so the
    # "chain" column can always be assigned.
    in_first_chain = per_site["site"] <= chain_boundary_site
    per_chain_tables = [
        per_site.assign(
            chain=in_first_chain.map({True: first_id, False: second_id})
        )
        for first_id, second_id in chain_pairs
    ]

    functional_scores = (
        pd.concat(per_chain_tables, ignore_index=True)
        .astype({"site": "int"})
    )

    # Median of the mapped values, printed for reference.
    print(functional_scores["effect"].quantile(0.50))

    # NOTE(review): missing_metric=0 is presumably the B factor given to
    # residues absent from df - confirm against the polyclonal docs.
    polyclonal.pdb_utils.reassign_b_factor(
        input_pdbfile=input_pdb_file,
        output_pdbfile=output_pdb_file,
        df=functional_scores,
        metric_col="effect",
        site_col="site",
        chain_col="chain",
        missing_metric=0,
    )

In [None]:
# Output PDB file for the functional scores mapped to the structure
functional_output = "functional_scores.pdb"

# Filtering thresholds and the per-site aggregation of effects
n_models = 8
times_seen = 3
score_metric = "sum"

functional_scores_to_b_factors(
    input_pdb_file=input_pdb_file,
    output_pdb_file=functional_output,
    score_metric=score_metric,
    n_models=n_models,
    times_seen=times_seen,
    func_scores_file=func_scores_data_file,
)