In [1]:
# Work from the repository root so that relative paths used in this notebook
# (e.g. the default output folder "zs/chai/output") resolve.
%cd ~/REVIVAL2

/disk2/fli/REVIVAL2


In [9]:
# Enable autoreload in mode 2 so that edits to imported modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Load the blackcellmagic extension (code-formatting cell magic).
%load_ext blackcellmagic

In [21]:
import os
from glob import glob

import numpy as np
import pandas as pd

from REVIVAL.util import get_file_name, get_dir_name, checkNgen_folder

In [None]:

def parse_chai_scores(mut_structure_dir: str, output_dir: str = "zs/chai/output"):
    """
    Collect the chai scores of one library into a single csv file.

    Every subfolder of `mut_structure_dir` is treated as one variant, and every
    `*.npz` file inside a subfolder as one replicate of that variant.
    One row is written per npz file with the following columns:
        - var: The name of the variant subfolder, ie I165A:I183A:Y301V
        - rep: The replicate label, taken as the text after the last "_"
            of the npz file name
        - aggregate_score
        - ptm
        - iptm
        - chain_ptm_A
        - chain_ptm_B
        - chain_iptm_AB
        - chain_iptm_BA
        - has_inter_chain_clashes

    Only the off-diagonal entries of `per_chain_pair_iptm` (AB and BA) are kept;
    the diagonal entries (AA and BB) are not written.

    Args:
    - mut_structure_dir, str: The path to the folder containing the chai scores
        ie zs/chai/mut_structure/PfTrpB-4bromo
        A trailing path separator is tolerated.
    - output_dir, str: The path to the folder to save the dataframe to
        ie zs/chai/output

    Returns:
    - pd.DataFrame: The table that was written to `<output_dir>/<lib_name>.csv`,
        where lib_name is the last component of `mut_structure_dir`.
        Empty (header only) if no npz file was found.

    Raises:
    - KeyError / IndexError: if an npz file lacks one of the expected arrays
        or holds fewer than two chains.
    """

    columns = [
        "var",
        "rep",
        "aggregate_score",
        "ptm",
        "iptm",
        "chain_ptm_A",
        "chain_ptm_B",
        "chain_iptm_AB",
        "chain_iptm_BA",
        "has_inter_chain_clashes",
    ]

    # presumably creates the folder if missing and returns its path
    # NOTE(review): confirm against REVIVAL.util.checkNgen_folder
    output_dir = checkNgen_folder(output_dir)

    # normpath drops a trailing separator, which would otherwise make
    # basename return "" and the output file be named ".csv"
    lib_name = os.path.basename(os.path.normpath(mut_structure_dir))

    # gather plain dicts and build the dataframe once at the end instead of
    # appending row by row (quadratic, and relied on the private `_append`)
    records = []

    # sorted so that the row order of the csv is reproducible across runs
    for subfolder in sorted(glob(f"{mut_structure_dir}/*")):
        var = os.path.basename(subfolder)

        for rep_npz in sorted(glob(f"{subfolder}/*.npz")):

            # the context manager closes the underlying zip file handle
            with np.load(rep_npz) as npz:
                per_chain_ptm = npz["per_chain_ptm"][0]
                pair_iptm = npz["per_chain_pair_iptm"][0]

                records.append(
                    {
                        "var": var,
                        # presumably get_file_name strips folder and extension
                        # NOTE(review): confirm against REVIVAL.util
                        "rep": get_file_name(rep_npz).split("_")[-1],
                        "aggregate_score": npz["aggregate_score"][0],
                        "ptm": npz["ptm"][0],
                        "iptm": npz["iptm"][0],
                        "chain_ptm_A": per_chain_ptm[0],
                        "chain_ptm_B": per_chain_ptm[1],
                        "chain_iptm_AB": pair_iptm[0, 1],
                        "chain_iptm_BA": pair_iptm[1, 0],
                        "has_inter_chain_clashes": npz["has_inter_chain_clashes"][0],
                    }
                )

    # passing `columns` keeps the header (and its order) even without records
    df = pd.DataFrame(records, columns=columns)

    df.to_csv(f"{output_dir}/{lib_name}.csv", index=False)
    print(f"Saved chai scores for {lib_name} to {output_dir}/{lib_name}.csv")

    return df

In [5]:
# Load one example chai score archive for inspection.
# The path is relative to the repository root, which the `%cd ~/REVIVAL2` cell
# at the top of the notebook makes the working directory.
npz = np.load("zs/chai/mut_structure/PfTrpB-4bromo/I165A:I183A:Y301V/I165A:I183A:Y301V_0.npz")

In [15]:
# First entry of the first row of per_chain_ptm; parse_chai_scores stores this
# value in the "chain_ptm_A" column.
npz["per_chain_ptm"][0][0]

0.9515228

In [18]:
# Show the [0, 0] and [0, 1] entries of the first per_chain_pair_iptm matrix.
# In this file the [0, 0] entry equals per_chain_ptm[0][0]; the [0, 1] entry is
# what parse_chai_scores stores as "chain_iptm_AB".
npz["per_chain_pair_iptm"][0][0, 0], npz["per_chain_pair_iptm"][0][0, 1]

(0.9515228, 0.51923597)

In [7]:
# Print every array stored in the archive, one "name: value" entry at a time.
for array_name in npz.files:
    array_value = npz[array_name]
    print(f"{array_name}: {array_value}")


aggregate_score: [0.85367167]
ptm: [0.9502157]
iptm: [0.82953566]
per_chain_ptm: [[0.9515228  0.61292917]]
per_chain_pair_iptm: [[[0.9515228  0.51923597]
  [0.82953566 0.61292917]]]
has_inter_chain_clashes: [False]
chain_intra_clashes: [[0 0]]
chain_chain_inter_clashes: [[[0 0]
  [0 0]]]
