# Imports

In [None]:
import mdtraj as md
import numpy as np
from datetime import datetime
from tqdm import tqdm
from pathlib import Path
import yaml

## Paths

In [None]:
# Root directory of the prepped SARS dataset; both receptor paths below are
# derived from it so that relocating the dataset only requires editing idir.
idir = Path("/Users/alexpayne/lilac-mount-point/asap-datasets/current/sars_01_prepped_v3")

# Two prepped receptor structures, each stored in a per-target subdirectory
# named after the target.
pdb_fn = (
    idir
    / "Mpro-x0072_0A_AAR-POS-d2a4d1df-1"
    / "Mpro-x0072_0A_AAR-POS-d2a4d1df-1_prepped_receptor_0.pdb"
)
pdb_fn2 = (
    idir
    / "Mpro-P0010_0A_PET-UNK-c9c1e0d8-4"
    / "Mpro-P0010_0A_PET-UNK-c9c1e0d8-4_prepped_receptor_0.pdb"
)

# YAML file with the selection definitions, relative to the working directory.
sars_selection_fn = Path("../../../metadata/sars2.yaml")

In [None]:
# Sanity check: True when the first receptor PDB path exists.
pdb_fn.exists()

In [None]:
# Sanity check: True when the second receptor PDB path exists.
pdb_fn2.exists()

## Load files

In [None]:
# Load the first receptor structure with mdtraj.
pdb1 = md.load(pdb_fn)

In [None]:
# Load the second receptor structure with mdtraj.
pdb2 = md.load(pdb_fn2)

In [None]:
# Parse the selection definitions from the YAML file; safe_load is used, so
# only plain YAML data types are constructed.
with open(sars_selection_fn, 'r') as f:
    sars_sel = yaml.safe_load(f)

In [None]:
# Display the parsed selection definitions.
sars_sel

In [None]:
def pymol_to_mdtraj(pymol_string):
    """
    Translate a PyMOL-like residue selection string into mdtraj-like syntax.

    The conversion is a fixed sequence of plain-text substitutions applied in
    order: ``+`` becomes `` or residue ``, ``-`` becomes `` to ``, ``resi ``
    becomes ``residue ``, and ``polymer.protein`` is removed. No parsing or
    validation of the selection is performed.

    Parameters
    ----------
    pymol_string : str
        Selection text written in PyMOL style.

    Returns
    -------
    str
        The text after all substitutions have been applied.
    """
    # Order matters only in that every rule sees the output of the previous one.
    substitutions = (
        ('+', ' or residue '),
        ('-', ' to '),
        ('resi ', 'residue '),
        ("polymer.protein", ""),
    )
    converted = pymol_string
    for old, new in substitutions:
        converted = converted.replace(old, new)
    return converted

In [None]:
# Print the mdtraj-style conversion of every selection value from the YAML file.
for selection in sars_sel.values():
    print(pymol_to_mdtraj(selection))

# Examine topology

In [None]:
# Display the topology of the first structure.
pdb1.topology

In [None]:
# Display the topology of the second structure.
pdb2.topology

In [None]:
# Slice the first structure down to the atoms matched by "chainid 0".
pdb1.atom_slice(pdb1.topology.select("chainid 0"))

In [None]:
# Look up chain index 1 of the second structure and follow its .topology
# attribute (presumably a back-reference to the parent topology — confirm).
pdb2.topology.chain(1).topology

In [None]:
def make_selection(t, string_):
    """
    Print the subset of ``t`` whose atoms match the selection ``string_``.

    Parameters
    ----------
    t : object
        Must provide ``topology.select(str)`` and ``atom_slice(indices)``
        (the notebook passes mdtraj trajectories).
    string_ : str
        Selection expression handed to ``t.topology.select``.

    Returns
    -------
    None
        The sliced object is only printed, not returned.
    """
    matched_atoms = t.topology.select(string_)
    subset = t.atom_slice(matched_atoms)
    print(subset)

In [None]:
# Print the "chainid 0" slice of each structure to compare their contents.
string_ = "chainid 0"
for pdb in [pdb1, pdb2]:
    make_selection(pdb, string_)

In [None]:
# Print the "chainid 1" slice of each structure to compare their contents.
string_ = "chainid 1"
for pdb in [pdb1, pdb2]:
    make_selection(pdb, string_)

In [None]:
# Print the "chainid 2" slice of each structure to compare their contents.
string_ = "chainid 2"
for pdb in [pdb1, pdb2]:
    make_selection(pdb, string_)

In [None]:
# Print the "chainid 3" slice of each structure to compare their contents.
string_ = "chainid 3"
for pdb in [pdb1, pdb2]:
    make_selection(pdb, string_)

In [None]:
# Print the slice for "resid 41" restricted to chainid 0 or chainid 1.
# NOTE(review): this uses the `resid` keyword, whereas the later selections
# use `residue`; per the mdtraj selection docs these are presumably the
# 0-based index vs. the PDB residue number — confirm which was intended.
string_ = "(chainid 0 or chainid 1) and resid 41"
for pdb in [pdb1, pdb2]:
    make_selection(pdb, string_)

In [None]:
# Print the chainid 2 atoms whose name does not match the regex 'H.*'.
string_ = "not (name =~ 'H.*') and chainid 2"
for pdb in [pdb1, pdb2]:
    make_selection(pdb, string_)

## Compare active sites

In [None]:
# Non-hydrogen atoms belonging to chainid 0 or chainid 2.
full_protein = "not element H and (chainid 0 or chainid 2)"

# Same atom filter, further restricted to the listed residue numbers/ranges.
active_site_string = (
    f"{full_protein} and ("
    "residue 140 to 145 or residue 163 or residue 172 or "
    "residue 25 to 27 or residue 41 or residue 49 or residue 54 or "
    "residue 165 to 168 or residue 189 to 192)"
)

In [None]:
# Print the full_protein slice of each structure.
for pdb in [pdb1, pdb2]:
    make_selection(pdb, full_protein)

In [None]:
# RMSD between the two structures over the active-site atom indices selected
# on pdb1's topology.
# NOTE(review): the same index array is applied to both structures, which
# presumably assumes identical atom ordering in pdb1 and pdb2 — confirm.
md.rmsd(pdb1, pdb2, atom_indices=pdb1.topology.select(active_site_string))

In [None]:
# Same RMSD call, but with the active-site atom indices selected on pdb2's
# topology; comparing with the pdb1-indexed result shows whether the two
# index sets agree.
md.rmsd(pdb1, pdb2, atom_indices=pdb2.topology.select(active_site_string))

In [None]:
# Active-site atoms of the first structure as their own sliced object.
pdb1_active_site = pdb1.atom_slice(pdb1.topology.select(active_site_string))

In [None]:
# Active-site atoms of the second structure as their own sliced object.
pdb2_active_site = pdb2.atom_slice(pdb2.topology.select(active_site_string))

In [None]:
# Display the summary of the second active-site slice.
pdb2_active_site

In [None]:
# Display the summary of the first active-site slice.
pdb1_active_site

In [None]:
# Combine the two active-site slices with md.join (presumably as consecutive
# frames, which would require compatible topologies — confirm).
t = md.join([pdb1_active_site, pdb2_active_site])

In [None]:
# Write the combined object to combined_test.pdb in the working directory.
t.save_pdb("combined_test.pdb")

In [None]:
# Show the documentation for save_pdb to check its options.
help(t.save_pdb)

In [None]:
# Run mdtraj's Shrake-Rupley routine on the combined object (presumably
# per-atom solvent accessible surface area — confirm against mdtraj docs).
md.shrake_rupley(t)

# Iterate through all PDBs 

In [None]:
import logging
def analyze_mp(fn, out_dir):
    """
    Extract heavy-atom selections from one PDB file and write them to ``out_dir``.

    The file is loaded with mdtraj and two selections are made on its topology:
    all non-hydrogen atoms in chainid 0 or chainid 2, and the subset of those
    atoms in the listed active-site residues. For each selection the atom-index
    array is saved as ``.npy`` and the sliced structure as ``.pdb``. Progress
    messages are written to ``<stem>-log.txt`` in ``out_dir``.

    Parameters
    ----------
    fn : str or pathlib.Path
        Structure file to load; its stem is used as the prefix of every output.
    out_dir : str or pathlib.Path
        Directory that receives the log, ``.npy`` and ``.pdb`` files. It is
        not created here.

    Returns
    -------
    bool
        Always ``True``; failures propagate as exceptions.
    """
    fn = Path(fn)
    out_dir = Path(out_dir)

    full_protein_selection = "not element H and (chainid 0 or chainid 2)"
    active_site_string = (
        f"{full_protein_selection} and ("
        "residue 140 to 145 or residue 163 or residue 172 or "
        "residue 25 to 27 or residue 41 or residue 49 or residue 54 or "
        "residue 165 to 168 or residue 189 to 192)"
    )
    output_name = fn.stem

    ## Prepare logger
    handler = logging.FileHandler(out_dir / f"{output_name}-log.txt", mode="w")
    prep_logger = logging.getLogger(output_name)
    prep_logger.setLevel(logging.INFO)
    prep_logger.addHandler(handler)

    try:
        prep_logger.info(datetime.now().isoformat())

        prep_logger.info("Loading %s", fn)
        pdb = md.load(fn)

        active_site_idx = pdb.topology.select(active_site_string)
        active_site = pdb.atom_slice(active_site_idx)

        full_protein_idx = pdb.topology.select(full_protein_selection)
        full_protein = pdb.atom_slice(full_protein_idx)

        prep_logger.info("Saving idx arrays to %s", out_dir)
        np.save(out_dir / f"{output_name}_active_site.npy", active_site_idx)
        np.save(out_dir / f"{output_name}_full_protein.npy", full_protein_idx)

        prep_logger.info("Saving pdbs to %s", out_dir)
        active_site.save(out_dir / f"{output_name}_active_site.pdb")
        full_protein.save(out_dir / f"{output_name}_full_protein.pdb")
    finally:
        # logging.getLogger caches loggers by name, so a handler left attached
        # would keep the log file open and duplicate every message the next
        # time the same stem is processed.
        prep_logger.removeHandler(handler)
        handler.close()

    return True

In [None]:
# Trial run on the first receptor, writing outputs to the working directory.
analyze_mp(pdb_fn, Path("."))

In [None]:
# File name without its final suffix; analyze_mp uses this as the output prefix.
pdb_fn.stem