In [17]:
import os, pickle
import pandas as pd

In [2]:
with open("../../../training_data/7.Extra_set/features.pkl", "rb") as f:
    featuresd = pickle.load(f)

len(featuresd), featuresd

(9,
 {'7gqu':     Residues                                                          \
           pdb label_entity_id label_asym_id label_seq_id auth_asym_id   
  0       7gqu               1             A           12            A   
  1       7gqu               1             A           13            A   
  2       7gqu               1             A           14            A   
  3       7gqu               1             A           15            A   
  4       7gqu               1             A           16            A   
  ..       ...             ...           ...          ...          ...   
  414     7gqu               1             A          426            A   
  415     7gqu               1             A          427            A   
  416     7gqu               1             A          428            A   
  417     7gqu               1             A          429            A   
  418     7gqu               1             A          430            A   
  
                       

# Predict

In [3]:
passer_models = ["ensemble", "automl", "rank"]

for model in passer_models:
    os.makedirs(f"{model}", exist_ok=True)

In [4]:
import requests
from tqdm.notebook import tqdm

In [8]:
for pdb in tqdm(featuresd, smoothing=0):
    for model in passer_models:
        pdbdir = f"{model}/{pdb}"
        if not os.path.isdir(pdbdir) or len(os.listdir(pdbdir)) <= 1:
            os.makedirs(pdbdir, exist_ok=True)
            results = requests.post(
                'https://passer.smu.edu/api', 
                files={'pdbFile': open(f"../structures/{pdb}.pdb", 'r')}, 
                data={
                    "model": model,
                    "format": "zip"
                }
            )
            if results.status_code == 200:
                with open(f"{model}/{pdb}/result.zip", "wb") as f:
                    f.write(results.content)
                os.system(f"unzip -qq {model}/{pdb}/result.zip -d {model}/{pdb}")
            else:
                print("Failed:", pdb, model)

  0%|          | 0/9 [00:00<?, ?it/s]

In [9]:
# Any missing
for pdb in featuresd:
    for model in passer_models:
        pdbdir = f"{model}/{pdb}"
        if not os.path.isdir(pdbdir) or len(os.listdir(pdbdir)) <= 1:
            print("Missing:", pdb, model)

In [11]:
from Bio import PDB

In [12]:
# Sanity check
for pdb in featuresd:
    for model in passer_models:
        pdbdir = f"{model}/{pdb}"
        if os.path.isdir(pdbdir):
            pdbf = f"{pdbdir}/{pdb}_out.pdb"
            if os.path.isfile(pdbf):
                # Check that the length of the input pdb and output pdb (without heteroatoms of the pocket spheres) are the same
                if len(tuple(
                    residue
                    for model in PDB.PDBParser(QUIET=True).get_structure(pdb, f"../structures/{pdb}.pdb")
                    for chain in model
                    for residue in chain
                )) != len(tuple(
                    residue
                    for model in PDB.PDBParser(QUIET=True).get_structure(pdb, pdbf)
                    for chain in model
                    for residue in chain
                    if residue.id[0] == ' ' # it's not heteroatom                    
                )):
                    print("Input and output PDBs with different number of residues:", pdb)
            else:
                print("Missing pdb:", pdb)

# Processing

In [13]:
from biotite.structure.io.pdb import PDBFile

In [14]:
def process_passer_pocket(f):
    atom_array = PDBFile.read(f).get_structure()
    return pd.DataFrame({
        "auth_asym_id": atom_array.chain_id,
        "auth_seq_id": atom_array.res_id,
        "pdbx_PDB_ins_code": (ic or '?' for ic in atom_array.ins_code)
    }, dtype=str).drop_duplicates()

In [18]:
passer_results = {}

for model in passer_models:
    modeld = {}
    
    for pdb in featuresd:
        pdbdir = f"{model}/{pdb}"
        if not os.path.isdir(pdbdir) or len(os.listdir(pdbdir)) <= 1:
            print("Missing:", pdb, model)
            continue
        with open(f"{pdbdir}/passer.txt", "r") as f:
            pockets = {
            line.split()[2].rstrip(':'): {
                "prob/score": float(line.split()[-1])
            } 
            for line in f.read().splitlines()
        }

        for pocket in pockets:
            pockets[pocket]["residues"] = process_passer_pocket(f"{pdbdir}/pockets/pocket{pocket}_atm.pdb")

        modeld[pdb] = pockets

    passer_results[model] = modeld

passer_results

{'ensemble': {'7gqu': {'20': {'prob/score': 50.99665205925703,
    'residues':    auth_asym_id auth_seq_id pdbx_PDB_ins_code
    0             A         916                 ?
    1             A         917                 ?
    4             A         920                 ?
    5             A         727                 ?
    6             A         919                 ?
    8             A         555                 ?
    9             A         552                 ?
    11            A         551                 ?
    12            A         846                 ?
    13            A         845                 ?
    15            A         849                 ?
    17            A         913                 ?
    25            A         706                 ?
    26            A         570                 ?
    27            A         726                 ?
    29            A         725                 ?
    30            A         898                 ?
    31            A      

In [19]:
passer_resultsf = "passer_results.pkl"

with open(passer_resultsf, "wb") as f:
    pickle.dump(passer_results, f)