In [1]:
import os
import pickle
import zipfile

import pandas as pd

In [2]:
# Load the pickled features mapping; the displayed output shows it is keyed by PDB id.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
features_path = "../../../training_data/8.Apos/Extra_set/features.pkl"
with open(features_path, mode="rb") as f:
    featuresd = pickle.load(f)

# Display the number of entries together with the mapping itself.
len(featuresd), featuresd

(9,
 {'8sgj':     Residues                                                          \
           pdb label_entity_id label_asym_id label_seq_id auth_asym_id   
  0       8sgj               1             A           52            A   
  1       8sgj               1             A           53            A   
  2       8sgj               1             A           54            A   
  3       8sgj               1             A           55            A   
  4       8sgj               1             A           56            A   
  ..       ...             ...           ...          ...          ...   
  746     8sgj               1             A          941            A   
  747     8sgj               1             A          942            A   
  748     8sgj               1             A          943            A   
  749     8sgj               1             A          944            A   
  750     8sgj               1             A          945            A   
  
                       

# Predict

In [3]:
# PASSER model names; each one gets its own output directory next to the notebook.
passer_models = [
    "ensemble",
    "automl",
    "rank",
]

for model in passer_models:
    # exist_ok makes the cell safe to re-run.
    os.makedirs(model, exist_ok=True)

In [4]:
import requests
from tqdm.notebook import tqdm

In [5]:
# Submit every structure to the PASSER web API once per model and unpack the
# returned zip archive into <model>/<pdb>/.
# A (model, pdb) pair is only requested when its directory is absent or holds
# at most one entry, so re-running the cell resumes instead of re-downloading.
for pdb in tqdm(featuresd, smoothing=0):
    for model in passer_models:
        pdbdir = f"{model}/{pdb}"
        if not os.path.isdir(pdbdir) or len(os.listdir(pdbdir)) <= 1:
            os.makedirs(pdbdir, exist_ok=True)
            # The context manager closes the upload handle even if the request
            # raises; binary mode sends the file bytes unmodified.
            with open(f"../structures/{pdb}.pdb", "rb") as pdb_file:
                results = requests.post(
                    'https://passer.smu.edu/api',
                    files={'pdbFile': pdb_file},
                    data={
                        "model": model,
                        "format": "zip"
                    }
                )
            if results.status_code == 200:
                zip_path = f"{pdbdir}/result.zip"
                with open(zip_path, "wb") as f:
                    f.write(results.content)
                # Extract in-process instead of shelling out to `unzip`: no
                # dependency on an external binary and no shell interpolation.
                try:
                    with zipfile.ZipFile(zip_path) as archive:
                        archive.extractall(pdbdir)
                except zipfile.BadZipFile:
                    # Keep going; the pair is reported and can be retried on re-run.
                    print("Failed:", pdb, model)
            else:
                print("Failed:", pdb, model)

  0%|          | 0/9 [00:00<?, ?it/s]

In [6]:
# Report every (pdb, model) pair whose output directory is absent or
# contains at most one entry.
for pdb in featuresd:
    for model in passer_models:
        pdbdir = f"{model}/{pdb}"
        has_results = os.path.isdir(pdbdir) and len(os.listdir(pdbdir)) > 1
        if not has_results:
            print("Missing:", pdb, model)

In [7]:
from Bio import PDB

In [8]:
# Sanity check: for each model output, the number of residues in the input PDB
# must equal the number of non-heteroatom residues in the PASSER output PDB
# (the output's heteroatoms are the pocket spheres and are excluded).
pdb_parser = PDB.PDBParser(QUIET=True)

for pdb in featuresd:
    # Residue count of the input structure; parsed lazily and only once per
    # structure instead of once per model.
    n_input_residues = None

    for model in passer_models:
        pdbdir = f"{model}/{pdb}"
        if not os.path.isdir(pdbdir):
            continue

        pdbf = f"{pdbdir}/{pdb}_out.pdb"
        if not os.path.isfile(pdbf):
            print("Missing pdb:", pdb, model)
            continue

        if n_input_residues is None:
            n_input_residues = sum(
                1
                for pdb_model in pdb_parser.get_structure(pdb, f"../structures/{pdb}.pdb")
                for chain in pdb_model
                for residue in chain
            )

        # `pdb_model` (not `model`) so the PASSER model name is not shadowed.
        n_output_residues = sum(
            1
            for pdb_model in pdb_parser.get_structure(pdb, pdbf)
            for chain in pdb_model
            for residue in chain
            if residue.id[0] == ' '  # it's not heteroatom
        )

        if n_input_residues != n_output_residues:
            print("Input and output PDBs with different number of residues:", pdb, model)

# Processing

In [9]:
from biotite.structure.io.pdb import PDBFile

In [10]:
def process_passer_pocket(f):
    """Return the distinct residues found in a pocket PDB file.

    Parameters
    ----------
    f
        Path (or file-like object) passed straight to ``PDBFile.read``.

    Returns
    -------
    pandas.DataFrame
        String-typed columns ``auth_asym_id``, ``auth_seq_id`` and
        ``pdbx_PDB_ins_code``, one row per distinct combination. Empty
        insertion codes are replaced by ``'?'``. The index is not reset after
        de-duplication, so it keeps the positions of the first occurrences.
    """
    atoms = PDBFile.read(f).get_structure()
    insertion_codes = [code if code else '?' for code in atoms.ins_code]
    columns = {
        "auth_asym_id": atoms.chain_id,
        "auth_seq_id": atoms.res_id,
        "pdbx_PDB_ins_code": insertion_codes,
    }
    per_atom = pd.DataFrame(columns, dtype=str)
    # Several atoms share one residue; keep a single row for each.
    return per_atom.drop_duplicates()

In [11]:
# Gather the PASSER output into passer_results[model][pdb][pocket], where each
# pocket entry holds its "prob/score" and a "residues" DataFrame.
passer_results = {}

for model in passer_models:
    modeld = {}

    for pdb in featuresd:
        pdbdir = f"{model}/{pdb}"
        if not os.path.isdir(pdbdir) or len(os.listdir(pdbdir)) <= 1:
            print("Missing:", pdb, model)
            continue

        # For every line of passer.txt, the third whitespace-separated token
        # (trailing ':' removed) is the pocket id and the last token is its
        # probability/score.
        pockets = {}
        with open(f"{pdbdir}/passer.txt", "r") as f:
            for line in f.read().splitlines():
                fields = line.split()  # split once, reuse for both values
                if not fields:
                    # Blank or whitespace-only line: nothing to parse.
                    continue
                pockets[fields[2].rstrip(':')] = {
                    "prob/score": float(fields[-1])
                }

        # Attach the residues listed in each pocket's atom file.
        for pocket in pockets:
            pockets[pocket]["residues"] = process_passer_pocket(f"{pdbdir}/pockets/pocket{pocket}_atm.pdb")

        modeld[pdb] = pockets

    passer_results[model] = modeld

passer_results

{'ensemble': {'8sgj': {'54': {'prob/score': 46.400259248912334,
    'residues':     auth_asym_id auth_seq_id pdbx_PDB_ins_code
    0              A         712                 ?
    1              A         715                 ?
    3              A         711                 ?
    5              A         237                 ?
    7              A         708                 ?
    8              A         707                 ?
    16             A         761                 ?
    17             A         760                 ?
    20             A         759                 ?
    25             A         236                 ?
    27             A          66                 ?
    30             A         235                 ?
    33             A          63                 ?
    39             A          62                 ?
    44             A         241                 ?
    45             A          69                 ?
    46             A         232                 ?
    49

In [12]:
# Persist the collected results next to the notebook so that later steps can
# load them without re-parsing the PASSER output.
passer_resultsf = "passer_results.pkl"

with open(passer_resultsf, mode="wb") as f:
    pickle.dump(passer_results, f)