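- The original ALLO feature set cannot be used as-is: the current DoGSiteScorer output no longer provides all of those features (see "Old vs. new version" below).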
- Highest probability pocket for each PDB is taken as the positive prediction.

<br>

In [1]:
# PDB entries processed by every loop in this notebook.
pdbs = [
    '7gqu',
    '7yg5',
    '8aq6',
    '8f4s',
    '8jp0',
    '8qni',
    '8uk6',
    '8v81',
    '9dnm',
]

# DoGSiteScorer

In [2]:
import os, requests, json, time

In [3]:
def get_state(location):
    """Issue a GET request to ``location``.

    Returns a ``(status_code, response)`` tuple so callers can branch on the
    HTTP status while still having the full response object available.
    """
    reply = requests.get(location)
    return reply.status_code, reply

In [71]:
def upload_pdb(file_path, max_retries=5, wait_time=5):
    """Upload a PDB file to the ProteinsPlus REST API and return its id.

    The file is POSTed to the upload endpoint, then the job location returned
    by the server is polled until it answers HTTP 200.

    Parameters
    ----------
    file_path : str
        Path of the PDB file to upload.
    max_retries : int
        Maximum number of additional polls after the first one.
    wait_time : int
        Base delay in seconds; the n-th retry waits ``wait_time * n`` seconds.

    Returns
    -------
    The ``"id"`` field of the final status response, or ``None`` when the
    server returned no job location or the job was not ready after all
    retries.
    """
    pdb_upload_url = "https://proteins.plus/api/pdb_files_rest"
    headers = {"Accept": "application/json"}

    # `with` closes the handle even if the POST raises.
    with open(file_path, "rb") as pdb_file:
        upload_response = requests.post(
            pdb_upload_url,
            headers=headers,
            files={"pdb_file[pathvar]": pdb_file},
        )

    location = upload_response.json().get("location")
    if not location:
        # Without a location there is nothing to poll.
        print("({}) upload returned no job location (HTTP {})".format(
            file_path, upload_response.status_code))
        return None

    time.sleep(10)
    status_code, response = get_state(location)

    retries = 0
    while status_code != 200 and retries < max_retries:
        wait = wait_time*(retries+1)
        print("({}) {}: try again after {} seconds".format(file_path, response.json().get("message"), wait))
        time.sleep(wait)
        status_code, response = get_state(location)
        retries += 1

    if status_code != 200:
        # Do not read an id off a response that never reached the finished state.
        print("({}) upload not finished after {} retries (HTTP {})".format(
            file_path, max_retries, status_code))
        return None

    return response.json().get("id")

In [79]:
def run_dogsite_analysis(pdb_id, max_retries=5, wait_time=30):
    """Start a DoGSiteScorer job for ``pdb_id`` and wait for its result.

    The job is submitted to the ProteinsPlus ``dogsite_rest`` endpoint and the
    returned job location is polled until it answers HTTP 200.

    Parameters
    ----------
    pdb_id : str
        Identifier of the structure, e.g. the id returned by ``upload_pdb``.
    max_retries : int
        Maximum number of additional polls after the first one.
    wait_time : int
        Base delay in seconds; the n-th retry waits ``wait_time * n`` seconds.

    Returns
    -------
    The decoded JSON result of the finished job, or ``None`` when the server
    returned no job location or the job was not finished after all retries.
    Callers can therefore test ``result is not None``.
    """
    dogsite_url = "https://proteins.plus/api/dogsite_rest"
    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json"
    }
    data = {
        "dogsite": {
            "pdbCode": pdb_id,
            "analysisDetail": "0",
            "bindingSitePredictionGranularity": "1",
            "ligand": "",
            "chain": ""
        }
    }

    upload_response = requests.post(dogsite_url, headers=headers, data=json.dumps(data))
    location = upload_response.json().get("location")
    if not location:
        # Without a location there is nothing to poll.
        print("({}) job submission returned no location (HTTP {})".format(
            pdb_id, upload_response.status_code))
        return None

    time.sleep(10)
    status_code, response = get_state(location)

    retries = 0
    while status_code != 200 and retries < max_retries:
        wait = wait_time*(retries+1)
        print("({}) {}: try again after {} seconds".format(pdb_id, response.json().get("message"), wait))
        time.sleep(wait)
        status_code, response = get_state(location)
        retries += 1

    if status_code != 200:
        # The last response is still a status message, not a result.
        print("({}) job not finished after {} retries (HTTP {})".format(
            pdb_id, max_retries, status_code))
        return None

    return response.json()

In [76]:
def download(url, path):
    """Download ``url`` into directory ``path``.

    The local file is named after the last ``/``-separated segment of the URL.
    Nothing is written, and a message is printed, when ``url`` is empty/None
    or the server does not answer HTTP 200.
    """
    if not url:
        # A missing key in the job result yields None; there is nothing to fetch.
        print("no URL given for download into {}: skipped".format(path))
        return

    response = requests.get(url)
    if response.status_code != 200:
        print("could not download {} (HTTP {})".format(url, response.status_code))
        return

    with open(os.path.join(path, url.split("/")[-1]), "wb") as f:
        f.write(response.content)

In [77]:
# Run DoGSiteScorer for every structure and store its files under ./<pdb>/.
for pdb in pdbs:
    pdb_path = "../structures/{}.pdb".format(pdb)
    pockets = "{}/pockets".format(pdb)

    # A structure counts as done only when its descriptor table is on disk.
    # Testing for the directory alone would permanently skip structures whose
    # job failed after the directory had been created.
    if os.path.isdir(pdb) and any(
        f.startswith(pdb) and f.endswith("_desc.txt") for f in os.listdir(pdb)
    ):
        continue

    if not os.path.isdir(pockets):
        os.makedirs(pockets)  # also creates the parent <pdb> directory

    pdb_id = upload_pdb(pdb_path)
    if pdb_id is None:
        print("({}) upload did not return an id: skipped".format(pdb))
        continue

    result = run_dogsite_analysis(pdb_id)
    if result is None:
        print("({}) no DoGSiteScorer result: skipped".format(pdb))
        continue

    for url in result.get("pockets", []) + result.get("residues", []):
        download(url, pockets)

    for key in ("descriptor_explanation", "result_table"):
        url = result.get(key)
        if url:
            download(url, pdb)
        else:
            print("({}) result has no '{}' entry".format(pdb, key))

(8f4spdbba7fbf1c-d660-41ad-84de-7e0607ef955f) Job exists and is still in 'processing' state: try again after 20 seconds
(8f4spdbba7fbf1c-d660-41ad-84de-7e0607ef955f) Job exists and is still in 'processing' state: try again after 40 seconds
(8jp0pdbf1334ccc-ca29-4e8f-b6a3-4457b1823b88) Job exists and is still in 'processing' state: try again after 20 seconds
(8jp0pdbf1334ccc-ca29-4e8f-b6a3-4457b1823b88) Job exists and is still in 'processing' state: try again after 40 seconds
(8jp0pdbf1334ccc-ca29-4e8f-b6a3-4457b1823b88) Job exists and is still in 'processing' state: try again after 60 seconds
(8qnipdb71ca3325-fc8a-4a7b-85af-5708b0c56e34) Job exists and is still in 'processing' state: try again after 20 seconds
(8qnipdb71ca3325-fc8a-4a7b-85af-5708b0c56e34) Job exists and is still in 'processing' state: try again after 40 seconds
(8qnipdb71ca3325-fc8a-4a7b-85af-5708b0c56e34) Job exists and is still in 'processing' state: try again after 60 seconds
(8uk6pdb6253e849-e69e-4f9f-bc56-8d37b068

## Old vs. new version

In [11]:
import pandas as pd

In [12]:
# Example descriptor table in the old format, read from ALLO's test_input directory.
old = pd.read_csv("allo/src/test_input/AS091022202_3PJG_complex.txt", sep="\t")
old

Unnamed: 0,name,lig_cov,poc_cov,lig_name,4A_crit,volume,hull,surface,lid,depth,...,I,N,DA,DC,DG,DT,DN,UNK,simpleScore,drugScore
0,AS091022202_3PJG_complex_chains_desc_P_1,0.0,0.0,non,0,926.336,1125.44,956.64,168.8,18.4,...,0,0,0,0,0,0,0,0,0.537801,0.520085
1,AS091022202_3PJG_complex_chains_desc_P_2,0.0,0.0,non,0,782.464,917.44,707.52,209.92,29.1232,...,0,0,0,0,0,0,0,0,0.445098,0.417353
2,AS091022202_3PJG_complex_chains_desc_P_3,23.7288,7.66129,UGA_A_90,1,555.52,659.52,551.68,107.84,16.0449,...,0,0,0,0,0,0,0,0,0.404041,0.36349
3,AS091022202_3PJG_complex_chains_desc_P_4,0.0,0.0,non,0,285.312,419.2,406.24,12.96,13.937,...,0,0,0,0,0,0,0,0,0.282709,0.100086
4,AS091022202_3PJG_complex_chains_desc_P_5,0.0,0.0,non,0,251.328,356.96,351.2,5.76,12.6554,...,0,0,0,0,0,0,0,0,0.365878,0.202703
5,AS091022202_3PJG_complex_chains_desc_P_6,0.0,0.0,non,0,148.544,198.24,152.32,45.92,7.16659,...,0,0,0,0,0,0,0,0,0.256271,0.420902
6,AS091022202_3PJG_complex_chains_desc_P_7,42.3729,85.4217,UGA_A_90,1,131.264,178.08,143.2,34.88,9.55824,...,0,0,0,0,0,0,0,0,0.532231,0.652536
7,AS091022202_3PJG_complex_chains_desc_P_8,0.0,0.0,non,0,116.224,169.76,148.96,20.8,8.88144,...,0,0,0,0,0,0,0,0,0.053244,0.447991
8,AS091022202_3PJG_complex_chains_desc_P_9,0.0,0.0,non,0,115.776,168.0,120.48,47.52,8.40952,...,0,0,0,0,0,0,0,0,0.0,0.266754
9,AS091022202_3PJG_complex_chains_desc_P_10,0.0,0.0,non,0,113.344,173.44,152.64,20.8,9.67471,...,0,0,0,0,0,0,0,0,0.231786,0.324773


In [13]:
# Descriptor table in the new format, read from the 7gqu results directory.
new = pd.read_csv("7gqu/7gqupdb1eccfc79-6d92-4241-bb30-60d0587ea079_desc.txt", sep="\t")
new

Unnamed: 0,name,lig_cov,poc_cov,lig_name,volume,enclosure,surface,depth,surf/vol,lid/hull,...,MET,PHE,PRO,SER,THR,TRP,TYR,VAL,simpleScore,drugScore
0,P_0,0.0,0.0,,1516.22,0.06,1705.19,21.71,1.124632,-,...,3,4,0,4,3,0,4,1,0.65,0.79768
1,P_1,0.0,0.0,,343.3,0.05,404.34,12.22,1.177804,-,...,3,1,0,0,3,0,2,1,0.17,0.577353
2,P_2,0.0,0.0,,336.7,0.04,570.81,12.87,1.695307,-,...,0,2,1,1,1,0,2,2,0.19,0.568479
3,P_3,0.0,0.0,,313.41,0.12,459.98,15.46,1.467662,-,...,1,1,2,1,1,0,2,0,0.18,0.649976
4,P_4,0.0,0.0,,182.34,0.24,397.51,11.11,2.180048,-,...,0,1,2,1,0,2,0,1,0.02,0.411775
5,P_5,0.0,0.0,,173.31,0.18,362.55,12.36,2.091916,-,...,0,2,0,2,1,0,0,0,0.0,0.416132
6,P_6,0.0,0.0,,148.42,0.18,254.42,8.37,1.714189,-,...,1,0,1,1,0,0,1,0,0.01,0.261178
7,P_7,0.0,0.0,,144.26,0.23,245.32,8.85,1.700541,-,...,0,1,0,2,0,0,1,0,0.0,0.269649
8,P_8,0.0,0.0,,136.13,0.16,274.2,11.38,2.014251,-,...,1,1,0,0,0,0,0,0,0.0,0.37056
9,P_9,0.0,0.0,,127.81,0.25,242.8,6.57,1.899695,-,...,0,0,0,0,1,0,0,3,0.0,0.199447


### Different columns

In [14]:
# Columns that exist only in the new table.
set(new.columns).difference(old.columns)

{'enclosure', 'hydrophobic_interactions'}

In [15]:
# Values of the columns that exist only in the new table, shown in the table's
# own column order (iterating a set gives an arbitrary order between runs).
new[[c for c in new.columns if c not in old.columns]]

Unnamed: 0,hydrophobic_interactions,enclosure
0,94,0.06
1,19,0.05
2,28,0.04
3,27,0.12
4,17,0.24
5,10,0.18
6,12,0.18
7,7,0.23
8,13,0.16
9,7,0.25


In [16]:
# Columns that exist only in the old table.
set(old.columns).difference(new.columns)

{'4A_crit',
 'A',
 'C',
 'CSO',
 'DA',
 'DC',
 'DG',
 'DN',
 'DT',
 'G',
 'I',
 'N',
 'U',
 'UNK',
 'aromat',
 'hull',
 'lid',
 'sumAA'}

In [17]:
# Values of the columns that exist only in the old table, in the old table's column order.
old[[c for c in old.columns if c not in new.columns]]

Unnamed: 0,4A_crit,hull,lid,aromat,sumAA,CSO,A,C,G,U,I,N,DA,DC,DG,DT,DN,UNK
0,0,1125.44,168.8,17,40,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,917.44,209.92,9,36,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1,659.52,107.84,9,27,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,419.2,12.96,4,22,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,356.96,5.76,15,19,0,0,0,0,0,0,0,0,0,0,0,0,0
5,0,198.24,45.92,14,8,0,0,0,0,0,0,0,0,0,0,0,0,0
6,1,178.08,34.88,10,6,0,0,0,0,0,0,0,0,0,0,0,0,0
7,0,169.76,20.8,3,10,0,0,0,0,0,0,0,0,0,0,0,0,0
8,0,168.0,47.52,0,10,0,0,0,0,0,0,0,0,0,0,0,0,0
9,0,173.44,20.8,4,10,0,0,0,0,0,0,0,0,0,0,0,0,0


**4A_crit is part of the labelling system of ALLO and must be kept/added.**

<br>

### Missing data in new output

In [19]:
# List the dtype of every column of the new table, sorted in descending dtype order.
new.dtypes.sort_values(ascending=False)

name                         object
ellVol                       object
lid/hull                     object
apolarAA                    float64
posAA                       float64
simpleScore                 float64
hydrophobicity              float64
ell b/a                     float64
ell c/a                     float64
surf/vol                    float64
depth                       float64
surface                     float64
enclosure                   float64
volume                      float64
lig_name                    float64
poc_cov                     float64
lig_cov                     float64
polarAA                     float64
drugScore                   float64
negAA                       float64
accept                        int64
VAL                           int64
TYR                           int64
TRP                           int64
THR                           int64
SER                           int64
PRO                           int64
PHE                         

In [20]:
# Show the two columns singled out from the dtype listing.
new.loc[:, ["lid/hull", "ellVol"]]

Unnamed: 0,lid/hull,ellVol
0,-,-
1,-,-
2,-,-
3,-,-
4,-,-
5,-,-
6,-,-
7,-,-
8,-,-
9,-,-


### Patch

- Create a fake new aplc.tsv to train the neural network using only the features that are available.
- Process all new DoGSiteScorer outputs so that they contain only the features shared with the old output format, plus a placeholder `4A_crit` column.

#### Training data

In [21]:
# Keep the original training table as a backup. Move it only once: re-running a
# plain `mv` would fail if aplc.tsv is gone, or overwrite the backup if a
# replacement aplc.tsv has been put in its place.
if not os.path.exists("allo/data/aplc_original.tsv"):
    os.rename("allo/data/aplc.tsv", "allo/data/aplc_original.tsv")

0

In [28]:
# Load the backed-up original training table.
aplc = pd.read_csv("allo/data/aplc_original.tsv", sep="\t")
aplc

Unnamed: 0,name,lig_cov,poc_cov,lig_name,4A_crit,volume,hull,surface,lid,depth,...,I,N,DA,DC,DG,DT,DN,UNK,simpleScore,drugScore
0,AS001000501_3UO9_complex_chains_desc_P_1,0.00000,0.000000,non,0,661.207,749.567,678.2210,71.346400,24.90670,...,0,0,0,0,0,0,0,0,0.468934,0.271149
1,AS001000501_3UO9_complex_chains_desc_P_2,0.00000,0.000000,non,0,492.799,558.164,434.6690,123.495000,17.28740,...,0,0,0,0,0,0,0,0,0.262105,0.215701
2,AS001000501_3UO9_complex_chains_desc_P_3,88.13560,72.303800,04A_B_2,1,480.683,558.164,502.8630,55.300700,20.22080,...,0,0,0,0,0,0,0,0,0.593739,0.695285
3,AS001000501_3UO9_complex_chains_desc_P_4,0.00000,0.000000,non,0,463.811,565.327,453.2930,112.034000,20.41120,...,0,0,0,0,0,0,0,0,0.285292,0.254757
4,AS001000501_3UO9_complex_chains_desc_P_5,0.00000,0.000000,non,0,398.013,519.196,388.2510,130.945000,20.24210,...,0,0,0,0,0,0,0,0,0.260897,0.147678
5,AS001000501_3UO9_complex_chains_desc_P_6,0.00000,0.000000,non,0,381.601,493.694,428.6520,65.042700,21.60460,...,0,0,0,0,0,0,0,0,0.386060,0.136666
6,AS001000501_3UO9_complex_chains_desc_P_7,0.00000,0.000000,non,0,373.626,436.102,369.9130,66.188900,17.80990,...,0,0,0,0,0,0,0,0,0.510129,0.285342
7,AS001000501_3UO9_complex_chains_desc_P_8,0.00000,0.000000,non,0,373.166,496.273,412.3190,83.953800,22.42460,...,0,0,0,0,0,0,0,0,0.391934,0.178084
8,AS001000501_3UO9_complex_chains_desc_P_9,0.00000,0.000000,non,0,353.380,476.789,396.8470,79.942400,24.36580,...,0,0,0,0,0,0,0,0,0.397897,0.088986
9,AS001000501_3UO9_complex_chains_desc_P_10,3.38983,0.591447,04A_B_2,0,337.122,408.594,303.1510,105.444000,17.75350,...,0,0,0,0,0,0,0,0,0.144927,0.317734


In [29]:
# The training table must have exactly the columns of the old descriptor format.
assert not set(aplc.columns).symmetric_difference(old.columns)

In [51]:
# Columns to drop: those present in only one of the two formats, plus the two
# columns that hold only "-" in the new output. "4A_crit" is kept because it is
# part of ALLO's labelling.
unusable = (set(new.columns) ^ set(aplc.columns)) | {"lid/hull", "ellVol"}
unusable.discard("4A_crit")

# Preserve the column order of the training table.
columns = [c for c in aplc.columns if c not in unusable]
columns

['name',
 'lig_cov',
 'poc_cov',
 'lig_name',
 '4A_crit',
 'volume',
 'surface',
 'depth',
 'surf/vol',
 'ell c/a',
 'ell b/a',
 'siteAtms',
 'accept',
 'donor',
 'hydrophobicity',
 'metal',
 'Cs',
 'Ns',
 'Os',
 'Ss',
 'Xs',
 'negAA',
 'posAA',
 'polarAA',
 'apolarAA',
 'ALA',
 'ARG',
 'ASN',
 'ASP',
 'CYS',
 'GLN',
 'GLU',
 'GLY',
 'HIS',
 'ILE',
 'LEU',
 'LYS',
 'MET',
 'PHE',
 'PRO',
 'SER',
 'THR',
 'TRP',
 'TYR',
 'VAL',
 'simpleScore',
 'drugScore']

In [52]:
# Training table restricted to the shared feature columns.
fake_aplc = aplc.loc[:, columns]
fake_aplc

Unnamed: 0,name,lig_cov,poc_cov,lig_name,4A_crit,volume,surface,depth,surf/vol,ell c/a,...,MET,PHE,PRO,SER,THR,TRP,TYR,VAL,simpleScore,drugScore
0,AS001000501_3UO9_complex_chains_desc_P_1,0.00000,0.000000,non,0,661.207,678.2210,24.90670,0.549061,0.120963,...,2,3,2,5,0,0,2,1,0.468934,0.271149
1,AS001000501_3UO9_complex_chains_desc_P_2,0.00000,0.000000,non,0,492.799,434.6690,17.28740,0.472144,0.117456,...,2,1,1,4,0,0,1,1,0.262105,0.215701
2,AS001000501_3UO9_complex_chains_desc_P_3,88.13560,72.303800,04A_B_2,1,480.683,502.8630,20.22080,0.559987,0.081842,...,0,2,0,0,0,0,2,0,0.593739,0.695285
3,AS001000501_3UO9_complex_chains_desc_P_4,0.00000,0.000000,non,0,463.811,453.2930,20.41120,0.523148,0.093727,...,2,2,2,3,0,0,0,2,0.285292,0.254757
4,AS001000501_3UO9_complex_chains_desc_P_5,0.00000,0.000000,non,0,398.013,388.2510,20.24210,0.522158,0.090831,...,0,1,3,1,0,0,0,3,0.260897,0.147678
5,AS001000501_3UO9_complex_chains_desc_P_6,0.00000,0.000000,non,0,381.601,428.6520,21.60460,0.601286,0.055553,...,2,0,0,1,3,0,0,2,0.386060,0.136666
6,AS001000501_3UO9_complex_chains_desc_P_7,0.00000,0.000000,non,0,373.626,369.9130,17.80990,0.529967,0.145510,...,0,2,2,1,0,0,0,0,0.510129,0.285342
7,AS001000501_3UO9_complex_chains_desc_P_8,0.00000,0.000000,non,0,373.166,412.3190,22.42460,0.591451,0.048928,...,0,2,2,1,1,1,1,2,0.391934,0.178084
8,AS001000501_3UO9_complex_chains_desc_P_9,0.00000,0.000000,non,0,353.380,396.8470,24.36580,0.601128,0.032981,...,0,2,3,1,1,1,0,2,0.397897,0.088986
9,AS001000501_3UO9_complex_chains_desc_P_10,3.38983,0.591447,04A_B_2,0,337.122,303.1510,17.75350,0.481347,0.102600,...,0,1,1,1,0,0,0,1,0.144927,0.317734


In [54]:
# Write the reduced training table as a tab-separated file without the index.
# NOTE(review): this path is relative to the parent directory, while the original
# table was read from allo/data/ — confirm this is where the training step reads it.
fake_aplc.to_csv("../data/aplc.tsv", sep="\t", index=False) #"allo/data/aplc_new.tsv"

#### New data

In [81]:
# Reduce each downloaded descriptor table to the shared columns and save it as
# <pdb>/<pdb>.txt.
for pdb in pdbs:
    if not os.path.isdir(pdb):
        continue

    # sorted() makes the choice deterministic if several tables are present.
    inf = next(
        (f for f in sorted(os.listdir(pdb)) if f.startswith(pdb) and f.endswith("_desc.txt")),
        None,
    )
    if inf is None:
        # The directory exists but holds no descriptor table; report it
        # instead of failing with a bare StopIteration.
        print("({}) no *_desc.txt descriptor table found: skipped".format(pdb))
        continue

    (
        pd.read_csv("{}/{}".format(pdb, inf), sep="\t")
        # The new output has no 4A_crit column; add it with a constant 0.
        .assign(**{"4A_crit": 0})
        [columns]
        .to_csv("{}/{}.txt".format(pdb, pdb), sep="\t", index=False)
    )

# Predictions

In [82]:
# Stack the reduced tables of all structures into one frame.
frames = []
for pdb in pdbs:
    table_path = "{}/{}.txt".format(pdb, pdb)
    if not (os.path.isdir(pdb) and os.path.isfile(table_path)):
        continue

    pocket_table = pd.read_csv(table_path, sep="\t")
    # Prefix each pocket name with its PDB id.
    pocket_table["name"] = ["{}_{}".format(pdb, pocket_name) for pocket_name in pocket_table["name"]]
    frames.append(pocket_table[columns])

data = pd.concat(frames)
data

Unnamed: 0,name,lig_cov,poc_cov,lig_name,4A_crit,volume,surface,depth,surf/vol,ell c/a,...,MET,PHE,PRO,SER,THR,TRP,TYR,VAL,simpleScore,drugScore
0,7gqu_P_0,0.0,0.0,,0,1516.22,1705.19,21.71,1.124632,0.22,...,3,4,0,4,3,0,4,1,0.65,0.797680
1,7gqu_P_1,0.0,0.0,,0,343.30,404.34,12.22,1.177804,0.28,...,3,1,0,0,3,0,2,1,0.17,0.577353
2,7gqu_P_2,0.0,0.0,,0,336.70,570.81,12.87,1.695307,0.25,...,0,2,1,1,1,0,2,2,0.19,0.568479
3,7gqu_P_3,0.0,0.0,,0,313.41,459.98,15.46,1.467662,0.16,...,1,1,2,1,1,0,2,0,0.18,0.649976
4,7gqu_P_4,0.0,0.0,,0,182.34,397.51,11.11,2.180048,0.08,...,0,1,2,1,0,2,0,1,0.02,0.411775
5,7gqu_P_5,0.0,0.0,,0,173.31,362.55,12.36,2.091916,0.17,...,0,2,0,2,1,0,0,0,0.00,0.416132
6,7gqu_P_6,0.0,0.0,,0,148.42,254.42,8.37,1.714189,0.59,...,1,0,1,1,0,0,1,0,0.01,0.261178
7,7gqu_P_7,0.0,0.0,,0,144.26,245.32,8.85,1.700541,0.33,...,0,1,0,2,0,0,1,0,0.00,0.269649
8,7gqu_P_8,0.0,0.0,,0,136.13,274.20,11.38,2.014251,0.19,...,1,1,0,0,0,0,0,0,0.00,0.370560
9,7gqu_P_9,0.0,0.0,,0,127.81,242.80,6.57,1.899695,0.23,...,0,0,0,0,1,0,0,3,0.00,0.199447


In [83]:
# Save the combined table as a tab-separated file without the index.
data.to_csv("data.txt", sep="\t", index=False)

In [84]:
import subprocess

# Run rank_nn.py from inside allo/src on the absolute path of data.txt.
# An argument list plus `cwd=` avoids building a shell string, which breaks when
# the working directory contains spaces or shell metacharacters.
# The displayed value is the script's exit code (0 on success).
subprocess.call(
    ["python", "rank_nn.py", os.path.join(os.getcwd(), "data.txt")],
    cwd="allo/src",
)

0

# Process

In [30]:
# Load the per-pocket predicted probabilities.
# NOTE(review): presumably written by rank_nn.py for data.txt — confirm.
preds = pd.read_csv("data_nn_out.txt")
preds

Unnamed: 0,name,predicted probability
0,8jp0_P_0,0.19816
1,8qni_P_0,0.13827
2,9dnm_P_0,0.13759
3,8uk6_P_1,0.08049
4,7gqu_P_0,0.07519
5,8v81_P_0,0.04905
6,8v81_P_2,0.04507
7,8v81_P_3,0.04275
8,8uk6_P_0,0.04245
9,8f4s_P_13,0.04014


In [31]:
# Split "<pdb>_<pocket>" at the first underscore only, because pocket names
# such as "P_0" contain an underscore themselves.
name_parts = [full_name.split("_", 1) for full_name in preds["name"]]

# Transpose the [pdb, pocket] pairs into one sequence per new column.
split_columns = dict(zip(("pdb", "pocket"), zip(*name_parts)))
preds = preds.assign(**split_columns)

preds

Unnamed: 0,name,predicted probability,pdb,pocket
0,8jp0_P_0,0.19816,8jp0,P_0
1,8qni_P_0,0.13827,8qni,P_0
2,9dnm_P_0,0.13759,9dnm,P_0
3,8uk6_P_1,0.08049,8uk6,P_1
4,7gqu_P_0,0.07519,7gqu,P_0
5,8v81_P_0,0.04905,8v81,P_0
6,8v81_P_2,0.04507,8v81,P_2
7,8v81_P_3,0.04275,8v81,P_3
8,8uk6_P_0,0.04245,8uk6,P_0
9,8f4s_P_13,0.04014,8f4s,P_13


In [47]:
from Bio import PDB

def process_pocket(pdb, pocket):
    """Return the residues of one predicted pocket as a DataFrame.

    The residue file is looked up in ``<pdb>/pockets``: the first file whose
    name starts with ``pdb`` and ends with ``_<pocket>_res.pdb``. It is parsed
    with Bio.PDB and every residue contributes one row.

    Returns
    -------
    pandas.DataFrame with the columns ``auth_asym_id`` (chain id),
    ``auth_seq_id`` (second element of the residue id) and
    ``pdbx_PDB_ins_code`` (third element of the residue id, ``"?"`` when it is
    a blank), with duplicate rows removed.

    Raises
    ------
    FileNotFoundError
        If no residue file for ``pocket`` exists in ``<pdb>/pockets``.
    """
    pockets_dir = "{}/pockets".format(pdb)
    suffix = "_{}_res.pdb".format(pocket)

    f = next(
        (
            name
            for name in os.listdir(pockets_dir)
            if name.startswith(pdb) and name.endswith(suffix)
        ),
        None,
    )
    if f is None:
        # A bare StopIteration from next() would not say which file is missing.
        raise FileNotFoundError(
            "no residue file '{}*{}' in {}".format(pdb, suffix, pockets_dir)
        )

    s = PDB.PDBParser(QUIET=True).get_structure("protein", "{}/{}".format(pockets_dir, f))
    return pd.DataFrame(
        {
            "auth_asym_id": chain.id,
            "auth_seq_id": res.get_id()[1],
            "pdbx_PDB_ins_code": res.get_id()[2] if res.get_id()[2] != " " else "?"
        }
        for model in s
        for chain in model
        for res in chain
    ).drop_duplicates()

In [49]:
results = {}

for pdb, group in preds.groupby("pdb"):
    # Rank by probability (highest first); ties are broken by pocket name.
    ranked = group.sort_values(
        ["predicted probability", "pocket"], ascending=[False, True]
    )
    rows = ranked[["predicted probability", "pocket"]].values.tolist()

    # Only the top-ranked pocket of each structure gets pred = 1.
    results[pdb] = {
        pocket: {
            "prob": prob,
            "pred": 1 if rank == 0 else 0,
            "residues": process_pocket(pdb, pocket)
        }
        for rank, (prob, pocket) in enumerate(rows)
    }

results

{'7gqu': {'P_0': {'pred': 1,
   'prob': 0.07518999999999999,
   'residues':    auth_asym_id  auth_seq_id pdbx_PDB_ins_code
   0             A          571                 ?
   1             A          572                 ?
   2             A          573                 ?
   3             A          574                 ?
   4             A          575                 ?
   5             A          576                 ?
   6             A          577                 ?
   7             A          578                 ?
   8             A          581                 ?
   9             A          594                 ?
   10            A          596                 ?
   11            A          598                 ?
   12            A          599                 ?
   13            A          600                 ?
   14            A          601                 ?
   15            A          602                 ?
   16            A          604                 ?
   17            A         

In [6]:
import pickle

# Name of the pickle file that stores the results dict.
resultsf = "ALLO_results.pkl"

In [None]:
# Write the results dict to the pickle file.
with open(resultsf, "wb") as f:
    pickle.dump(results, f)

In [7]:
# Read the results dict back from the pickle file.
# Unpickling can execute arbitrary code, so only load a file from a trusted source.
with open(resultsf, "rb") as f:
    results = pickle.load(f)

results

{'7gqu': {'P_0': {'pred': 1,
   'prob': 0.07518999999999999,
   'residues':    auth_asym_id  auth_seq_id pdbx_PDB_ins_code
   0             A          571                 ?
   1             A          572                 ?
   2             A          573                 ?
   3             A          574                 ?
   4             A          575                 ?
   5             A          576                 ?
   6             A          577                 ?
   7             A          578                 ?
   8             A          581                 ?
   9             A          594                 ?
   10            A          596                 ?
   11            A          598                 ?
   12            A          599                 ?
   13            A          600                 ?
   14            A          601                 ?
   15            A          602                 ?
   16            A          604                 ?
   17            A         

In [8]:
# Build a copy of `results` in which each residue DataFrame is replaced by a
# dict of string lists, one list per column.
resultsd = {}
for pdb, pockets in results.items():
    converted_pockets = {}
    for pocket, pocketd in pockets.items():
        entry = {}
        for k, v in pocketd.items():
            if k == "residues":
                entry[k] = v.astype(str).to_dict(orient="list")
            else:
                entry[k] = v
        converted_pockets[pocket] = entry
    resultsd[pdb] = converted_pockets

resultsd

{'7gqu': {'P_0': {'pred': 1,
   'prob': 0.07518999999999999,
   'residues': {'auth_asym_id': ['A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A',
     'A'],
    'auth_seq_id': ['571',
     '572',
     '573',
     '574',
     '575',
     '576',
     '577',
     '578',
     '581',
     '594',
     '596',
     '598',
     '599',
     '600',
     '601',
     '602',
     '604',
     '605',
     '640',
     '668',
     '669',
     '670',
     '671',
     '677',
     '678',
     

In [9]:
import json

In [10]:
# Write resultsd as JSON to the pickle's file name with ".pkl" replaced by ".json".
with open(resultsf.replace(".pkl", ".json"), "w") as f:
    json.dump(resultsd, f)