## Data preparation

In [1]:
import os
import torch
from Bio.PDB import *
import numpy as np
import pandas as pd
from torch_cluster import knn
from antiberty import AntiBERTyRunner
from torch_geometric.data import Data

import sys 
import warnings
# Suppress all warnings for the rest of the session.
warnings.filterwarnings("ignore")
# Add a local directory to the module search path.
# NOTE(review): this is a machine-specific absolute Windows path, presumably the
# Nanotope project checkout — adjust it to the local environment.
sys.path.append(r"E:\608\论文\代码\Nanotope\Nanotope")

# Input structure: PDB file name and the ID of the chain to process.
pdbfile = '6h72.pdb'
chainid = 'C'

# Fixed number of rows used when zero-padding per-residue arrays.
NB_MAX_LENGTH = 140
# Number of neighbours requested when building the KNN edges.
k = 32

# Atom names stored per residue; each one contributes an (x, y, z) triple,
# so OUTPUT_SIZE is the number of coordinate columns per residue.
BACKBONE_ATOMS = ["N", "CA", "C", "O", "CB"]
OUTPUT_SIZE = 3 * len(BACKBONE_ATOMS)

def get_seq_aa(pdb_file, chain_id):
    """Return the one-letter sequence and the residue objects of one PDB chain.

    Only the first model of the parsed structure is read. A residue is skipped
    when its name is not accepted by ``is_aa`` or when it has no ``CA`` atom.
    Residues named ``UNK`` are written as ``X``; every other kept residue is
    converted with ``Polypeptide.three_to_one``.

    Args:
        pdb_file: Path of the PDB file (also passed as the structure id).
        chain_id: Identifier of the chain to extract.

    Returns:
        A ``(seq, aa_residues)`` tuple: the sequence string and the list of
        kept residue objects, in chain order and of equal length.
    """
    structure = PDBParser(QUIET=True).get_structure(pdb_file, pdb_file)
    chain = structure[0][chain_id]

    letters = []
    aa_residues = []
    for residue in chain.get_residues():
        resname = residue.get_resname()
        # Keep only amino-acid residues that carry a C-alpha atom.
        if not (is_aa(resname) and residue.has_id('CA')):
            continue
        if resname == "UNK":
            letters.append("X")
        else:
            letters.append(Polypeptide.three_to_one(resname))
        aa_residues.append(residue)

    return "".join(letters), aa_residues

def generate_coord(pdb_file, chain_id):
    """Return the chain sequence and its zero-padded C-alpha coordinates.

    Only the ``CA`` atom of each residue is read: ``get_seq_aa`` keeps only
    residues that have one, so residues lacking other atoms (e.g. a missing
    ``CB`` or ``O``) no longer make this function fail.

    Args:
        pdb_file: Path of the PDB file.
        chain_id: Identifier of the chain to extract.

    Returns:
        A ``(seq, coords)`` tuple. ``seq`` is the one-letter sequence and
        ``coords`` is a float NumPy array of shape ``(NB_MAX_LENGTH, 3)``;
        row ``i`` holds the C-alpha position of residue ``i`` and rows past
        the end of the chain are all zeros.

    Raises:
        IndexError: If the chain has more than ``NB_MAX_LENGTH`` residues.
    """
    seq, aa_residues = get_seq_aa(pdb_file, chain_id)
    if len(aa_residues) > NB_MAX_LENGTH:
        raise IndexError(
            f"chain {chain_id!r} has {len(aa_residues)} residues, "
            f"more than NB_MAX_LENGTH={NB_MAX_LENGTH}"
        )

    # Rows that are never written stay at zero and act as padding.
    ca_coords = np.zeros((NB_MAX_LENGTH, 3))
    for i, residue in enumerate(aa_residues):
        ca_coords[i] = residue["CA"].get_coord()

    return seq, ca_coords

# Sequence and zero-padded C-alpha coordinates (NB_MAX_LENGTH rows) of the chain.
seq, coord = generate_coord(pdbfile, chainid)
size = len(seq)
coord = torch.tensor(coord)

# Per-residue sequence embedding from the AntiBERTy model. The [1:-1] slice
# drops the first and last rows of the output (presumably the special
# start/end tokens — confirm against the AntiBERTy documentation).
Antiberty = AntiBERTyRunner()
embeddings = Antiberty.embed([seq])[0][1:-1]

# Zero-pad the embedding to NB_MAX_LENGTH rows when the sequence is shorter.
# new_zeros creates the padding with the same dtype and device as the
# embedding, so this works with or without a GPU.
if size < NB_MAX_LENGTH:
    pad = embeddings.new_zeros((NB_MAX_LENGTH - size, embeddings.size(-1)))
    embeddings = torch.cat([embeddings, pad], dim=0)

# Construct KNN edges from the coordinate rows (padding rows included).
edge_index = knn(coord, coord, k=k)

# Construct the graph data; `mask` stores the true (unpadded) sequence length.
data = Data(
    x=embeddings.unsqueeze(0),
    edge_index=edge_index,
    mask=size,
    batch=torch.Tensor([0]),
)

## Model prediction

In [2]:
from model.GNNnet import Nanotope

model = Nanotope(hidden_channels=512, num_layers=3, num_heads=8, num_bases=8)

# Use the GPU when one is available and fall back to the CPU otherwise;
# map_location lets weights saved from a GPU be loaded on a CPU-only machine.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.load_state_dict(
    torch.load('../model/model_weights/model_weights.pt', map_location=device)
)
model.eval().to(device)

# Prediction: inference only, so gradient tracking is disabled. The output is
# cut to the first `data.mask` entries, i.e. the unpadded sequence length.
with torch.no_grad():
    prediction = model(data)[0:data.mask]

In [10]:
def display_prediction(seq, prediction):
    """Print one line per residue: its letter and its predicted value.

    Each line has the form ``<letter> <TAB> <value>`` with the value shown to
    three decimal places. Iteration stops at the shorter of the two inputs.

    Args:
        seq: Iterable of residue letters.
        prediction: Iterable of elements exposing ``.item()``, aligned with
            ``seq``.
    """
    for residue, score in zip(seq, prediction):
        print(f"{residue} \t {score.item():.3f}")

# Print one line per residue of the chain: residue letter and model output value.
display_prediction(seq,prediction)

Q 	 0.543
V 	 0.753
Q 	 0.632
L 	 0.629
Q 	 0.526
E 	 0.644
S 	 0.577
G 	 0.554
G 	 0.657
G 	 0.528
L 	 0.388
V 	 0.339
Q 	 0.246
A 	 0.295
G 	 0.275
G 	 0.447
S 	 0.386
L 	 0.447
R 	 0.362
L 	 0.469
S 	 0.485
C 	 0.595
A 	 0.206
A 	 0.550
S 	 0.042
G 	 0.191
R 	 0.366
M 	 0.502
F 	 0.295
S 	 0.506
I 	 0.812
N 	 0.603
S 	 0.601
M 	 0.756
G 	 0.851
W 	 0.941
Y 	 0.776
R 	 0.590
Q 	 0.415
A 	 0.309
P 	 0.188
G 	 0.120
K 	 0.127
E 	 0.237
R 	 0.536
E 	 0.314
L 	 0.791
V 	 0.589
A 	 0.784
T 	 0.920
I 	 0.609
S 	 0.852
E 	 0.798
A 	 0.536
G 	 0.308
T 	 0.773
T 	 0.485
T 	 0.836
Y 	 0.683
A 	 0.484
D 	 0.396
S 	 0.338
V 	 0.524
R 	 0.632
G 	 0.348
R 	 0.440
F 	 0.516
T 	 0.479
I 	 0.642
A 	 0.506
R 	 0.730
D 	 0.090
N 	 0.204
A 	 0.163
K 	 0.111
N 	 0.094
T 	 0.324
V 	 0.736
Y 	 0.744
L 	 0.777
Q 	 0.533
M 	 0.762
N 	 0.548
S 	 0.374
L 	 0.445
N 	 0.435
P 	 0.261
E 	 0.407
D 	 0.667
T 	 0.457
A 	 0.405
V 	 0.420
Y 	 0.440
Y 	 0.543
C 	 0.420
N 	 0.775
A 	 0.502
Y 	 0.811
I 	 0.602
Q 	 0.691
