In [4]:
import numpy as np
import biotite.structure as struc
from biotite.structure.io.pdb import PDBFile

In [5]:
# Parse the PDB file; the path is relative to the notebook's working directory.
pdb = PDBFile.read('1a10I00.pdb')
# Keep only the first model; `structure` is the atom collection the cells below iterate over.
structure = pdb.get_structure(model=1)



## Generating k-NN graph

In [8]:
from scipy.spatial.distance import cdist

def knn_edge_index(structure, k=30):
    """Build a directed k-nearest-neighbour graph over the C-alpha atoms.

    Parameters
    ----------
    structure : iterable of atoms
        Each atom must expose ``atom_name`` and ``coord`` attributes. Only
        atoms named ``'CA'`` become nodes, numbered in iteration order.
    k : int, default 30
        Number of neighbours per node. Clamped to ``n_nodes - 1`` when there
        are fewer than ``k + 1`` C-alpha atoms.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(2, n_nodes * k_eff)``. Row 0 holds the
        source node of every edge and row 1 the neighbour, listed per source
        node in order of increasing Euclidean distance. With fewer than two
        C-alpha atoms (or ``k <= 0``) an empty ``(2, 0)`` array is returned.
    """
    ca_coords = np.array([a.coord for a in structure if a.atom_name == 'CA'])
    n_nodes = len(ca_coords)
    k_eff = max(0, min(k, n_nodes - 1))
    if k_eff == 0:
        return np.empty((2, 0), dtype=np.intp)

    pdist = cdist(ca_coords, ca_coords, metric='euclidean')
    # Exclude self-matches explicitly. Dropping the first sorted column is not
    # enough: with coincident atoms the node itself is not guaranteed to sort
    # first, which would leave a self-loop in the graph.
    np.fill_diagonal(pdist, np.inf)

    # A stable sort keeps the tie order deterministic (lower index first).
    topk_indices = np.argsort(pdist, axis=1, kind='stable')[:, :k_eff]

    sources = np.repeat(np.arange(n_nodes), k_eff)
    return np.stack([sources, topk_indices.ravel()])

In [9]:
# Build the 30-nearest-neighbour edge list: row 0 = source nodes, row 1 = their neighbours.
edge_idx = knn_edge_index(structure, k=30)
edge_idx

array([[ 0,  0,  0, ..., 62, 62, 62],
       [ 1,  2, 21, ..., 18, 29, 57]])

## Backbone frame

In [60]:
def to_four_atom_coordinates(atoms):
    """Group atom coordinates into consecutive blocks of four.

    Reads the ``coord`` attribute of every atom in ``atoms`` (in iteration
    order) and returns an array of shape ``(len(atoms) // 4, 4, 3)``.
    The reshape raises ``ValueError`` when the atoms cannot be split into
    complete groups of four 3-D points.
    """
    positions = []
    for atom in atoms:
        positions.append(atom.coord)

    stacked = np.array(positions)
    return stacked.reshape(-1, 4, 3)

# Keep only the atoms named N, CA, C or O and group their coordinates four at a time.
# NOTE(review): the grouping assumes every residue contributes exactly these four
# atoms, in N, CA, C, O order — confirm for structures with missing atoms.
four_atoms = list(filter(lambda atom: atom.atom_name in ('N', 'CA', 'C', 'O'), structure))
four_atom_coords = to_four_atom_coordinates(four_atoms)  # (#res, 4, 3)

# Position of each atom type along axis 1 of `four_atom_coords`.
N_IDX, CA_IDX, C_IDX, O_IDX = range(4)

In [77]:
# Per-residue bond vectors.
u = four_atom_coords[:, CA_IDX] - four_atom_coords[:, N_IDX]  # N  -> CA
v = four_atom_coords[:, C_IDX] - four_atom_coords[:, CA_IDX]  # CA -> C

# First frame axis: unit vector along (u - v).
b = u - v
b = b / np.linalg.norm(b, axis=-1, keepdims=True)

# Second frame axis: unit normal of the plane spanned by u and v.
n = np.cross(u, v)
n = n / np.linalg.norm(n, axis=-1, keepdims=True)

# Local frame per residue, shape (#res, 3, 3); columns are b, n and b x n.
q = np.stack([b, n, np.cross(b, n)], axis=-1)


## Node features

### Distance features

In [32]:
def rbf(dist, d_min=0, d_max=20, d_count=16):
    """Expand distances into Gaussian radial-basis-function features.

    Parameters
    ----------
    dist : array_like
        Distances of any shape (scalar, 1-D, ..., N-D).
    d_min, d_max : float
        Centres of the first and last basis function.
    d_count : int
        Number of basis functions; centres are evenly spaced on
        ``[d_min, d_max]`` and share the width ``(d_max - d_min) / d_count``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``dist.shape + (d_count,)`` whose last axis holds
        ``exp(-(dist - mu_k)**2 / (2 * sigma**2))`` for every centre ``mu_k``.
    """
    d_mu = np.linspace(d_min, d_max, d_count)
    d_sigma = (d_max - d_min) / d_count
    # Append one trailing axis and broadcast against the centres, so the
    # function is not tied to 3-D input.
    dist = np.asarray(dist)[..., None]

    return np.exp(-(dist - d_mu)**2 / (2 * d_sigma**2))

# Backbone atoms (N, CA, C, O) grouped per residue.
four_atoms = list(filter(lambda atom: atom.atom_name in ('N', 'CA', 'C', 'O'), structure))
four_atom_coords = to_four_atom_coordinates(four_atoms)  # (#res, 4, 3)

# Pairwise distances between the four atoms of each residue -> (#res, 4, 4).
dist = np.sqrt(
    np.square(four_atom_coords[:, None] - four_atom_coords[:, :, None]).sum(axis=-1)
)

# Row-major flat positions of the strict upper triangle of a 4x4 matrix:
# (0,1) (0,2) (0,3) (1,2) (1,3) (2,3).
triu_indices = [1, 2, 3, 6, 7, 11]

# RBF-expand, keep the 6 unique atom pairs, flatten to one vector per residue.
node_dist_feat = rbf(dist).reshape(-1, 4*4, 16)[:, triu_indices, :].reshape(-1, 6 * 16)

node_dist_feat.shape

(63, 96)

### Angle features

In [50]:
# Backbone dihedral angles per residue; NaN entries are replaced by 0.
# The fill value must be passed as `nan=`: the second positional parameter of
# np.nan_to_num is `copy`, so a positional 0.0 would request copy=False.
phi, psi, omega = np.nan_to_num(struc.dihedral_backbone(structure), nan=0.0)

# Bond angles along the backbone.
backbone = structure[struc.filter_backbone(structure)]
# NOTE(review): `n` is also used for the frame normal in the backbone-frame cell;
# the name is kept here so existing references keep working.
n = len(backbone)

# All runs of three consecutive backbone atoms: row i is (i, i+1, i+2).
triplet_indices = np.array([
    np.arange(n-2),
    np.arange(1, n-1),
    np.arange(2, n)
]).T

# Every third triplet, starting at offsets 0, 1 and 2.
# Assuming the backbone is ordered N, CA, C per residue, these are the angles
# N-CA-C, CA-C-N(next) and C-N(next)-CA(next) — TODO confirm ordering.
theta1 = struc.index_angle(backbone, triplet_indices[0::3])
theta2 = struc.index_angle(backbone, triplet_indices[1::3])
theta3 = struc.index_angle(backbone, triplet_indices[2::3])

# One row per residue: (phi, psi, omega, theta1, theta2, theta3).
node_angle_feat = np.array([
    phi,
    psi,
    omega,
    theta1,
    np.hstack([theta2, 0.0]), # theta2 is not defined for the last residue
    np.hstack([theta3, 0.0]), # theta3 is not defined for the last residue
]).T

node_angle_feat.shape

(63, 6)

### Direction features

In [26]:
# Shape check: rbf() appends a trailing axis of 16 basis responses to `dist`.
rbf(dist).shape

(63, 4, 4, 16)

In [51]:
# NOTE(review): `arr` and `arr2` are not defined in any visible cell, so this cell
# fails on a fresh kernel — define them here or remove this scratch cell.
# Broadcasting scratch: presumably both are 2-D NumPy arrays, in which case
# diff[i, j] = arr[i] - arr2[j] for every pair of rows.
diff = arr[:, None, :] - arr2[None, :, :]

diff[1][0]

array([2, 2])

## Edge features

### Distance features

In [56]:
# Backbone atoms (N, CA, C, O) grouped per residue.
four_atoms = list(filter(lambda atom: atom.atom_name in ('N', 'CA', 'C', 'O'), structure))
four_atom_coords = to_four_atom_coordinates(four_atoms)  # (#res, 4, 3)

# Gather the backbone coordinates of both endpoints of every edge -> (#edges, 4, 3) each.
src_idx = edge_idx[0]
dst_idx = edge_idx[1]
four_atom_coords_i = four_atom_coords[src_idx]
four_atom_coords_j = four_atom_coords[dst_idx]

# All 4x4 atom-pair distances between the two residues of each edge -> (#edges, 4, 4).
dist = np.sqrt(
    np.square(four_atom_coords_i[:, None] - four_atom_coords_j[:, :, None]).sum(axis=-1)
)

# RBF-expand and flatten to one feature vector per edge (4 * 4 * 16 = 256 values).
dist = rbf(dist).reshape(dist.shape[0], -1)
dist.shape

(1890, 256)