# Geometric Units

This notebook validates that the geometric units are constructed correctly, i.e. that they reproduce the 3D protein structure when given the ground-truth bond lengths, bond angles and torsion angles.

In [1]:
import torch
import torch.nn as nn
import os
import utils
from model import pair_dist

Using TensorFlow backend.


In [2]:
# Data locations, expressed relative to the current directory.
data_path = f"{os.curdir}/data/"
# PDB structure files live in a sub-directory of the data folder.
pdb_path = f"{data_path}pdb/structures/"

In [3]:
# Load the proteins_1 array so that a PDB entry with no missing
# coordinates can be picked from it.
chain_1 = utils.load_array(f"{data_path}proteins_1.bc")

FileNotFoundError: [Errno 2] No such file or directory: './data/proteins_1.bc/meta/sizes'

Proteins 2 (1zur) and 19 (4a56) in `chain_1` are known to have no missing coordinates, so they are ideal candidates.

In [16]:
names = ['1zur.pdb', '4a56.pdb']

# Zero-padded containers for the ground-truth internal coordinates.
# Axis 0 is padded to 163 entries, axis 1 indexes the sample (one per file in
# `names`), axis 2 has 3 entries (presumably one per backbone atom of a
# residue -- TODO confirm against utils.gt_dihedral_angles).
padded_shape = (163, 2, 3)
t_angles = torch.zeros(padded_shape)  # torsion angles
b_angles = torch.zeros(padded_shape)  # bond angles
b_len = torch.zeros(padded_shape)     # bond lengths

for sample_ix, pdb_name in enumerate(names):
    torsions, angles, lengths = utils.gt_dihedral_angles(pdb_path + pdb_name)
    # Copy each sample into the leading rows of its column; any remaining
    # rows keep their zero padding.
    for padded, values in ((t_angles, torsions), (b_angles, angles), (b_len, lengths)):
        padded[:len(values), sample_ix] = values

In [17]:
# Coordinates of the first three atoms (A, B, C) of the two test samples.
# Entry 2 of chain_1 is 1zur and entry 19 is 4a56; element [2] of an entry
# holds its coordinates, indexed by atom position.
A1, B1, C1 = (torch.tensor(chain_1[2][2][atom], dtype=torch.float) for atom in range(3))
A2, B2, C2 = (torch.tensor(chain_1[19][2][atom], dtype=torch.float) for atom in range(3))

In [18]:
# Seed the chain with the ground-truth positions of the first three atoms,
# batched over the two test samples: A, B and C are each (batch, 3).
A = torch.stack([A1, A2], 0)
B = torch.stack([B1, B2], 0)
C = torch.stack([C1, C2], 0)

# Collect the placed atoms in a list and stack once at the end, instead of
# re-allocating the whole coordinate tensor with torch.cat on every step.
atoms = [A, B, C]

for ix, triplet in enumerate(t_angles):
    for i in range(3):
        #coordinates of last three atoms
        A, B, C = atoms[-3], atoms[-2], atoms[-1]
        #internal coordinates of the atom to place, one value per sample
        T = b_angles[ix, :, i]  # bond angle
        R = b_len[ix, :, i]     # bond length
        P = triplet[:, i]       # torsion angle

        # Offset of the new atom from C, expressed in the local frame whose
        # axes are the columns of M below.
        #note the negative in the first term isn't referenced in the original paper
        #it may have something to do with how Bio.PDB calculates angles
        D2 = torch.stack([-R*torch.cos(T),
                          R*torch.cos(P)*torch.sin(T),
                          R*torch.sin(P)*torch.sin(T)], dim=1)

        #bsx3 one triplet for each sample in the batch
        BC = C - B
        bc = BC/torch.norm(BC, 2, dim=1, keepdim=True)

        AB = B - A

        # dim=1 is given explicitly: without it torch.cross picks the first
        # axis of size 3, which would be the batch axis for a batch of three.
        N = torch.cross(AB, bc, dim=1)
        n = N/torch.norm(N, 2, dim=1, keepdim=True)

        # Local orthonormal frame at C; its axes are stacked as columns.
        M = torch.stack([bc, torch.cross(n, bc, dim=1), n], dim=2)

        # Rotate the offset into the global frame and translate by C.
        # squeeze(-1) drops only the trailing matrix-column axis, so a batch
        # of size one keeps its batch dimension.
        D = torch.bmm(M, D2.view(-1,3,1)).squeeze(-1) + C
        atoms.append(D)

# (n_atoms, batch, 3): the three seed atoms followed by every placed atom.
pred_coords = torch.stack(atoms, 0)

In [19]:
# Evaluate on 1zur (sample 0 of the batch, entry 2 of chain_1): print the
# pairwise distances among the first seven atoms, reconstruction first and
# ground truth second, so the two matrices can be compared.
gt_coords = torch.tensor(chain_1[2][2])
for coords in (pred_coords[:, 0], gt_coords):
    print(pair_dist(coords)[:7, :7])

tensor([[ 0.0000,  1.4874,  2.5037,  3.6471,  4.9417,  6.0591,  7.2858],
        [ 1.4874,  0.0000,  1.5289,  2.4205,  3.8290,  4.7594,  6.0301],
        [ 2.5037,  1.5289,  0.0000,  1.3304,  2.4853,  3.6839,  4.8436],
        [ 3.6471,  2.4205,  1.3304,  0.0000,  1.4620,  2.4165,  3.6473],
        [ 4.9417,  3.8290,  2.4853,  1.4620,  0.0000,  1.5286,  2.4434],
        [ 6.0591,  4.7594,  3.6839,  2.4165,  1.5286,  0.0000,  1.3305],
        [ 7.2858,  6.0301,  4.8436,  3.6473,  2.4434,  1.3305,  0.0000]])
tensor([[ 0.0000,  1.4875,  2.5037,  3.6471,  4.9417,  6.0591,  7.2858],
        [ 1.4875,  0.0000,  1.5288,  2.4205,  3.8289,  4.7595,  6.0301],
        [ 2.5037,  1.5288,  0.0000,  1.3304,  2.4853,  3.6839,  4.8436],
        [ 3.6471,  2.4205,  1.3304,  0.0000,  1.4620,  2.4165,  3.6473],
        [ 4.9417,  3.8289,  2.4853,  1.4620,  0.0000,  1.5286,  2.4434],
        [ 6.0591,  4.7595,  3.6839,  2.4165,  1.5286,  0.0000,  1.3305],
        [ 7.2858,  6.0301,  4.8436,  3.6473,  2.44

In [20]:
# Evaluate on 4a56 (sample 1 of the batch, entry 19 of chain_1): print the
# pairwise distances among the first seven atoms, reconstruction first and
# ground truth second, so the two matrices can be compared.
gt_coords = torch.tensor(chain_1[19][2])
for coords in (pred_coords[:, 1], gt_coords):
    print(pair_dist(coords)[:7, :7])

tensor([[ 0.0000,  1.4995,  2.4238,  3.4181,  4.6396,  5.4634,  6.4754],
        [ 1.4995,  0.0000,  1.5253,  2.4579,  3.8404,  4.4748,  5.6402],
        [ 2.4238,  1.5253,  0.0000,  1.3442,  2.4518,  3.0594,  4.1571],
        [ 3.4181,  2.4579,  1.3442,  0.0000,  1.4801,  2.4726,  3.6662],
        [ 4.6396,  3.8404,  2.4518,  1.4801,  0.0000,  1.5339,  2.4456],
        [ 5.4634,  4.4748,  3.0594,  2.4726,  1.5339,  0.0000,  1.3372],
        [ 6.4754,  5.6402,  4.1571,  3.6662,  2.4456,  1.3372,  0.0000]])
tensor([[ 0.0000,  1.4995,  2.4238,  3.4181,  4.6396,  5.4634,  6.4754],
        [ 1.4995,  0.0000,  1.5253,  2.4579,  3.8404,  4.4748,  5.6402],
        [ 2.4238,  1.5253,  0.0000,  1.3442,  2.4518,  3.0594,  4.1571],
        [ 3.4181,  2.4579,  1.3442,  0.0000,  1.4801,  2.4726,  3.6662],
        [ 4.6396,  3.8404,  2.4518,  1.4801,  0.0000,  1.5339,  2.4456],
        [ 5.4634,  4.4748,  3.0594,  2.4726,  1.5339,  0.0000,  1.3372],
        [ 6.4754,  5.6402,  4.1571,  3.6662,  2.44