# Making predictions

This notebook shows how to use a trained model to predict the atoms of missing residues.

We begin by setting up the model classes. They have to match the definitions used during training, otherwise the saved weights cannot be loaded.

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

class ModelFC(nn.Module):
    """Fully connected network mapping 30 input values per sample to 3 outputs.

    The attribute names (``fc_1`` ... ``fc_f``) are the keys of the module's
    state dict, so they must not be renamed if saved weights are to be loaded.
    """

    def __init__(self):
        super().__init__()
        # three hidden layers followed by the 3-value output head
        self.fc_1 = nn.Linear(in_features=30, out_features=256)
        self.fc_2 = nn.Linear(in_features=256, out_features=256)
        self.fc_3 = nn.Linear(in_features=256, out_features=64)
        self.fc_f = nn.Linear(in_features=64, out_features=3)

    def forward(self, x):
        """Run the network on a batch.

        Every axis after the first is flattened, so each sample must hold
        30 values in total. Returns a tensor of shape (batch, 3).
        """
        hidden = x.reshape(len(x), -1)
        for layer in (self.fc_1, self.fc_2, self.fc_3):
            hidden = F.relu(layer(hidden))
        # the output head is linear (no activation)
        return self.fc_f(hidden)

    def initialize_weights(self, m):
        """Xavier-uniform initialize ``m.weight`` if it has more than one dimension.

        Objects without a ``weight`` attribute and 1-D weights are left as they are.
        Presumably intended for use with ``nn.Module.apply`` -- confirm against
        the training code.
        """
        if not hasattr(m, 'weight') or m.weight.dim() <= 1:
            return
        nn.init.xavier_uniform_(m.weight.data)
            
class Model1D(nn.Module):
    """1D convolutional network with a single input channel and 3 outputs.

    All convolutions use kernel size 3 with padding 1, so the sequence length
    is preserved. The first dense layer takes 480 values, i.e. 16 channels
    times a sequence length of 30, so the input must have shape (batch, 1, 30).
    Attribute names are the state-dict keys and must stay unchanged.
    """

    def __init__(self):
        super().__init__()

        def same_conv(n_in, n_out):
            # length-preserving convolution shared by every conv layer
            return nn.Conv1d(in_channels=n_in, out_channels=n_out, kernel_size=3, padding=1)

        # convolutional stack
        self.conv_1 = same_conv(1, 16)
        self.conv_2 = same_conv(16, 16)
        self.conv_3 = same_conv(16, 16)
        self.conv_4 = same_conv(16, 16)
        self.conv_f = same_conv(16, 16)
        # collapse (channels, length) into one feature axis
        self.flatten = nn.Flatten(start_dim=1)
        # dense head
        self.fc_1 = nn.Linear(in_features=480, out_features=256)
        self.fc_2 = nn.Linear(in_features=256, out_features=64)
        self.fc_f = nn.Linear(in_features=64, out_features=3)

    def forward(self, x):
        """Map a (batch, 1, 30) tensor to a (batch, 3) tensor."""
        # the first convolution is applied without an activation
        features = self.conv_1(x)
        for conv in (self.conv_2, self.conv_3, self.conv_4, self.conv_f):
            features = F.relu(conv(features))
        features = self.flatten(features)
        for dense in (self.fc_1, self.fc_2):
            features = F.relu(dense(features))
        # linear output head
        return self.fc_f(features)

    def initialize_weights(self, m):
        """Xavier-uniform initialize ``m.weight`` if it has more than one dimension.

        Objects without a ``weight`` attribute and 1-D weights are left as they are.
        """
        if not hasattr(m, 'weight') or m.weight.dim() <= 1:
            return
        nn.init.xavier_uniform_(m.weight.data)
            
class Model1D3(nn.Module):
    """1D convolutional network with three input channels and 3 outputs.

    All convolutions use kernel size 3 with padding 1, so the sequence length
    is preserved. The first dense layer takes 640 values, i.e. 64 channels
    times a sequence length of 10, so the input must have shape (batch, 3, 10).
    Attribute names are the state-dict keys and must stay unchanged.
    """

    def __init__(self):
        super().__init__()

        def same_conv(n_in, n_out):
            # length-preserving convolution shared by every conv layer
            return nn.Conv1d(in_channels=n_in, out_channels=n_out, kernel_size=3, padding=1)

        # convolutional stack
        self.conv_1 = same_conv(3, 32)
        self.conv_2 = same_conv(32, 32)
        self.conv_f = same_conv(32, 64)
        # collapse (channels, length) into one feature axis
        self.flatten = nn.Flatten(start_dim=1)
        # dense head
        self.fc_1 = nn.Linear(in_features=640, out_features=256)
        self.fc_2 = nn.Linear(in_features=256, out_features=64)
        self.fc_f = nn.Linear(in_features=64, out_features=3)

    def forward(self, x):
        """Map a (batch, 3, 10) tensor to a (batch, 3) tensor."""
        # the first convolution is applied without an activation
        features = self.conv_1(x)
        for conv in (self.conv_2, self.conv_f):
            features = F.relu(conv(features))
        features = self.flatten(features)
        for dense in (self.fc_1, self.fc_2):
            features = F.relu(dense(features))
        # linear output head
        return self.fc_f(features)

    def initialize_weights(self, m):
        """Xavier-uniform initialize ``m.weight`` if it has more than one dimension.

        Objects without a ``weight`` attribute and 1-D weights are left as they are.
        """
        if not hasattr(m, 'weight') or m.weight.dim() <= 1:
            return
        nn.init.xavier_uniform_(m.weight.data)
            
class Model1D3_ca_n_co_cb(nn.Module):
    """Four chained 1D-conv stages that each predict part of a 15-value output.

    Every stage is a stack of five length-preserving convolutions (kernel 3,
    padding 1, 64 channels) followed by three dense layers. The stages run in
    sequence, and the output of each one is appended to the input of the
    following stages as extra sequence positions:

    * stage 1 (``conv_*`` / ``fc_*``): 3 values from the raw input (length 10)
    * stage 2 (``cn_*`` / ``fcn_*``): 3 values, input length 11
    * stage 3 (``cco_*`` / ``fcco_*``): 6 values split into two triples, input length 12
    * stage 4 (``ccb_*`` / ``fccb_*``): 3 values, input length 14

    The input must have shape (batch, 3, 10); the dense layer widths
    (640, 704, 768, 896 = 64 * length) fix these lengths. Judging by the
    attribute names the triples are CA, N, C, O and CB coordinates.
    Attribute names are the state-dict keys and must stay unchanged.
    """

    def __init__(self):
        super().__init__()

        def same_conv(n_in):
            # length-preserving convolution with 64 output channels
            return nn.Conv1d(in_channels=n_in, out_channels=64, kernel_size=3, padding=1)

        # --- stage 1 ---
        self.conv_1 = same_conv(3)
        self.conv_2 = same_conv(64)
        self.conv_3 = same_conv(64)
        self.conv_4 = same_conv(64)
        self.conv_f = same_conv(64)
        # collapse (channels, length) into one feature axis; shared by all stages
        self.flatten = nn.Flatten(start_dim=1)
        self.fc_1 = nn.Linear(in_features=640, out_features=128)
        self.fc_2 = nn.Linear(in_features=128, out_features=32)
        self.fc_f = nn.Linear(in_features=32, out_features=3)

        # --- stage 2 ---
        self.cn_1 = same_conv(3)
        self.cn_2 = same_conv(64)
        self.cn_3 = same_conv(64)
        self.cn_4 = same_conv(64)
        self.cn_f = same_conv(64)
        self.fcn_1 = nn.Linear(in_features=704, out_features=128)
        self.fcn_2 = nn.Linear(in_features=128, out_features=32)
        self.fcn_f = nn.Linear(in_features=32, out_features=3)

        # --- stage 3 (two triples at once, hence 6 outputs) ---
        self.cco_1 = same_conv(3)
        self.cco_2 = same_conv(64)
        self.cco_3 = same_conv(64)
        self.cco_4 = same_conv(64)
        self.cco_f = same_conv(64)
        self.fcco_1 = nn.Linear(in_features=768, out_features=128)
        self.fcco_2 = nn.Linear(in_features=128, out_features=32)
        self.fcco_f = nn.Linear(in_features=32, out_features=6)

        # --- stage 4 ---
        self.ccb_1 = same_conv(3)
        self.ccb_2 = same_conv(64)
        self.ccb_3 = same_conv(64)
        self.ccb_4 = same_conv(64)
        self.ccb_f = same_conv(64)
        self.fccb_1 = nn.Linear(in_features=896, out_features=128)
        self.fccb_2 = nn.Linear(in_features=128, out_features=32)
        self.fccb_f = nn.Linear(in_features=32, out_features=3)

    def _run_stage(self, x, convs, hidden, head):
        """Apply one conv stack, flatten, then the dense layers of one stage."""
        first_conv, *other_convs = convs
        # the first convolution of every stage is applied without an activation
        x = first_conv(x)
        for conv in other_convs:
            x = F.relu(conv(x))
        x = self.flatten(x)
        for dense in hidden:
            x = F.relu(dense(x))
        # linear output head
        return head(x)

    def forward(self, x):
        """Map a (batch, 3, 10) tensor to a (batch, 15) tensor.

        The output columns are the results of stage 1, stage 2, the two
        halves of stage 3 and stage 4, in that order.
        """
        xinp = x

        ca = self._run_stage(
            xinp,
            (self.conv_1, self.conv_2, self.conv_3, self.conv_4, self.conv_f),
            (self.fc_1, self.fc_2),
            self.fc_f,
        )
        # (batch, 3) -> (batch, 3, 1): one extra sequence position
        car = ca.reshape(-1, 3, 1)

        n = self._run_stage(
            torch.cat((car, xinp), 2),
            (self.cn_1, self.cn_2, self.cn_3, self.cn_4, self.cn_f),
            (self.fcn_1, self.fcn_2),
            self.fcn_f,
        )
        nr = n.reshape(-1, 3, 1)

        co = self._run_stage(
            torch.cat((car, nr, xinp), 2),
            (self.cco_1, self.cco_2, self.cco_3, self.cco_4, self.cco_f),
            (self.fcco_1, self.fcco_2),
            self.fcco_f,
        )
        # split the 6 outputs into two triples
        c, ox = co[:, 0:3], co[:, 3:6]
        cr = c.reshape(-1, 3, 1)
        oxr = ox.reshape(-1, 3, 1)

        cb = self._run_stage(
            torch.cat((car, nr, cr, oxr, xinp), 2),
            (self.ccb_1, self.ccb_2, self.ccb_3, self.ccb_4, self.ccb_f),
            (self.fccb_1, self.fccb_2),
            self.fccb_f,
        )

        return torch.cat((ca, n, c, ox, cb), 1)

    def initialize_weights(self, m):
        """Xavier-uniform initialize ``m.weight`` if it has more than one dimension.

        Objects without a ``weight`` attribute and 1-D weights are left as they are.
        """
        if not hasattr(m, 'weight') or m.weight.dim() <= 1:
            return
        nn.init.xavier_uniform_(m.weight.data)



We instantiate a model and load the weights that were saved during training.

In [None]:
# Alternative: the model that predicts only one atom
#model=Model1D3()
#model.load_state_dict(torch.load('../machinelearning/ca_predict.dict'))

# Instantiate the network and load the trained weights.
# map_location='cpu' lets a checkpoint that was saved on a GPU be loaded on a
# CPU-only machine; the prediction cell feeds the model a CPU tensor anyway.
model = Model1D3_ca_n_co_cb()
model.load_state_dict(
    torch.load('../machinelearning/LYS_1d3_ca_n_co_cb_predict_aa.dict', map_location='cpu')
)
# The model is only used for inference from here on.
model.eval()


Let's use the model!

In the example we take a PDB, select a chain and a residue, and pretend that we remove it.
We will then check how accurately we can predict the missing atoms ($C\alpha$, N, C, O and $C\beta$).

In [None]:
import os
import mdtraj as md
import numpy as np

def project(vector,midpoint,xloc,yloc,zloc):
    """Express ``vector`` in the local frame centred at ``midpoint``.

    Returns a plain list with the dot products of ``vector - midpoint`` with
    ``xloc``, ``yloc`` and ``zloc`` (in that order). A list is returned, not
    an array, so that results can be concatenated with ``+``.
    """
    offset = vector - midpoint
    return [np.dot(offset, axis) for axis in (xloc, yloc, zloc)]

# can be any PDB
pdb_fn = "4hhb.pdb"
pdb = md.load_pdb(pdb_fn)

# select chain and residue to be "removed"
# unfortunately, mdtraj does not support chain ID letters, need to use index
# in this case, chainid 1 is chain B
selectchain = 1
selectresidue = 30


def _select_atom(traj, chain, resnum, atomname):
    """Return ``(indices, xyz)`` for one named atom of one residue.

    ``indices`` is the array returned by the mdtraj selection and ``xyz`` is
    the position of the first matching atom in frame 0.

    Raises IndexError with the offending selection if nothing matches, for
    example when the residue has no CB atom or has no neighbour at
    ``resnum`` because it sits at a chain end.
    """
    query = f"chainid {chain} and residue {resnum} and name {atomname}"
    inx = traj.topology.select(query)
    if len(inx) == 0:
        raise IndexError(f"no atom matches selection '{query}'")
    return inx, traj.xyz[0][inx][0]


def _to_pdb_frame(local, origin, xaxis, yaxis, zaxis):
    """Convert local-frame coordinates back to the Cartesian frame of the PDB."""
    return local[0] * xaxis + local[1] * yaxis + local[2] * zaxis + origin


# atoms from residue to be "removed" (only used to judge the prediction)
ca_remove_inx, ca_remove = _select_atom(pdb, selectchain, selectresidue, "CA")
n_remove_inx, n_remove = _select_atom(pdb, selectchain, selectresidue, "N")
c_remove_inx, c_remove = _select_atom(pdb, selectchain, selectresidue, "C")
o_remove_inx, o_remove = _select_atom(pdb, selectchain, selectresidue, "O")
cb_remove_inx, cb_remove = _select_atom(pdb, selectchain, selectresidue, "CB")

# atoms from rest of structure needed to make prediction
ca_m1_inx, ca_m1 = _select_atom(pdb, selectchain, selectresidue - 1, "CA")
ca_m2_inx, ca_m2 = _select_atom(pdb, selectchain, selectresidue - 2, "CA")
ca_p1_inx, ca_p1 = _select_atom(pdb, selectchain, selectresidue + 1, "CA")
ca_p2_inx, ca_p2 = _select_atom(pdb, selectchain, selectresidue + 2, "CA")

c_m1_inx, c_m1 = _select_atom(pdb, selectchain, selectresidue - 1, "C")
o_m1_inx, o_m1 = _select_atom(pdb, selectchain, selectresidue - 1, "O")
n_m1_inx, n_m1 = _select_atom(pdb, selectchain, selectresidue - 1, "N")

c_p1_inx, c_p1 = _select_atom(pdb, selectchain, selectresidue + 1, "C")
o_p1_inx, o_p1 = _select_atom(pdb, selectchain, selectresidue + 1, "O")
n_p1_inx, n_p1 = _select_atom(pdb, selectchain, selectresidue + 1, "N")

# construct local coordinate system - must match generation of features
# origin: midpoint between the CA atoms of the two neighbouring residues
m = 0.5 * (ca_m1 + ca_p1)
l1 = ca_p1 - ca_m1
l2 = (ca_m2 - ca_m1) + (ca_p2 - ca_p1)
l3 = np.cross(l1, l2)
# l2 made orthogonal to l1, so that (l1, ol2, l3) are mutually perpendicular
ol2 = np.cross(l3, l1)

xlocal = l1 / np.linalg.norm(l1)
ylocal = ol2 / np.linalg.norm(ol2)
zlocal = l3 / np.linalg.norm(l3)

# project atoms onto local coordinate system
ca_m1_local = project(ca_m1, m, xlocal, ylocal, zlocal)
ca_m2_local = project(ca_m2, m, xlocal, ylocal, zlocal)
ca_p1_local = project(ca_p1, m, xlocal, ylocal, zlocal)
ca_p2_local = project(ca_p2, m, xlocal, ylocal, zlocal)

o_m1_local = project(o_m1, m, xlocal, ylocal, zlocal)
o_p1_local = project(o_p1, m, xlocal, ylocal, zlocal)
n_m1_local = project(n_m1, m, xlocal, ylocal, zlocal)
n_p1_local = project(n_p1, m, xlocal, ylocal, zlocal)
c_m1_local = project(c_m1, m, xlocal, ylocal, zlocal)
c_p1_local = project(c_p1, m, xlocal, ylocal, zlocal)

# this is our input feature string in the original format
# (list concatenation: x,y,z of atom 1, then x,y,z of atom 2, ...)
input_feature = (
    ca_m1_local + ca_m2_local + ca_p1_local + ca_p2_local
    + o_m1_local + o_p1_local + n_m1_local + n_p1_local + c_m1_local + c_p1_local
)
#print(input_feature)

# this is regrouped into all x / all y / all z, i.e. shape (1, 3, n_atoms),
# as expected by the Conv1d models with three input channels
inputxyz = input_feature[0::3] + input_feature[1::3] + input_feature[2::3]
inputxyzdata = np.reshape(np.array(inputxyz), (1, 3, -1))
#print(inputxyzdata)

# now we run the model; gradients are not needed for inference
model_input = torch.tensor(inputxyzdata.astype(np.float32))
with torch.no_grad():
    output = model(model_input)
#print(output)

# the 15 outputs are five consecutive (x, y, z) triples in the local frame
predicted_local = output[0].detach().cpu().numpy().astype(np.float64)
ca_remove_local_model = predicted_local[0:3]
n_remove_local_model = predicted_local[3:6]
c_remove_local_model = predicted_local[6:9]
o_remove_local_model = predicted_local[9:12]
cb_remove_local_model = predicted_local[12:15]

#print(ca_remove_local_model)

# compare with projection of actual atom
ca_remove_local = project(ca_remove, m, xlocal, ylocal, zlocal)
#print(ca_remove_local)

# converting local prediction back to Cartesian coordinates in original PDB frame
ca_predict = _to_pdb_frame(ca_remove_local_model, m, xlocal, ylocal, zlocal)
n_predict = _to_pdb_frame(n_remove_local_model, m, xlocal, ylocal, zlocal)
c_predict = _to_pdb_frame(c_remove_local_model, m, xlocal, ylocal, zlocal)
o_predict = _to_pdb_frame(o_remove_local_model, m, xlocal, ylocal, zlocal)
cb_predict = _to_pdb_frame(cb_remove_local_model, m, xlocal, ylocal, zlocal)

for atom_label, actual_xyz, predicted_xyz in (
    ("Calpha", ca_remove, ca_predict),
    ("N", n_remove, n_predict),
    ("C", c_remove, c_predict),
    ("O", o_remove, o_predict),
    ("Cbeta", cb_remove, cb_predict),
):
    print(f"removed {atom_label} with coordinates: {actual_xyz}")
    print(f"prediction: {predicted_xyz}")



In [None]:
import mdtraj as md
import nglview as nv

#pdb_fn="4hhb.pdb"
#pdb=md.load_pdb(pdb_fn)
#selectresidue=30

# Build a one-residue topology that holds the predicted atoms.
ratom = pdb.topology.atom(ca_remove_inx[0])
top = md.Topology()
newchain = top.add_chain()
newres = top.add_residue(ratom.residue.name, newchain)

# (atom name, index of the matching atom in the original structure, predicted position)
predicted_atoms = (
    ("CA", ca_remove_inx, ca_predict),
    ("N", n_remove_inx, n_predict),
    ("C", c_remove_inx, c_predict),
    ("O", o_remove_inx, o_predict),
    ("CB", cb_remove_inx, cb_predict),
)

xyz = []
for atom_name, source_inx, predicted_xyz in predicted_atoms:
    # take the element from the corresponding original atom, so that N and O
    # are not created as carbon
    top.add_atom(atom_name, pdb.topology.atom(source_inx[0]).element, newres)
    xyz.append(predicted_xyz)

# do not call this `model`: that name holds the torch network used for prediction
predicted_traj = md.Trajectory(xyz, top)

view = nv.NGLWidget(nv.MDTrajTrajectory(pdb))
view.clear_representations()
#view.add_cartoon('protein',color="grey")
# NOTE: chain B is hard-coded here and corresponds to selectchain=1
view.add_licorice(':B',color="blue")
view.add_licorice(f'{selectresidue}:B',color="green")
# actual CA of the "removed" residue
view.add_spacefill(f'{selectresidue}:B and .CA', radius="0.5", color="red")

# predicted atoms as a second component
view.add_trajectory(nv.MDTrajTrajectory(predicted_traj))
view[1].add_spacefill(radius="0.5", color="orange")

view[0].center(f'{selectresidue}:B and .CA')
view.camera='orthographic'

view