In [14]:
!pwd

/net/pulsar/home/koes/jok120/repos/sidechainnet/sidechainnet/research/cluster/220112


In [2]:
import numpy as np
import prody
import torch
from tqdm import tqdm

import sidechainnet as scn
from sidechainnet.research.cluster.train import make_model, get_losses
from sidechainnet.dataloaders.SCNProtein import SCNProtein
from sidechainnet.structure import inverse_trig_transform

In [3]:
# d = scn.load(12, 30, with_pytorch='dataloaders', filter_by_resolution=True, complete_structures_only=True)
d = scn.load(12, 30, with_pytorch='dataloaders', filter_by_resolution=True, complete_structures_only=True, batch_size=1)

SidechainNet was loaded from ./sidechainnet_data/sidechainnet_casp12_30.pkl.
5486 (21.8%) training set entries were excluded based on resolution.
14929 (75.7%) training set entries were excluded based on missing residues.


In [4]:
# chkpt_path = "/net/pulsar/home/koes/jok120/repos/sidechainnet/sidechainnet/research/cluster/220121/model_2sm463zx_best.chkpt"
chkpt_path = "/net/pulsar/home/koes/jok120/repos/sidechainnet/sidechainnet/research/cluster/220121/model_1hlw43r9_best.chkpt"

c = torch.load(chkpt_path, map_location=torch.device('cpu'))
print(c.keys())

dict_keys(['model_state_dict', 'settings', 'epoch', 'optimizer_state_dict', 'scheduler_state_dict', 'loss'])


In [5]:
angle_means = d['train'].dataset.angle_means[6:]
args = c['settings']

In [6]:
model = make_model(args, angle_means)

  angle_means = np.arctanh(self.angle_means)


In [7]:
model.load_state_dict(c['model_state_dict'])
model.eval()

SidechainTransformer(
  (ff1): Linear(in_features=99, out_features=256, bias=True)
  (encoder_layer): TransformerEncoderLayer(
    (self_attn): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
    )
    (linear1): Linear(in_features=256, out_features=512, bias=True)
    (dropout): Dropout(p=0.25, inplace=False)
    (linear2): Linear(in_features=512, out_features=256, bias=True)
    (norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
    (norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
    (dropout1): Dropout(p=0.25, inplace=False)
    (dropout2): Dropout(p=0.25, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
        )
        (linear1): Linear(in_features=256, out_features=512, b

In [15]:
with torch.no_grad():
    proteins = []
    for step, batch in tqdm(enumerate(d['test'])):
        # Select out backbone and sidechaine angles
        bb_angs = batch.angs[:, :, :6]
        sc_angs_true = batch.angs[:, :, 6:]
        sc_angs_true = scn.structure.trig_transform(sc_angs_true).reshape(
            sc_angs_true.shape[0], sc_angs_true.shape[1], 12)  # (B x L x 12)

        # Stack model inputs into a single tensor
        model_in = torch.cat([bb_angs, batch.secs, batch.evos], dim=-1)

        # Move inputs to device
    #     model_in = model_in.to(device)
    #     int_seqs = batch.int_seqs.to(device)
    #     sc_angs_true = sc_angs_true.to(device)

        # Predict sidechain angles given input and sequence
        sc_angs_pred = model(model_in, batch.int_seqs)

        # Reshape prediction to match true sidechain angles
        loss = get_losses("mse", batch, sc_angs_true, sc_angs_pred)
        loss = np.sqrt(loss)

        # Make structures 
        p_true = SCNProtein(coordinates=batch.crds[0].numpy(), angles=batch.angs[0].numpy(),
                       sequence=str(batch.str_seqs[0]),
                       id=batch.pids[0],
                       mask="".join(["+" if x else "-" for x in batch.msks[0]]))
        p_pred = SCNProtein(angles=batch.angs[0].clone(),
                       sequence=str(batch.str_seqs[0]),
                       id=batch.pids[0],
                       mask="".join(["+" if x else "-" for x in batch.msks[0]]))
        sc_angs_pred_untransformed = inverse_trig_transform(sc_angs_pred, n_angles=6)
        # Fill in protein obj with predicted angles instead of true angles
        p_pred.angles[:, 6:] = sc_angs_pred_untransformed[0]

        # Build coords first
        p_pred.build_coords_from_angles()
        try:
            p_true.build_coords_from_angles()
            print(step, p_true.id, "success.", end=" ")
        except ValueError:
            pass
        p_pred.numpy()
        p_true.numpy()
        aligned = prody.calcTransformation(
                p_pred.coords, p_true.coords).apply(p_pred.coords)
        rmsd = prody.calcRMSD(p_true.coords, aligned)

        p_pred.to_pdb(f"{step:0>2}_{p_pred.id.replace('#', '')}_pred_{loss:.3f}mse_{rmsd:.3f}rmsd.pdb", from_angles=True)
        p_true.to_pdb(f"{step:0>2}_{p_true.id.replace('#', '')}_true_{loss:.3f}mse_{rmsd:.3f}rmsd.pdb")
        
        print(loss, rmsd)


        proteins.append((p_true, p_pred, loss, rmsd))


1it [00:00,  2.76it/s]

tensor(0.3701) 20.788504878014432


2it [00:00,  2.74it/s]

tensor(0.3001) 12.48095601578262


3it [00:01,  2.76it/s]

tensor(0.2592) 18.696631464244405


4it [00:01,  2.72it/s]

tensor(0.3158) 20.844808173864163


5it [00:01,  2.65it/s]

tensor(0.3035) 13.740162774598069


6it [00:02,  2.56it/s]

tensor(0.3558) 34.280743083884055


7it [00:02,  2.52it/s]

tensor(0.3122) 45.36113070819784


8it [00:03,  2.47it/s]

tensor(0.3426) 12.963827421911496


9it [00:03,  2.44it/s]

tensor(0.3648) 27.80311768461568


10it [00:03,  2.40it/s]

tensor(0.2906) 10.591344194323138


11it [00:04,  2.35it/s]

tensor(0.2601) 15.850149157573957


12it [00:04,  2.27it/s]

tensor(0.3524) 30.20856467827509


13it [00:05,  2.21it/s]

tensor(0.2774) 19.259235251388517
13 TBM-hard#T0892 success. 

14it [00:06,  1.96it/s]

tensor(0.3828) 1.2799504730137103


15it [00:06,  1.93it/s]

tensor(0.3425) 12.282368347424592


16it [00:07,  1.91it/s]

tensor(0.3036) 22.759672093627547


17it [00:07,  1.89it/s]

tensor(0.2240) 15.329449220626673


18it [00:08,  1.86it/s]

tensor(0.3470) 74.50169219716447
18 TBM#T0893 success. 

19it [00:08,  1.68it/s]

tensor(0.3586) 1.115551995181867
19 FM#T0864 success. 

20it [00:09,  1.54it/s]

tensor(0.3804) 1.2449701824687744


21it [00:10,  1.58it/s]

tensor(0.2912) 25.513265243054565


22it [00:10,  1.59it/s]

tensor(0.3444) 113.28815250535017


23it [00:11,  1.58it/s]

tensor(0.3226) 15.447135881737376


24it [00:12,  1.55it/s]

tensor(0.3474) 80.63052535448777


25it [00:12,  1.54it/s]

tensor(0.2776) 71.16568185483972


26it [00:13,  1.50it/s]

tensor(0.3177) 27.807718981205614


27it [00:14,  1.47it/s]

tensor(0.3279) 86.24021991733065


28it [00:15,  1.44it/s]

tensor(0.3410) 57.04337582525103


29it [00:15,  1.32it/s]

tensor(0.3465) 56.98457032860356


30it [00:16,  1.31it/s]

tensor(0.3315) 77.92394726607358


31it [00:17,  1.29it/s]

tensor(0.3389) 33.1956450587096


32it [00:18,  1.26it/s]

tensor(0.3286) 26.430096176774676


33it [00:19,  1.24it/s]

tensor(0.3094) 32.35326135331359


34it [00:20,  1.23it/s]

tensor(0.3055) 151.28890231173966


35it [00:20,  1.20it/s]

tensor(0.3211) 135.90601413772595


36it [00:21,  1.17it/s]

tensor(0.3662) 34.354630209920586


37it [00:22,  1.15it/s]

tensor(0.3353) 22.17415250135012


38it [00:23,  1.10it/s]

tensor(0.3360) 20.66905147365905


39it [00:24,  1.57it/s]

tensor(0.3413) 97.91578669868719





In [16]:
proteins[13]

(SCNProtein(TBM-hard#T0892, len=193, missing=0, split='None'),
 SCNProtein(TBM-hard#T0892, len=193, missing=0, split='None'),
 tensor(0.3828),
 1.2799504730137103)

In [76]:
pt, pp = proteins[13][:2]

In [77]:
at, ap = pt.angles[:,6:], pp.angles[:,6:]
np.sqrt(np.mean((at[at!=0]-ap[at!=0])**2))

2.2621677

In [78]:
for ang_idx in range(6,12):
    at, ap = pt.angles[:,ang_idx], pp.angles[:,ang_idx]
    m, sd = np.mean(np.abs(at[at!=0]-ap[at!=0])), np.std(np.abs(at[at!=0]-ap[at!=0]))
    print(f"{ang_idx}: {m:.2f} ({np.rad2deg(m):.2f}˚) {sd:.2f} ({np.rad2deg(sd):.2f}˚)" )

6: 0.58 (33.19˚) 1.69 (96.68˚)
7: 1.46 (83.93˚) 1.51 (86.54˚)
8: 1.70 (97.60˚) 1.82 (104.13˚)
9: 1.84 (105.28˚) 1.77 (101.36˚)
10: 3.32 (190.49˚) 2.42 (138.40˚)
11: 0.03 (1.56˚) 0.02 (1.16˚)


In [79]:
at, ap = pt.angles[:,6:], pp.angles[:,6:]
m2, sd2 = np.mean(np.abs(at[at!=0]-ap[at!=0])), np.std(np.abs(at[at!=0]-ap[at!=0]))
print(f"All: {m2:.2f} ({np.rad2deg(m2):.2f}˚) {sd2:.2f} ({np.rad2deg(sd2):.2f}˚)" )

All: 1.34 (76.67˚) 1.82 (104.50˚)


In [73]:
np.rad2deg(m), np.rad2deg(sd) 

(76.67014, 104.5042)

In [24]:
np.rad2deg(1.3179705)

75.51414717274686

In [57]:
proteins[16][1].to_3Dmol()

<py3Dmol.view at 0x7fcb70434910>

In [58]:
proteins[16][0].to_3Dmol()

<py3Dmol.view at 0x7fcb71451550>

In [45]:
p_true.coords

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       ...,
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]], dtype=float32)

In [46]:
p_true.mask

'----------++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---'

In [50]:
p_true.to_pdb("test.pdb")

In [29]:
!rm *rmsd.pdb

In [9]:
p_pred.angles

array([[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         1.87863186e-01,  1.57923222e-01, -8.28092918e-03,
         3.53989517e-03, -9.03566019e-04,  8.51805322e-04],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         4.30217832e-02,  4.72188406e-02, -2.57466376e-01,
         3.77381593e-03,  8.12203682e-04,  1.72005675e-03],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        -1.40030637e-01, -3.02960396e-01, -1.48020806e-02,
         3.83696444e-02,  5.89946110e-04,  6.82185695e-04],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         2.84911264e-02,  2.53418693e-03, -1.64691165e-01,
        -6.71851337e-02,  1.62176881e-03,  1.39512611e-03],
       [ 0.00000000e+00,  0.00000000e+00,  0.0000000

In [None]:
proteins

In [None]:
p_true, p_pred

In [None]:
p_true.angles
p_true.coords

In [None]:
p_true.to_3Dmol()

In [None]:
p_pred.to_3Dmol(from_angles=True)