In [1]:
# Fetch the tutorial repository; a later cell copies files out of the resulting `torch-tutorial/` directory.
!git clone https://github.com/hodakamori/torch-tutorial

Cloning into 'torch-tutorial'...
remote: Enumerating objects: 64, done.[K
remote: Counting objects: 100% (64/64), done.[K
remote: Compressing objects: 100% (42/42), done.[K
remote: Total 64 (delta 16), reused 58 (delta 13), pack-reused 0[K
Receiving objects: 100% (64/64), 9.53 MiB | 28.88 MiB/s, done.
Resolving deltas: 100% (16/16), done.


In [3]:
!pip install graph-transformer-pytorch rdkit MDAnalysis

Collecting graph-transformer-pytorch
  Downloading graph_transformer_pytorch-0.1.1-py3-none-any.whl (4.3 kB)
Collecting rdkit
  Downloading rdkit-2023.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.4/34.4 MB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting MDAnalysis
  Downloading MDAnalysis-2.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m57.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting einops>=0.3 (from graph-transformer-pytorch)
  Downloading einops-0.7.0-py3-none-any.whl (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.6/44.6 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting rotary-embedding-torch (from graph-transformer-pytorch)
  Downloading rotary_embedding_torch-0.5.3-py3-none-any.whl (5.3 kB)
Collecting GridDataFormats>=0.

In [4]:
# Copy every file in the tutorial's diffusion_two_for_one/ directory into the working directory.
# NOTE(review): presumably this is what provides the local `dataset`, `model` and `utils` modules
# and the ala2_cg.* input files used by the following cells — verify against the repository.
!cp torch-tutorial/diffusion_two_for_one/* .

In [5]:
import torch
import random
from torch.utils.data import DataLoader
from dataset import CGCoordsDataset
from model import Net
from utils import add_diffusion_noise



In [6]:
# Run configuration: compute device, input files, RNG seed and training hyper-parameters.

# Fall back to the CPU so the notebook still runs on machines without a CUDA GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
topology_path = "./ala2_cg.pdb"
traj_path = "./ala2_cg.xtc"

# Seed Python's and torch's global RNGs so DataLoader shuffling and torch.randint
# draws start from the same state on every Restart & Run All.
SEED = 0
random.seed(SEED)
torch.manual_seed(SEED)

dataset = CGCoordsDataset(topology_path, traj_path)
print(len(dataset))

MAX_EPOCHS = 5
BATCH_SIZE = 1024
MAX_NOISE_LEVEL = 25

1000




In [7]:
# Training setup: data loader, energy model, optimiser and learning-rate schedule.
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
bonds = dataset.bonds
# Put the parameters on the same device the coordinate batches are moved to below.
model = Net(num_atoms=5, num_node_features=64).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
# T_max is expressed in epochs, so the scheduler is stepped once per epoch (see end of loop).
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=MAX_EPOCHS, eta_min=1e-5
)
loss_func = torch.nn.MSELoss()

# Training loop: the model outputs an energy, and its gradient with respect to the
# input coordinates is regressed (MSE) onto the displacement added by the noising step.
for epoch in range(MAX_EPOCHS):
    history = []  # per-batch loss values of the current epoch
    for step, (indices, coords) in enumerate(dataloader):
        coords = coords.to(device)
        coords.requires_grad_()
        noise_levels = torch.randint(
            1, MAX_NOISE_LEVEL + 1, (coords.shape[0], coords.shape[1], 1)
        )
        noised_coordinates = add_diffusion_noise(coords, noise_levels=noise_levels)
        # The target is a constant for the optimisation: cut it out of the autograd graph.
        noise_true = (noised_coordinates - coords).detach()
        energy = model(indices, noised_coordinates, bonds, noise_levels=noise_levels)
        # create_graph=True keeps the gradient differentiable, so the loss below can
        # back-propagate into the model parameters. Reading `coords.grad` after a plain
        # `energy.backward()` yields a tensor with no autograd history, which leaves the
        # parameters without any gradient and the optimiser step without effect.
        # NOTE(review): the gradient is taken w.r.t. the clean `coords`, as before;
        # confirm whether `noised_coordinates` was the intended variable.
        (noise_pred,) = torch.autograd.grad(energy, coords, create_graph=True)
        loss = loss_func(noise_pred, noise_true)  # MSELoss already averages over elements
        loss_value = loss.detach().cpu().numpy()
        print(f"{epoch=}, iter={step}, loss={loss_value}")
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        history.append(loss_value)
    # One scheduler step per epoch, matching T_max=MAX_EPOCHS.
    scheduler.step()