In [31]:
import sys
import os
import importlib
import glob
import random
from itertools import combinations

import numpy as np
import scipy
import sklearn

import shap
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import pytorch_lightning as pl

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import ticker
import seaborn as sns

import mdtraj as md

In [3]:
sys.path.insert(1, "../../python")
sys.path.insert(1, "../../..")
import util
import plotting
import ga_pl

In [4]:
plt.style.use("custom")  # custom style sheet
plt.style.use("muted")  # muted color theme from SciencePlots
cm_seq = sns.cubehelix_palette(
    start=0, rot=-0.70, gamma=0.40, light=0.9, dark=0.1, as_cmap=True, reverse=True
)
cm_seq2 = sns.cubehelix_palette(
    start=0, rot=-0.70, gamma=0.40, light=0.8, dark=0.1, as_cmap=True, reverse=False
)
colors = mpl.colors.to_rgba_array(
    [
        "#364B9A",
        "#4A7BB7",
        "#6EA6CD",
        "#98CAE1",
        "#C2E4EF",
        "#EAECCC",
        "#FEDA8B",
        "#FDB366",
        "#F67E4B",
        "#DD3D2D",
        "#A50026",
    ]
)
cm_div = mpl.colors.LinearSegmentedColormap.from_list("", colors)

# Load Data

In [5]:
cv_trajs = list(
    np.load("../../data/raw_feat/cv_dist_spin_anton.npy", allow_pickle=True)
)
cv_trajs.extend(np.load("../../data/raw_feat/cv_dist_spin_anton2.npy"))

In [6]:
sb_labels = []
for r in ("R217", "R223", "R226", "R229", "R232"):
    for n in ("D129", "D136", "D151", "D164", "E183", "D186"):
        sb_labels.append(f"{r} - {n}")

In [7]:
sb_trajs = list(np.load("../../data/raw_feat/feat2_raw_anton.npy", allow_pickle=True))
sb_trajs.extend(np.load("../../data/raw_feat/feat2_raw_anton2.npy"))

In [8]:
cv_arr = np.concatenate(cv_trajs)
sb_arr = np.concatenate(sb_trajs)
print(cv_arr.shape, sb_arr.shape)

(4150115, 2) (4150115, 60)


In [9]:
# load committors
q = np.load("../../data/feat2_dist_du_anton2/qp_downup_3.npy", allow_pickle=True)[
    8
]  # 50 ns
w = np.load(
    "../../data/feat2_dist_du_anton2/weights_3_feat5ivac.npy", allow_pickle=True
)[0]

In [52]:
X = torch.Tensor(np.hstack((cv_arr, sb_arr)))
y = torch.Tensor(np.concatenate(q)).unsqueeze(-1)
print(X.shape, y.shape)

torch.Size([4150115, 62]) torch.Size([4150115, 1])


In [11]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


In [48]:
class MultiLayerNet(pl.LightningModule):
    """Neural network with a multiple hidden layers
    and sigmoid activation.

    Parameters
    ----------
    """

    def __init__(
        self, input_dim, hidden_dim=30, output_dim=1, n_hidden=5, verbose=True
    ):
        super(MultiLayerNet, self).__init__()
        layers = []
        layers.append(nn.Linear(input_dim, hidden_dim))
        layers.append(nn.ReLU())
        for _ in range(n_hidden):
            layers.append(nn.Linear(hidden_dim, hidden_dim))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(hidden_dim, output_dim))
        layers.append(nn.Sigmoid())
        self.net = nn.Sequential(*layers)
        self.verbose = verbose

    def forward(self, x):
        return self.net(x)

    def training_step(self, batch, batch_idx):
        X, y = batch
        # Compute prediction and loss
        pred = self.net(X)
        loss = F.mse_loss(pred, y)

        if self.verbose:
            # if batch_idx % 100 == 0:
            #     loss, current = loss.item(), batch * len(X)
            #     print(f"loss: {loss:>7f}  [{current:>5d}/]")
            self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=4e-3)
        return optimizer
    
    def validation_step(self, batch, batch_idx):
        x, y = batch
        pred = self.net(x)
        loss = F.mse_loss(pred, y)
        self.log("val_loss", loss)
        
    def test_step(self, batch, batch_idx):
        x, y = batch
        pred = self.net(x)
        loss = F.mse_loss(pred, y)
        return loss

In [54]:
input_dim = X.shape[-1]
hidden_dim = 30
n_layers = 3
mlp = MultiLayerNet(input_dim, hidden_dim=hidden_dim, n_hidden=n_layers)

In [55]:
# 80/15/5 train/val/test split
train_X, val_X, train_y, val_y = sklearn.model_selection.train_test_split(
    X, y, test_size=0.2, random_state=123
)
val_X, test_X, val_y, test_y = sklearn.model_selection.train_test_split(val_X, val_y, test_size=0.25, random_state=123)
print(train_X.shape, val_X.shape, test_X.shape, train_y.shape, val_y.shape, test_y.shape)

torch.Size([3320092, 62]) torch.Size([622517, 62]) torch.Size([207506, 62]) torch.Size([3320092, 1]) torch.Size([622517, 1]) torch.Size([207506, 1])


In [56]:
batch_size = 16384
train_dataset = ga_pl.CommittorDataset(train_X, train_y)
train_batches = DataLoader(train_dataset, batch_size=batch_size, num_workers=32)
val_dataset = ga_pl.CommittorDataset(val_X, val_y)
val_batches = DataLoader(val_dataset, batch_size=batch_size, num_workers=32)
test_dataset = ga_pl.CommittorDataset(test_X, test_y)
test_batches = DataLoader(test_dataset, batch_size=batch_size, num_workers=32)

In [57]:
trainer = pl.Trainer(accelerator='gpu', devices=1)
trainer.fit(model=mlp, train_dataloader=train_batches, val_dataloaders=val_batches)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
`trainer.fit(train_dataloader)` is deprecated in v1.4 and will be removed in v1.6. Use `trainer.fit(train_dataloaders)` instead. HINT: added 's'
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Set SLURM handle signals.

  | Name | Type       | Params
------------------------------------
0 | net  | Sequential | 4.7 K 
------------------------------------
4.7 K     Trainable params
0         Non-trainable params
4.7 K     Total params
0.019     Total estimated model params size (MB)
Checkpoint directory /project/dinner/scguo/ci-vsd/notebooks/nn/lightning_logs/version_3845523/checkpoints exists and is not empty.


Epoch 0:  84%|████████▍ | 203/241 [00:04<00:00, 47.50it/s, loss=0.00207, v_num=3845523, train_loss_step=0.00198]
Validating: 0it [00:00, ?it/s][A
                                                              
Epoch 0:  91%|█████████▏| 220/241 [00:05<00:00, 37.75it/s, loss=0.00207, v_num=3845523, train_loss_step=0.00198]
Epoch 0: 100%|██████████| 241/241 [00:05<00:00, 40.39it/s, loss=0.00207, v_num=3845523, train_loss_step=0.00198]
Epoch 1:  84%|████████▍ | 203/241 [00:03<00:00, 52.92it/s, loss=0.00141, v_num=3845523, train_loss_step=0.00148, train_loss_epoch=0.0084]
Validating: 0it [00:00, ?it/s][A
                                                              
Epoch 1:  90%|█████████ | 217/241 [00:05<00:00, 40.09it/s, loss=0.00141, v_num=3845523, train_loss_step=0.00148, train_loss_epoch=0.0084]
Epoch 1: 100%|██████████| 241/241 [00:05<00:00, 43.39it/s, loss=0.00141, v_num=3845523, train_loss_step=0.00148, train_loss_epoch=0.0084]
Epoch 2:  84%|████████▍ | 203/241 [00:03<00:00, 55.65

Detected KeyboardInterrupt, attempting graceful shutdown...


In [58]:
trainer.validate(mlp, val_batches)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]



Validating: 0it [00:00, ?it/s][A
                                                          [A
Validating:   0%|          | 0/38 [00:00<?, ?it/s][A
Validating:   3%|▎         | 1/38 [00:01<00:59,  1.62s/it][A
Validating: 100%|██████████| 38/38 [00:01<00:00, 28.19it/s][A--------------------------------------------------------------------------------
DATALOADER:0 VALIDATE RESULTS
{'val_loss': 0.00020720479369629174}
--------------------------------------------------------------------------------

                                                           [A

[{'val_loss': 0.00020720479369629174}]

In [61]:
trainer.test(mlp, test_batches)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 100%|██████████| 13/13 [00:01<00:00, 11.12it/s]--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{}
--------------------------------------------------------------------------------
Testing: 100%|██████████| 13/13 [00:01<00:00,  8.28it/s]
Epoch 234: 100%|██████████| 241/241 [00:20<00:00, 11.59it/s, loss=0.000198, v_num=3845523, train_loss_step=0.00017, train_loss_epoch=0.000182]

In [63]:
mse = 0
for X, y in iter(test_batches):
    pred = mlp(X)
    with torch.no_grad():
        mse += F.mse_loss(y, pred)
mse /= len(test_batches)

In [80]:
print(f"MSE: {mse:.3e} / RMSE: {np.sqrt(mse):.<3f}")

MSE: 2.056e-04 / RMSE: 0.014340
