In [1]:
%matplotlib inline

In [2]:
import os
import asyncio
import numpy as np
import matplotlib.pyplot as plt
import mdtraj as mdt
import MDAnalysis as mda
import arcd
import arcd.distributed as arcdd

Tensorflow/Keras not available


In [3]:
# First step: load the initial TP with mdtraj so that it can be written out as a trr for gmx
# (the write itself is the `.save(...)` call in the following cell, which is commented out).
initial_tp_mdt = mdt.load("../capped_alanine_dipeptide/ala_400K_TP_low_barrier.h5")

In [4]:
# Conversion of the initial TP to trr, left disabled. The target path is the same file
# that is read back as `initial_TP` further down in this notebook.
# NOTE(review): presumably this was run once to create the file -- confirm the trr exists
# before running the rest of the notebook.
#initial_tp_mdt.save("/home/think/scratch/arcd_distributed/gmx_infiles/ala_400K_TP_low_barrier.trr")

# now the actual setup

In [5]:
# Number of sampling chains; it is passed to the storage when the central memory is initialized.
n_chains = 2  # results in 4 gmx engines (2 engines per two-way-shooting mover)
# Directory that holds storage.h5 and the gmx_infiles/ inputs; also passed to the Brain as workdir.
scratch_dir = "/home/think/scratch/arcd_distributed/"

In [6]:
# Open the arcd storage file located in the scratch directory.
# NOTE(review): whether an existing storage.h5 is reused or overwritten is not visible here -- confirm.
storage = arcd.Storage(os.path.join(scratch_dir, "storage.h5"))

# TODO: this is not very user-friendly!
We should find a better way to initialize the storage (and also to pass the individual model stores to the movers).

In [7]:
# Prepare the central memory for n_chains chains; its entries are indexed per chain
# (storage.central_memory[i].modelstore) when the movers are created.
storage.initialize_central_memory(n_chains=n_chains)

In [8]:
# Inspect the model store of the first chain (the cell output shows its repr).
storage.central_memory[0].modelstore

<arcd.base.storage.RCModelRack at 0x7f3c60275b80>

In [9]:
# Input files used to build the engines for the PathMovers.
gro = os.path.join(scratch_dir, "gmx_infiles/conf.gro")  # passed to GmxEngine as gro_file
top = os.path.join(scratch_dir, "gmx_infiles/topol.top")  # passed to GmxEngine as top_file

# Loaded mdp; it is handed to the movers as `engine_config` and read like a mapping (mdp["ref-t"]).
mdp = arcdd.MDP(os.path.join(scratch_dir, "gmx_infiles/md.mdp")) 

In [10]:
# One inner list of engines per chain. The outer length follows n_chains (instead of a
# second hard-coded 2) so that the engines stay in sync with the central memory.
engines = [[arcdd.GmxEngine(gro_file=gro, top_file=top) for _ in range(2)]  # 2 way shooting: 2 engines per mover
           for _ in range(n_chains)]  # one mover per chain, i.e. 2 movers for 2 chains

In [11]:
# states
def alpha_R(traj):
    """
    Per-frame indicator for the alpha_R state.

    Loads the frames from `traj.trajectory_file` with mdtraj and returns a boolean
    array that is True where phi <= 0 and -50 degree <= psi <= 30 degree.
    """
    # mdt can not work with tprs, so we use the initial gro as topology for now
    gro_file = os.path.join(scratch_dir, "gmx_infiles/conf.gro")
    frames = mdt.load(traj.trajectory_file, top=gro_file)
    # one index quadruple per call, so the single result column is selected
    phi = mdt.compute_dihedrals(frames, indices=[[4,6,8,14]])[:, 0]
    psi = mdt.compute_dihedrals(frames, indices=[[6,8,14,16]])[:, 0]
    deg = 180/np.pi  # divide a bound given in degree by this to compare it with the angles
    phi_inside = phi <= 0  # phi: -pi -> 0
    psi_inside = (-50/deg <= psi) & (psi <= 30/deg)  # psi: >= -50 and <= 30 degree
    return phi_inside & psi_inside


# Wrap the state function for arcd.distributed. The wrapper is referenced through the same
# top-level name (arcdd.TrajectoryFunctionWrapper) that the other wrappers in this notebook use.
wrapped_alphaR = arcdd.TrajectoryFunctionWrapper(alpha_R)


def C7_eq(traj):
    """
    Per-frame indicator for the C7_eq state.

    Loads the frames from `traj.trajectory_file` with mdtraj and returns a boolean
    array that is True where phi <= 0 and psi lies in the basin around +-180 degree,
    i.e. psi >= 120 degree or psi <= -160 degree.

    The previous condition tested `-160 degree <= psi`, which is true for almost every
    psi: it made the 120..180 degree clause redundant and let C7_eq overlap with the
    alpha_R state (phi <= 0, -50 degree <= psi <= 30 degree).
    """
    traj = mdt.load(traj.trajectory_file,
                    top=os.path.join(scratch_dir, "gmx_infiles/conf.gro"),  # mdt can not work with tprs, so we use the initial gro for now
                    )
    psi = mdt.compute_dihedrals(traj, indices=[[6,8,14,16]])[:, 0]
    phi = mdt.compute_dihedrals(traj, indices=[[4,6,8,14]])[:, 0]
    deg = 180/np.pi
    # phi: -pi -> 0
    phi_inside = phi <= 0
    # psi: 120 -> 180 degree, continued across the periodic boundary to -180 -> -160 degree.
    # The outer bounds at +-180 degree are not tested explicitly.
    # NOTE(review): assumes mdtraj returns dihedrals within [-pi, pi] -- confirm.
    psi_inside = (120/deg <= psi) | (psi <= -160/deg)
    return phi_inside & psi_inside


# Wrapped C7_eq state function; this object (not the bare function) is what is passed
# in the `states` lists of the model and the movers.
wrapped_C7_eq = arcdd.TrajectoryFunctionWrapper(C7_eq)

In [12]:
# descriptor_transform and model

# internal coordinates
def ic_transform(traj):
    """
    Descriptor transform: internal coordinates of every frame.

    Loads the frames from `traj.trajectory_file` with mdtraj, generates the index
    pairs/triples/quadruples from the topology (source_idx=0) and returns the result
    of arcd's internal-coordinate transform for them.
    """
    # mdt can not work with tprs, so we use the initial gro as topology for now
    gro_file = os.path.join(scratch_dir, "gmx_infiles/conf.gro")
    frames = mdt.load(traj.trajectory_file, top=gro_file)
    internal = arcd.coords.internal
    idx_pairs, idx_triples, idx_quadruples = internal.generate_indices(frames.topology, source_idx=0)
    return internal.transform(frames,
                              pairs=idx_pairs,
                              triples=idx_triples,
                              quadruples=idx_quadruples,
                              )

# Wrapped descriptor transform; calling it on a trajectory is awaited further down and
# it is passed to the model as `descriptor_transform`.
wrapped_transform = arcdd.TrajectoryFunctionWrapper(ic_transform)

In [13]:
# Initial transition path as an arcd.distributed Trajectory, built from the trr in gmx_infiles
# and conf.gro. NOTE(review): presumably (trajectory file, structure file) -- confirm the
# positional argument order against arcdd.Trajectory.
initial_TP = arcdd.Trajectory(os.path.join(scratch_dir, "gmx_infiles/ala_400K_TP_low_barrier.trr"),
                              os.path.join(scratch_dir, "gmx_infiles/conf.gro"))

In [14]:
# Compute the descriptors of the initial TP once. The wrapped transform is awaited, so this
# cell relies on the notebook's top-level `await` support.
descript = await wrapped_transform(initial_TP)

In [15]:
# Size of the second axis of the descriptors; used below as the input dimension (n_in) of the network.
cv_ndim = descript.shape[1]

In [16]:
import torch.nn.functional as F
import torch

In [17]:
# Feed-forward part: 4 hidden layer pyramidal network with widths cv_ndim // k for k = 1..4.
ffnet = arcd.pytorch.networks.FFNet(
    n_in=cv_ndim,
    n_hidden=[cv_ndim // k for k in range(1, 5)],
    activation=F.elu,
)

# Residual part with the same width as the last hidden layer of ffnet.
resnet = arcd.pytorch.networks.ResNet(n_units=cv_ndim // 4, n_blocks=2)

# Stack the initialized modules (a list of torch.nn.Modules from arcd.pytorch.networks).
# With a single output we predict only p_B and use a binomial loss. n_out=n_states would
# instead use a multinomial loss and predict all states, but that is probably only
# worthwhile if n_states > 2 as it increases the number of free parameters in the NN.
torch_model = arcd.pytorch.networks.ModuleStack(
    n_out=1,
    modules=[ffnet, resnet],
)

# Move the model to the GPU if CUDA is available.
if torch.cuda.is_available():
    torch_model = torch_model.to('cuda')

# Optimizer used to train the model.
optimizer = torch.optim.Adam(torch_model.parameters(), lr=1e-3)

In [18]:
# We use an EEScalePytorchRCModel. Per the original note, this RCmodel scales the learning
# rate by the expected efficiency factor (1 - n_TP_true / n_TP_expected)**2.
model = arcd.pytorch.EEScalePytorchRCModel(
    nnet=torch_model,
    optimizer=optimizer,
    states=[wrapped_C7_eq, wrapped_alphaR],
    ee_params=dict(
        lr_0=1e-3,
        lr_min=5e-5,  # lr_min = lr_0 / 20 is a good choice empirically
        epochs_per_train=5,
        interval=3,
        window=75,
    ),
    descriptor_transform=wrapped_transform,
    #cache_file=arcd_store,
)

In [19]:
# List with initialized movers: one inner list (holding a single two-way-shooting mover)
# per entry of `engines`. Each mover uses the model store with the same index in the
# central memory and takes its temperature from the first "ref-t" entry of the mdp.
movers = [
    [
        arcdd.TwoWayShootingPathMover(
            modelstore=storage.central_memory[chain_idx].modelstore,
            states=[wrapped_C7_eq, wrapped_alphaR],
            engines=chain_engines,
            engine_config=mdp,
            walltime_per_part=0.01,
            T=mdp["ref-t"][0],
        )
    ]
    for chain_idx, chain_engines in enumerate(engines)
]

In [20]:
# Empty training set for 2 states, matching the two state functions (C7_eq, alpha_R) given to the model.
trainset = arcd.TrainSet(n_states=2)

In [21]:
# Tasks handed to the Brain. NOTE(review): judging by the class names, the first trains the
# model on the trainset and the second saves model and trainset to storage -- confirm when they run.
tasks = [arcdd.logic.TrainingTask(model=model, trainset=trainset),
         arcdd.logic.SaveTask(storage=storage, model=model, trainset=trainset)]

In [22]:
# The Brain ties model, storage, movers and tasks together.
# mover_weights gets one [1.] per inner list of `movers` (each of which holds a single mover),
# so it follows the number of chains instead of being hard-coded for two.
brain = arcdd.Brain(model=model, workdir=scratch_dir, storage=storage, movers=movers,
                    mover_weights=[[1.] for _ in movers],
                    tasks=tasks)

In [23]:
# Step 0 for the chains: not produced by any mover (mover=None), it carries the initial TP as path
# and points to the gmx_infiles directory.
initial_step = arcdd.logic.MCstep(mover=None, stepnum=0, directory=os.path.join(scratch_dir, "gmx_infiles"), path=initial_TP)

In [24]:
# Seed every chain of the brain with the initial step.
# All chains receive the very same MCstep object; no copy is made.
for chain in brain.chains:
    chain.current_step = initial_step

In [25]:
# Run the sampling for 500 steps, using the notebook's top-level `await`.
# NOTE(review): the output recorded for this cell is
# "RuntimeError: This event loop is already running". Presumably something in the call chain
# tries to run the event loop again (e.g. via run_until_complete) while the notebook's loop
# is already running -- confirm where in arcd.distributed this is raised.
await brain.run_for_n_steps(500)

RuntimeError: This event loop is already running