In [None]:
import torch
import numpy as np
import mdtraj

import boltzgen.zmatrix as zmatrix
import boltzgen.internal as ics
import boltzgen.mixed as mixed

# Alanine dipeptide trajectory used by the rest of this notebook.
# The file is expected to have been produced by an OpenMM simulation,
# for example the code in the internal_coordinates notebook.
trajectory_file = 'aldp100.h5'
aldp_traj = mdtraj.load(trajectory_file)

In [None]:
# Show the atoms of the alanine dipeptide molecule.
# Element 0 of the to_dataframe() result is the part that lists the atoms.
atom_table = aldp_traj.topology.to_dataframe()[0]
print(atom_table)

In [None]:
# Mixed Coordinate Transform:
# - This transform converts some atoms to an internal
#   coordinate representation (bond length, bond angle,
#   dihedral angle) and the rest of the atoms to 
#   cartesian and then PCA is applied to the cartesian
#   ones.
# - The IC atoms need cartesian atoms to base their
#   bond length/angle etc on and so there is a 
#   minimum number of atoms that need to be cartesian.
# - However, you can choose more atoms to be cartesian
#   which makes inversion (IC -> cartesian) easier as
#   there can be more parallelisation in the inversion
#   process. With the minimum number of cartesian atoms,
#   the original molecule is built up one atom at a time,
#   as you can only place atoms that directly depend on
#   already cartesian atoms.
#   With a larger number of initial cartesian atoms,
#   different groups of atoms can be built up in parallel.
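# - Which atoms to leave in cartesian representation and
#   which to put in internal coordinates is therefore a
#   tradeoff: more cartesian atoms allow more parallelism
#   in the inversion, while fewer cartesian atoms keep more
#   of the molecule in internal coordinates.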





# Z index matrix: the atoms that will be represented in internal
# coordinates. Each row of the table below reads
#     (atom, bond_atom, angle_atom, dihedral_atom)
# where
#   - atom is the index of the atom being converted,
#   - bond_atom is the index of the atom it is bonded to,
#   - angle_atom and dihedral_atom are the further reference atoms
#     that allow the bond angle and the dihedral to be calculated.
# Every reference atom must either belong to the cartesian set or have
# been defined by an earlier row; e.g. atom 1 is defined in the first
# row, so it can be used as a reference for atom 0 in the second row.
# This z index matrix is not unique for the molecule; it is just one
# possible choice.
z = [
    (atom, [bond_atom, angle_atom, dihedral_atom])
    for atom, bond_atom, angle_atom, dihedral_atom in (
        (1, 4, 5, 6),
        (0, 1, 4, 5),
        (2, 1, 0, 4),
        (3, 1, 0, 2),
        (7, 6, 4, 5),
        (9, 8, 6, 7),
        (10, 8, 6, 9),
        (11, 10, 8, 9),
        (12, 10, 8, 11),
        (13, 10, 11, 12),
        (17, 16, 14, 15),
        (19, 18, 16, 17),
        (20, 18, 19, 16),
        (21, 18, 19, 20),
    )
]

# Indices of the atoms that are left in cartesian coordinates.
# Following the MacCallum lab code, these are the atoms described as
# the 'backbone' of the molecule (the original note attributes that
# definition to OpenMM).
backbone_indices = [4, 5, 6, 8, 14, 15, 16, 18]



# Preprocessing, taken directly from the MacCallum lab examples.

# Step 1: center everything. The return value is not captured, so the
# following steps rely on aldp_traj itself having been updated.
aldp_traj.center_coordinates()

# Step 2: superpose the trajectory on its own frame 0, using the
# "backbone" atom selection for both the moving and the reference atoms.
ind = aldp_traj.top.select("backbone")
aldp_traj.superpose(
    reference=aldp_traj,
    frame=0,
    atom_indices=ind,
    ref_atom_indices=ind,
)




# Build the training set as a PyTorch tensor: every frame is flattened
# into one row of length n_dim (three values per atom) and the result
# is converted to float32.
n_atoms = aldp_traj.xyz.shape[1]
n_dim = 3 * n_atoms
training_data_npy = aldp_traj.xyz.reshape(-1, n_dim)
training_data = torch.from_numpy(training_data_npy.astype("float32"))


# Build the mixed transform:
# - the atoms listed in the z index matrix are transformed to internal
#   coordinates
# - the cartesian atoms just have PCA applied to them
# The dimension argument is n_dim, computed from the data earlier in this
# cell, rather than a hard-coded 66, so it cannot drift away from the
# actual width of training_data.
mixed_transform = mixed.MixedTransform(n_dim, backbone_indices, z, training_data)

# Test the forward and backward pass: transform the data, then invert the
# result to recover coordinates that can be compared with the input.
mixed_coords, jac = mixed_transform.forward(training_data)
orig_coords, invjac = mixed_transform.inverse(mixed_coords)

# Print the first frame three values at a time:
#   org = original input, mxd = transformed, rcd = recovered by the inverse
# NOTE(review): z plus backbone_indices name 22 atoms, so 20 triplets is
# fewer than the number of atoms -- presumably chosen to match the width
# of mixed_coords; TODO confirm.
for i in range(20):
    print("org", training_data[0, 3*i:3*i+3])
    print("mxd", mixed_coords[0, 3*i:3*i+3])
    print("rcd", orig_coords[0, 3*i:3*i+3])
    print("--------")

# One-number summary of the round trip over all frames, so the check does
# not depend on reading the printed triplets by eye.
print("max abs reconstruction error:",
      (orig_coords - training_data).abs().max().item())