<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Kleines-Beispiel" data-toc-modified-id="Kleines-Beispiel-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Kleines Beispiel</a></span><ul class="toc-item"><li><span><a href="#Example---show-PT-structures-with-5-largest-and-5-smallest-potential-energies" data-toc-modified-id="Example---show-PT-structures-with-5-largest-and-5-smallest-potential-energies-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Example - show PT structures with 5 largest and 5 smallest potential energies</a></span></li><li><span><a href="#Example---plot-an-eigenvector-of-SqRA" data-toc-modified-id="Example---plot-an-eigenvector-of-SqRA-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Example - plot an eigenvector of SqRA</a></span></li></ul></li><li><span><a href="#Classify-full-trajectories-in-cells-of-FullGrid" data-toc-modified-id="Classify-full-trajectories-in-cells-of-FullGrid-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Classify full trajectories in cells of FullGrid</a></span></li></ul></div>

In [1]:
import sys
import os

# Make the molgri checkout the working directory and extend the import path.
# Two machine-specific absolute locations are tried: a Linux path first and,
# if os.chdir reports that it does not exist, a Windows checkout as fallback.
# NOTE(review): both paths are hard-coded to one user's machines; on any other
# machine the second os.chdir raises FileNotFoundError as well.
try:
    os.chdir(r"/home/hanaz63/PAPER_MOLECULAR_ROTATIONS_2022/nobackup/molgri")
    sys.path.append(r"/home/hanaz63/PAPER_MOLECULAR_ROTATIONS_2022/nobackup")
except FileNotFoundError:
    # the Linux directory is missing -> switch to the Windows checkout instead
    os.chdir(r"D:\HANA\phD\PAPER_2022\molecularRotationalGrids")
    sys.path.append(r"D:\HANA\phD\PAPER_2022\molecularRotationalGrids")
    
import warnings
warnings.filterwarnings("ignore")

In [2]:
import nglview as nv
import MDAnalysis as mda
import numpy as np
import time
import ipywidgets as widgets
from scipy.sparse import csr_array
from numpy.typing import NDArray
import matplotlib

from molgri.molecules.transitions import SimulationHistogram, MSM, SQRA
from molgri.plotting.molecule_plots import TrajectoryPlot
import pandas as pd
import matplotlib.pyplot as plt
from molgri.molecules.parsers import FileParser, ParsedEnergy, XVGParser

from molgri.paths import PATH_OUTPUT_PT, OUTPUT_PLOTTING_DATA, PATH_OUTPUT_LOGGING
from molgri.space.fullgrid import FullGrid
from molgri.space.utils import k_argmin_in_array, k_argmax_in_array



Zuerst: Wir brauchen eine HF-Monomer-Datei.

In [3]:
%pycat input/HF.xyz

Nun lassen wir zwei PTs (klein, groß) erzeugen:
- klein: -o 42 -t "linspace(0.1, 0.4, 10)" -b 8
- groß: -o 162 -t "linspace(0.1, 0.4, 20)" -b 40

## Kleines Beispiel
- obtain full grid
- show full grid coordinates per frame in the corner

In [4]:
%matplotlib notebook

In [5]:
class ViewManager:

    """
    NGLViewer is very useful but not very convenient for displaying particular frames of a trajectory
    together, in particular with custom color schemes etc. This class accepts a MDA Universe and knows how
    to extract frames from it. The plotting functions then accept indices (one or several) for this
    trajectory and display them in a particular way (overlapping, sequential ...).

    Attributes:
        u: the Universe whose frames are displayed
        view: the NGLWidget that the plotting methods currently add components to
    """

    def __init__(self, u: mda.Universe):
        self.u = u
        self.fresh_view()

    def fresh_view(self):
        """
        Run this when you want to start a new view and discard an old one.
        """
        self.view = nv.NGLWidget()

    def get_ith_frame(self, i: int) -> mda.Universe:
        """
        The most important method acting on self.u. Get a Universe object consisting of all the atoms
        but only a single frame (i-th frame).

        Args:
            i (int): the index of the frame wanted

        Returns:
            a Universe containing the i-th frame of self.u
        """
        # indexing the trajectory is done for its side effect: it moves self.u to frame i
        self.u.trajectory[i]
        # Merge builds a new Universe from the atoms as they are at the current frame
        return mda.Merge(self.u.atoms)

    def _add_coordinate_axes(self):
        """
        Helper method. Add the x, y and z axes at origin to the current NGLView.
        """
        # arguments of add_arrow are: start position, end position, color (in RGB), radius of arrow head
        # arguments of add_label are: position, color (in RGB), size, text
        # each axis is colored by its own unit vector: x is red, y is green, z is blue
        unit_axes = (([1, 0, 0], 'x'), ([0, 1, 0], 'y'), ([0, 0, 1], 'z'))
        for direction, label in unit_axes:
            self.view.shape.add_arrow([0, 0, 0], list(direction), list(direction), 0.1)
            self.view.shape.add_label(list(direction), list(direction), 1.5, label)

    def plot_ith_frame(self, frame_i: int, axes: bool = True, **kwargs):
        """
        Plot i-th frame of self.u, adding to self.view.

        Args:
            - frame_i: index of the frame
            - axes: if True, draw x, y and z axes
            - kwargs: forwarded to the "ball+stick" representation (e.g. color, opacity)

        Returns:
            the view (self.view) that the frame has been added to
        """
        ith_atoms = self.get_ith_frame(frame_i)

        self.view.add_component(ith_atoms, default_representation=False)
        # the index is there in order to only affect the last added component
        self.view[-1].add_representation("ball+stick", **kwargs)
        if axes:
            self._add_coordinate_axes()
        return self.view

    def _add_optional_representation_parameters(self, my_index: int, colors: list, opacities: list):
        """
        Helper method if you want to plot several frames and pass per-frame arguments to them.

        Args:
            - my_index: position of the frame within the list of frames being plotted
            - colors: a sequence of colors or None
            - opacities: a sequence of opacities or None

        Returns:
            a dictionary with the keys "color" and/or "opacity" for the sequences that are not None
        """
        kwargs = {}
        if colors is not None:
            kwargs["color"] = colors[my_index]
        if opacities is not None:
            kwargs["opacity"] = opacities[my_index]
        return kwargs

    def plot_frames_sequential(self, list_indices: list, colors: list = None, opacities: list = None):
        """
        Plot several frames of self.u next to each other. Automatically go to the next row if you have too
        many frames to display in one row.

        Note: every frame gets its own fresh view, so after this call self.view is the view of the last
        plotted frame.

        Args:
            - list_indices: a list of integers, each a frame index to be displayed
            - colors: a list of colors (must be same length as list_indices) or None (default)
            - opacities: a list of opacities (must be same length as list_indices) or None (default)
        """

        # settings that are important so that rows with too many images nicely overflow in the next row
        box = widgets.Box(layout=widgets.Layout(width='100%', display='inline-flex', flex_flow='row wrap'))
        box.overflow_x = 'auto'

        all_views = []
        for li, list_i in enumerate(list_indices):
            self.fresh_view()
            # add optional parameters
            kwargs = self._add_optional_representation_parameters(li, colors, opacities)
            neig_view = self.plot_ith_frame(list_i, **kwargs)
            # this is also important for nice arrangement of figures
            neig_view.layout.width = "200px"
            all_views.append(neig_view)

        # sync all views (so that all plots move if you move any)
        for v in all_views:
            v._set_sync_camera(all_views)

        box.children = list(all_views)
        display(box)

    def plot_frames_overlapping(self, list_indices: list, colors: list = None, opacities: list = None):
        """
        Plot several frames of self.u overlapping in the current view.

        Args:
            - list_indices: a list of integers, each a frame index to be displayed
            - colors: a list of colors (must be same length as list_indices) or None (default)
            - opacities: a list of opacities (must be same length as list_indices) or None (default)

        Returns:
            the view (self.view) containing all requested frames
        """

        for li, list_i in enumerate(list_indices):
            # add optional parameters
            kwargs = self._add_optional_representation_parameters(li, colors, opacities)
            # all frames share one view, so the coordinate axes are only drawn together with the first
            # frame instead of being stacked once per frame
            self.plot_ith_frame(list_i, axes=(li == 0), **kwargs)

        return self.view




In [6]:
"""
Some important indices you may wanna plot:
- eigenvectors of SQRA
"""

class PostCalculationEvaluator:

    """
    Combine the following (read or use saved if you have it available):
        - trajectory or pseudotrajectory (in form of mda Universe)
        - energies
        - full grid (FullGrid object)
        - transition model (MSM or SQRA object)

    Args:
        name_pt: name of the pseudotrajectory; used to build the names of the .gro, .xtc, .log and .xvg files
        default_atom_selection: atom selection string forwarded to the trajectory parser
        energy_dir: directory that contains one sub-directory <name_pt>/ holding <name_pt>.xvg
    """

    def __init__(self, name_pt, default_atom_selection, energy_dir: str = "/home/hanaz63/nobackup/gromacs/"):
        self.name_pt = name_pt
        self.default_atom_selection = default_atom_selection
        self.energy_dir = energy_dir
        self.u = self.read_u_PT()
        self.parsed_trajectory = self.read_parsed_trajectory_PT()
        self.fg = self.read_fg_PT()
        self.energy = self.read_energy_PT()
        self.transition_model = self.read_sqra_PT()

        # just to check if everything is sane
        self._assert_consistent_len()

    def _assert_consistent_len(self):
        """
        Check that trajectory, full grid, energies and transition matrix all describe the same number of
        structures.
        """
        traj_len = len(self.u.trajectory)
        fg_len = len(self.fg.get_full_grid_as_array())
        energy_len = len(self.energy.energies)
        sqra_len = len(self.transition_model.get_transitions_matrix()[0])
        assert traj_len == fg_len == energy_len == sqra_len, \
            f"Inconsistent lengths: trajectory {traj_len}, grid {fg_len}, energy {energy_len}, SQRA {sqra_len}"

    def __len__(self):
        """
        The number of frames in the (pseudo)trajectory.
        """
        return len(self.u.trajectory)

    # --------------------------------------------------------------------------------------------------
    #                            Readers to set up all necessary structures.
    # --------------------------------------------------------------------------------------------------

    def read_u_PT(self):
        """
        Load the pseudotrajectory (.gro and .xtc in PATH_OUTPUT_PT) as a mda Universe.
        """
        return mda.Universe(f"{PATH_OUTPUT_PT}{self.name_pt}.gro", f"{PATH_OUTPUT_PT}{self.name_pt}.xtc")

    def read_parsed_trajectory_PT(self):
        """
        Parse the pseudotrajectory with FileParser and attach the energies read from the .xvg file.
        """
        pt_parser = FileParser(f"{PATH_OUTPUT_PT}{self.name_pt}.gro", f"{PATH_OUTPUT_PT}{self.name_pt}.xtc")
        pt = pt_parser.get_parsed_trajectory(default_atom_selection=self.default_atom_selection)
        pt.energies = self.read_energy_PT()
        return pt

    @staticmethod
    def _parse_grid_info_from_log(lines):
        """
        Find the grid parameters and the full grid name among the lines of a PT log file.

        Args:
            lines: an iterable of text lines (e.g. an open file)

        Returns:
            (input_names, full_grid_name): the text after the last ": " on the respective log lines

        Raises:
            ValueError: if the lines end before both entries have been found
        """
        input_names = None
        full_grid_name = None
        for line in lines:
            if line.startswith("INFO:PtLogger:input grid parameters:"):
                input_names = line.strip().split(": ")[-1]
            elif line.startswith("INFO:PtLogger:full grid name:"):
                full_grid_name = line.strip().split(": ")[-1]
            # stop reading as soon as both entries are known
            if input_names is not None and full_grid_name is not None:
                return input_names, full_grid_name
        # iterating (instead of polling readline) guarantees termination at the end of the file
        raise ValueError("Log does not contain both 'input grid parameters' and 'full grid name' entries.")

    def read_fg_PT(self):
        """
        Re-create the FullGrid that was used for the pseudotrajectory and verify it against the saved array.

        Side effect: sets self.grid_name to the full grid name found in the log file.

        Raises:
            ValueError: if the log file does not contain the grid parameters and the full grid name
        """
        # first step: read the name of the full grid from the log file
        with open(f"{PATH_OUTPUT_LOGGING}{self.name_pt}.log") as f:
            input_names, full_grid_name = self._parse_grid_info_from_log(f)

        self.grid_name = full_grid_name

        # the first two words are the o and b grid names, everything after them is the t grid expression
        input_names = input_names.split(" ")
        t_input = " ".join(input_names[2:])
        fg = FullGrid(o_grid_name=input_names[0], b_grid_name=input_names[1], t_grid_name=t_input,
                      use_saved=True)

        # second step: load the .npy file with the found name
        used_grid = np.load(f"{OUTPUT_PLOTTING_DATA}get_full_grid_as_array_{full_grid_name}.npy")

        # third step: assert that this is actually the grid that has been used
        assert np.allclose(used_grid, fg.get_full_grid_as_array()), \
            f"The saved grid {full_grid_name} does not match the grid re-created from the log parameters."

        return fg

    def read_energy_PT(self):
        """
        Parse the energies from <energy_dir>/<name_pt>/<name_pt>.xvg.
        """
        my_parser = XVGParser(os.path.join(self.energy_dir, self.name_pt, f"{self.name_pt}.xvg"))
        return my_parser.get_parsed_energy()

    def read_sqra_PT(self):
        """
        Build the SQRA transition model from the parsed trajectory and the full grid.
        """
        sh = SimulationHistogram(self.parsed_trajectory, self.fg)
        return SQRA(sh, use_saved=True)

    # --------------------------------------------------------------------------------------------------
    #                            Getters to obtain important indices.
    # --------------------------------------------------------------------------------------------------

    def get_indices_k_lowest_energies(self, k: int, energy_type: str):
        """
        Indices of the k structures with the lowest energy of the selected type.
        """
        all_energies = self.energy.get_energies(energy_type)
        return k_argmin_in_array(all_energies, k)

    def get_indices_neighbours_of_cell_i(self, i: int):
        """
        Indices of all cells that have a non-zero entry in column i of the full grid adjacency matrix.
        """
        adj_array = csr_array(self.fg.get_full_adjacency())[:, [i]].toarray().T[0]
        neighbour_indices = np.nonzero(adj_array)[0]
        return neighbour_indices

    def get_indices_same_orientation(self, quaternion_grid_index: int):
        """
        Every num_quaternions-th frame index, starting at quaternion_grid_index.
        """
        num_quaternions = self.fg.b_rotations.get_N()

        return list(range(quaternion_grid_index, len(self), num_quaternions))

    def get_indices_same_position(self, position_grid_index: int):
        """
        The block of num_quaternions consecutive frame indices that belongs to position_grid_index.
        """
        num_quaternions = self.fg.b_rotations.get_N()

        return list(range(position_grid_index*num_quaternions, (position_grid_index+1)*num_quaternions))

    # --------------------------------------------------------------------------------------------------
    #                            Getters to obtain a measure of magnitude.
    # --------------------------------------------------------------------------------------------------

    def get_magnitude_energy(self, energy_type: str):
        """
        The energies of the selected type, one value per structure.
        """
        return self.energy.get_energies(energy_type)

    def get_magnitude_ith_eigenvector(self, i: int):
        """
        The i-th eigenvector of the transition model (i-th row of the transposed first eigenvector array).
        """
        _, evec = self.transition_model.get_eigenval_eigenvec()
        my_eigenvector = evec[0].T[i]
        return my_eigenvector



class MplColorHelper:
    """
    Translate scalar values to hex colors using a diverging colormap centered at vcenter.

    Args:
        values: optional array of all values that will be colored. If given, the limits of the color
                scale are determined from these values up front. If None, the limits are left unset and
                the norm determines them from the first value it is asked to convert.
        cmap_name: name of a registered matplotlib colormap
        vcenter: the value that is mapped to the middle of the colormap
    """

    def __init__(self, values=None, cmap_name: str = 'bwr', vcenter: float = 0):
        try:
            # the colormap registry replaces matplotlib.cm.get_cmap in newer matplotlib versions
            self.cmap = matplotlib.colormaps[cmap_name]
        except AttributeError:
            # older matplotlib without the registry
            self.cmap = matplotlib.cm.get_cmap(cmap_name)
        self.norm = matplotlib.colors.TwoSlopeNorm(vcenter=vcenter) #, vmax=5
        if values is not None:
            # fix the limits once so that every later get_hex call uses the same scale
            self.norm.autoscale_None(np.asarray(values))
        self.scalarMap = matplotlib.cm.ScalarMappable(norm=self.norm, cmap=self.cmap)

    def get_hex(self, val):
        """
        Return the hex color string that the colormap assigns to val.
        """
        rgba = self.scalarMap.to_rgba(val)
        return matplotlib.colors.rgb2hex(rgba)
        

### Example - show PT structures with 5 largest and 5 smallest potential energies

In [10]:
# EXAMPLE - PT structures with the 5 largest and 5 smallest potential energies

# changeable parameters
my_name1 = "H2O"
my_name2 = "H2O"
my_num = "0099"

my_name = f"{my_name1}_{my_name2}_{my_num}"
my_selection = "bynum 4:6"

# read everything from files
pce = PostCalculationEvaluator(my_name, my_selection)

# display (the constructor already starts with a fresh view)
vm = ViewManager(pce.u)

# magnitudes
mch = MplColorHelper()
magnitudes = pce.get_magnitude_energy("Potential")

# Determine the limits of the color scale from ALL magnitudes before converting single values.
# Without this step the norm derives its limits from the first scalar it sees, so the colors
# would depend on which value happens to come first.
mch.norm.autoscale_None(np.asarray(magnitudes))
colors = np.array([mch.get_hex(mag) for mag in magnitudes])

# 5 largest, 5 smallest
num_extremes = 5
argmin_index = k_argmin_in_array(magnitudes, num_extremes)
argmax_index = k_argmax_in_array(magnitudes, num_extremes)
both_index = [*argmax_index, *argmin_index]

print(magnitudes[both_index])

vm.plot_frames_overlapping(both_index, colors=colors[both_index], opacities=[0.5]*len(both_index))

[ 978.4646   1139.811523 1139.813599 1226.625488 1226.640991  -19.245655
  -19.247295  -16.837755  -16.837139  -15.785943]


NGLWidget()

### Example - plot an eigenvector of SqRA

In [11]:
# EXAMPLE - eigenvector: structures with the most positive and most negative eigenvector entries

# display (the constructor already starts with a fresh view)
vm = ViewManager(pce.u)

# magnitudes
mch = MplColorHelper()
magnitudes = pce.get_magnitude_ith_eigenvector(2)

# Determine the limits of the color scale from ALL magnitudes before converting single values.
# Without this step the norm derives its limits from the first scalar it sees, so the colors
# would depend on which value happens to come first.
mch.norm.autoscale_None(np.asarray(magnitudes))
colors = np.array([mch.get_hex(mag) for mag in magnitudes])

# 20 largest, 20 smallest
num_extremes = 20
argmin_index = k_argmin_in_array(magnitudes, num_extremes)
argmax_index = k_argmax_in_array(magnitudes, num_extremes)
both_index = [*argmax_index, *argmin_index]

print(magnitudes[both_index])

vm.plot_frames_overlapping(both_index, colors=colors[both_index], opacities=[0.5]*len(both_index))
vm.view

[ 0.04037964  0.04410047  0.04660584  0.04643954  0.04623079  0.04595805
  0.04513278  0.06340487  0.13230194  0.22122721  0.12195688  0.09654565
  0.10670643  0.08554157  0.26173064  0.17983264  0.50564893  0.58696169
  0.09340822  0.23770374 -0.16191115 -0.05852288 -0.05026359 -0.05038413
 -0.04650971 -0.05770409 -0.06182747 -0.04265221 -0.04951756 -0.04962655
 -0.04051547 -0.03982912 -0.03966136 -0.03870774 -0.03131237 -0.03124594
 -0.03849364 -0.03580822 -0.03108125 -0.03022683]


NGLWidget()

In [17]:
# not PT: a simulated trajectory. All input files live in one directory, defined once.
my_path = "/home/hanaz63/nobackup/gromacs/H2O_H2O_0095_2000/"
topology = f"{my_path}H2O_H2O_0095.gro"
coordinates = f"{my_path}fitted_output.xtc"
energy = f"{my_path}full_energy.xvg"

u_traj = mda.Universe(topology, coordinates)

fg = FullGrid(o_grid_name="12", b_grid_name="8", t_grid_name="linspace(0.2, 1, 20)", use_saved=True)

# preparing the parsed trajectory
my_parser = XVGParser(energy)
pe = my_parser.get_parsed_energy()
pt_parser = FileParser(
    path_topology=topology,
    path_trajectory=coordinates)
parsed_trajectory = pt_parser.get_parsed_trajectory(default_atom_selection="bynum 4:6")
parsed_trajectory.energies = pe

# assign every frame of the trajectory to a cell of the full grid
sm = SimulationHistogram(parsed_trajectory, fg)
my_array1 = sm.get_all_assignments()

# indices of all frames that have been assigned to the grid cell with index 74
selected_cell = 74
cell_74 = np.where(my_array1 == selected_cell)[0]
print(cell_74)

# overlay all frames of the selected cell (the constructor already starts with a fresh view)
vm = ViewManager(u_traj)
vm.plot_frames_overlapping(cell_74)

[1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586
 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1779
 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793
 1794 1795 1796 1797 1798 1799 1800 1801]


NGLWidget()

In [19]:
# Overlay the 20 frames of the simulated trajectory that have the lowest potential energy.
# Uses parsed_trajectory and vm that were created in an earlier cell.
all_energies = parsed_trajectory.energies.get_energies("Potential")
lowest_E = k_argmin_in_array(all_energies, 20)
print(lowest_E)
# discard the frames that earlier cells added to the view before plotting the new selection
vm.fresh_view()
vm.plot_frames_overlapping(lowest_E)

[1059 1065 1064 1063 1298 1297 1296 1295 1294 1293 1292 1062 1061 1060
 1056 1057 1058 1291 1299 1066]


NGLWidget()

## Classify full trajectories in cells of FullGrid

In [105]:
from molgri.molecules.writers import PtIOManager

from MDAnalysis.coordinates.memory import MemoryReader
from MDAnalysis.analysis.base import AnalysisFromFunction

def _extract_universe_second_molecule(original_universe, selection_criteria):
    """
    Build an in-memory Universe that contains only the selected atoms of original_universe.

    Args:
        original_universe: the Universe to take atoms and frames from
        selection_criteria: a selection string passed to select_atoms

    Returns:
        a new Universe made of the selected atoms, with the per-frame positions collected from
        original_universe loaded through MemoryReader
    """
    selected_atoms = original_universe.select_atoms(selection_criteria)

    def _copy_positions(atom_group):
        # copy, because the positions array is not kept once the reader moves to another frame
        return atom_group.positions.copy()

    # run over the trajectory and collect the positions of the selected atoms
    position_collector = AnalysisFromFunction(_copy_positions, selected_atoms)
    position_collector.run()
    per_frame_positions = position_collector.results['timeseries']

    reduced_universe = mda.Merge(selected_atoms)
    reduced_universe.load_new(per_frame_positions, format=MemoryReader)
    return reduced_universe


def assign_trajectory_2_quaternion_grid(trajectory_universe: mda.Universe, m1_name, m2_name, b_grid_name,
                                       second_molecule_selection):
    """
    Assign every frame of a trajectory to the most similar orientation of a quaternion-only pseudotrajectory.

    A pseudotrajectory (PT) with a single position and all orientations of the b grid is constructed. The
    second molecule is extracted from the PT and from the trajectory, and every trajectory frame is assigned
    to the PT frame with the smallest mass-weighted RMSD (centered, without rotational superposition).

    Args:
        trajectory_universe: the trajectory whose frames should be classified
        m1_name: name of the first molecule, passed to PtIOManager
        m2_name: name of the second molecule, passed to PtIOManager
        b_grid_name: name of the orientation grid, passed to PtIOManager
        second_molecule_selection: selection string identifying the second molecule in both universes

    Returns:
        an array with one entry per trajectory frame: the index of the closest PT frame
    """
    # imported here because `import MDAnalysis` on its own is not guaranteed to load the rms submodule
    from MDAnalysis.analysis import rms

    # create PT on quaternion-only grid
    manager = PtIOManager(m1_name, m2_name, o_grid_name="1", b_grid_name=b_grid_name,
                          t_grid_name="[0.1]")
    # called for its side effect; the files that are read below are looked up by the manager's name
    manager.construct_pt()
    my_pt_name = manager.get_name()
    pt_sec_mol_universe = mda.Universe(f"{PATH_OUTPUT_PT}{my_pt_name}.gro", f"{PATH_OUTPUT_PT}{my_pt_name}.xtc")

    # in the real and pt trajectory, extract the second molecule
    trajectory_universe_m2 = _extract_universe_second_molecule(trajectory_universe, second_molecule_selection)
    pt_universe_m2 = _extract_universe_second_molecule(pt_sec_mol_universe, second_molecule_selection)

    # collect the positions of every frame once instead of jumping between frames inside the double loop
    masses = trajectory_universe_m2.atoms.masses
    pt_positions = [ts.positions.copy() for ts in pt_universe_m2.trajectory]
    traj_positions = [ts.positions.copy() for ts in trajectory_universe_m2.trajectory]

    # calculate RMSD between both; center=True removes the translation, the orientation is kept
    total_results = np.array([
        [rms.rmsd(traj_frame, pt_frame, center=True, weights=masses) for traj_frame in traj_positions]
        for pt_frame in pt_positions
    ])
    # rows are PT frames, columns are trajectory frames -> closest PT frame for every trajectory frame
    clases = np.argmin(total_results, axis=0)
    return clases

    

# classify every frame of the simulated trajectory by the orientation of the second molecule
my_cl = assign_trajectory_2_quaternion_grid(
    u_traj,
    "H2O",
    "H2O",
    "8",
    "bynum 4:6",
)
# which classes occur and how many frames have been assigned to each of them
class_labels, class_counts = np.unique(my_cl, return_counts=True)
print((class_labels, class_counts))

(array([0, 1, 2, 3, 4, 5, 6, 7]), array([362, 108, 341, 279,  94, 298, 168, 351]))


In [99]:
# Show the pseudotrajectory frame that belongs to one orientation class.
# NOTE(review): pt_sec_mol is not defined in any visible cell of this notebook (the corresponding
# universe only exists as a local variable inside assign_trajectory_2_quaternion_grid), so this cell
# fails with a NameError on a fresh kernel - TODO define it before this cell.
selected_class = 4

vm = ViewManager(pt_sec_mol)
vm.fresh_view()
vm.plot_ith_frame(selected_class)


NGLWidget()

In [109]:
# The second molecule of the simulated trajectory, extracted the same way as inside
# assign_trajectory_2_quaternion_grid, so that the frame indices in my_cl refer to its frames.
real_traj_sec_mol = _extract_universe_second_molecule(u_traj, "bynum 4:6")

# one view per orientation class (the classification above used the b grid "8")
for selected_class in range(8):
    # a new ViewManager starts with a fresh view
    vm = ViewManager(real_traj_sec_mol)
    frames_in_class = np.where(my_cl == selected_class)[0]
    # only every 20th frame of the class is drawn to keep the view readable
    vm.plot_frames_overlapping(frames_in_class[::20])
    display(vm.view)

NGLWidget()

NGLWidget()

NGLWidget()

No such comm: d8094022ff5c4ebaa76b0652b1ce9cb2
No such comm: d8094022ff5c4ebaa76b0652b1ce9cb2
No such comm: d8094022ff5c4ebaa76b0652b1ce9cb2
No such comm: d8094022ff5c4ebaa76b0652b1ce9cb2
No such comm: abd4465b4061480ebea46779f139d442
No such comm: abd4465b4061480ebea46779f139d442
No such comm: abd4465b4061480ebea46779f139d442
No such comm: abd4465b4061480ebea46779f139d442
No such comm: abd4465b4061480ebea46779f139d442
No such comm: abd4465b4061480ebea46779f139d442
No such comm: abd4465b4061480ebea46779f139d442
No such comm: 3c1bcd991af043de976f3f7720075493
No such comm: 3c1bcd991af043de976f3f7720075493
No such comm: 3c1bcd991af043de976f3f7720075493
No such comm: 3c1bcd991af043de976f3f7720075493
No such comm: 3c1bcd991af043de976f3f7720075493
No such comm: 3c1bcd991af043de976f3f7720075493
No such comm: 3c1bcd991af043de976f3f7720075493
No such comm: d8094022ff5c4ebaa76b0652b1ce9cb2
No such comm: abd4465b4061480ebea46779f139d442
No such comm: 3c1bcd991af043de976f3f7720075493


In [None]:
# TODO: enable different colormaps
# TODO: enable expressing magnitude as opacity
# TODO: enable plotting only the most extreme values (most + and - for eigenvector)
# TODO: plot eigenvectors and see if they are sensible
# TODO: also plot 1D eigenvectors sorted by orientation/position
# TODO: network flow plot for strongest rates?

# TODO: make transition matrix determination faster and try it for a large matrix
# TODO: HF forcefield and calculations
# TODO: enable the same for plotting for simulations (real trajectories)

In [None]:
# test: trying this for a pt


In [3]:
# Assign the frames of a pseudotrajectory to the cells of a full grid.
# NOTE(review): SimulationHistogram is called here with keyword arguments (trajectory_path,
# trajectory_name, full_grid, energies, second_molecule_selection), while the other cells call it as
# SimulationHistogram(parsed_trajectory, full_grid) - confirm which signature is current.
my_fg = FullGrid("40", "42", "[1, 2, 3, 4]")
sh = SimulationHistogram(
    trajectory_path=PATH_OUTPUT_PT,
    trajectory_name="TRYP_NH3_0000",
    full_grid=my_fg,
    energies="None",
    second_molecule_selection="resname SOL",
)
assignments = sh.assign_trajectory_2_full_grid()



In [4]:

# Print the complete assignment array. The unlimited threshold is only active inside the
# with-block, so the print options of all other cells stay unchanged.
with np.printoptions(threshold=sys.maxsize):
    print(assignments.astype(int))

[  19   19   19   37    4   19   20   19   19   33    4   19   19   23
   19   33    4   37   37   19   20   19   19    4   19   19   19   19
   19   19   33   19   19   33   19   19   37   37   38   19   68   57
   57   57   74   68   68   74   68   57   74   74   68   74   57   68
   57   68   47   74   57   57   57   57   57   68   68   57   74   74
   68   74   68   74   57   74   74   68   74   68  109  106  109  115
  118  109  117  109  109  117  109  109  109  109  115  118  109  115
  117  109  117  109   80  118  109  109  115  115   81   81  109  109
  115   81  106  109  115  115   81  115  147  129  130  147  126  129
  129  129  141  147  126  126  147  129  129  129  126  147  129  129
  129  135  129  135  122  141  147  122  129  147  135  126  147  129
  129  147  129  129  126  147  185  185  185  185  185  165  173  192
  185  165  181  185  185  173  174  165  185  192  192  185  175  181
  185  181  165  185  185  185  185  185  181  185  192  185  185  185
  192 