Basic Usage, Zebrafish Haemoglobin protein
======================


## ``Gromacs_py`` basic example

Here is an example of a short simulation of the Zebrafish Haemoglobin protein.

Five successive steps are used:

1. Topology creation using ``GmxSys.add_top()``.
2. Solvation of the system using ``GmxSys.solvate_add_ions()``.
3. Minimisation of the structure using ``GmxSys.em_2_steps()``.
4. Equilibration of the system using ``GmxSys.em_equi_three_step_iter_error()``.
5. Production run using ``GmxSys.production()``.

### Import

In [None]:
import sys
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

## To use `gromacs_py` in a project

In [None]:
from gromacs_py import gmx

## Simulation setup

- Define a few variables for your simulation, like:
  
    1. simulation output folders
    2. ionic concentration
    3. number of minimisation steps
    4. equilibration and production time


In [None]:
# Root folder that receives every output of this notebook.
DATA_OUT = 'data_sim'
# Create it up front so that all later steps can write into it.
os.makedirs(DATA_OUT, exist_ok=True)

# System setup
vsite = 'none'
ion_C = 0.15  # ionic concentration used when solvating the system
sys_top_folder = os.path.join(DATA_OUT, 'sys_top')
# Extra pdb2gmx option: ignore the hydrogen atoms found in the input PDB.
ignore_hydrogen = {'ignh': None}

# Energy minimisation
em_folder = os.path.join(DATA_OUT, 'em')
em_sys_folder = os.path.join(DATA_OUT, 'sys_em')
em_step_number = 1000

# Equilibration
equi_folder = os.path.join(DATA_OUT, 'sys_equi')
HA_time = 0.5
CA_time = 1.0
CA_LOW_time = 2.0

# Integration time steps.
# NOTE(review): the factor 1000 below suggests times in ns and time steps
# in ps -- confirm against the gromacs_py documentation.
dt_HA = 0.001
dt = 0.002

# Step counts must be whole numbers: round before converting so that
# floating point noise (e.g. 499999.99999999994) cannot drop a step.
HA_step = int(round(1000 * HA_time / dt_HA))
CA_step = int(round(1000 * CA_time / dt))
CA_LOW_step = int(round(1000 * CA_LOW_time / dt))

# Production
prod_folder = os.path.join(DATA_OUT, 'sys_prod')
prod_time = 10.0

prod_step = int(round(1000 * prod_time / dt))

## Create the `GmxSys` object

Load the PDB file from disk

In [None]:
# Name given to the system, and the input structure file it is built from.
sys_name = "zeb_hb"
pdb_file = "ZEB_HB_Refined.pdb"

# System object that every following step of the notebook operates on.
md_sys = gmx.GmxSys(
    name=sys_name,
    coor_file=pdb_file,
)

## Create topology:

**Note:** Hydrogen atoms need to be ignored, or else this won't work with this particular pdb

Topology creation involves:
- topology creation using `pdb2gmx` via the `add_top()` function
  * The easier `prepare_top()` function won't work here,
  * as there seems to be a problem with the hydrogen information in the PDB,
  * necessitating passing the `-ignh` option to `pdb2gmx`, which `prepare_top()` can't do.
- box creation using `editconf`



In [None]:
# Build the topology, forwarding the extra pdb2gmx options from the setup cell.
md_sys.add_top(
    out_folder=DATA_OUT,
    name=sys_name,
    pdb2gmx_option_dict=ignore_hydrogen,
)

# Define the simulation box around the system.
md_sys.create_box(
    dist=1.0,
    box_type="dodecahedron",
    check_file_out=True,
)

## Solvation (water and $Na^{+} Cl^{-}$)

In [None]:
# Solvate the system and add ions, using the ionic concentration chosen in
# the setup cell (ion_C) rather than a second hard-coded copy of the value.
md_sys.solvate_add_ions(out_folder=DATA_OUT, name=sys_name, ion_C=ion_C)

## Energy minimisation

Set parallelization and GPU options here. Change them later, if needed.

In [None]:
# Parallelisation: number of threads and the GPU id used for the runs.
nthreads = 6
md_sys.nt = nthreads
# md_sys.ntmpi = 1
md_sys.gpu_id = '0'

# Two-step minimisation; both steps run for the same number of steps.
em_options = {
    'out_folder': em_folder,
    'no_constr_nsteps': em_step_number,
    'constr_nsteps': em_step_number,
    'posres': "",
    'create_box_flag': False,
}
md_sys.em_2_steps(**em_options)

## Plot energy:

In [None]:
# Potential energy of the previous run (last history entry) and of the
# current state, each tagged with the minimisation stage it belongs to.
em_stages = (
    ('no bond constr', md_sys.sys_history[-1]),
    ('bond constr', md_sys),
)

em_frames = []
for stage_label, stage_sys in em_stages:
    stage_ener = stage_sys.get_ener(selection_list=['Potential'])
    stage_ener['label'] = stage_label
    em_frames.append(stage_ener)

ener_pd = pd.concat(em_frames)

# Replace the time column with a running index over both stages.
ener_pd['Time (ps)'] = np.arange(len(ener_pd))

In [None]:
# Potential energy along both minimisation stages. The 'Time (ps)' column of
# ener_pd was overwritten with a running index, hence the 'step' axis label.
ax = sns.lineplot(x="Time (ps)", y="Potential",
                  hue="label",
                  data=ener_pd)
ax.set_xlabel('step')
# SI symbol for kilojoule is 'kJ' (a capital 'K' denotes kelvin).
ax.set_ylabel('energy (kJ/mol)')
plt.grid()

## System minimisation and equilibration

In [None]:
# Minimisation followed by the three equilibration stages (HA, CA, CA_LOW),
# using the step counts and time steps defined in the setup cell.
equi_options = {
    'out_folder': equi_folder,
    'no_constr_nsteps': em_step_number,
    'constr_nsteps': em_step_number,
    'nsteps_HA': HA_step,
    'nsteps_CA': CA_step,
    'nsteps_CA_LOW': CA_LOW_step,
    'dt': dt,
    'dt_HA': dt_HA,
    'vsite': vsite,
    'maxwarn': 1,
}
md_sys.em_equi_three_step_iter_error(**equi_options)


### Plot volume

#### TODO: RTFM this

In [None]:
# Volume of the three equilibration stages: the two most recent history
# entries followed by the current state of the system.
equi_stages = (
    ('HA_constr', md_sys.sys_history[-2]),
    ('CA_constr', md_sys.sys_history[-1]),
    ('CA_LOW_constr', md_sys),
)

volume_frames = []
for stage_label, stage_sys in equi_stages:
    stage_ener = stage_sys.get_ener(selection_list=['Volume'])
    stage_ener['label'] = stage_label
    if volume_frames:
        # Shift the time axis so this stage starts where the previous one ended.
        previous_end = volume_frames[-1]['Time (ps)'].max()
        stage_ener['Time (ps)'] = stage_ener['Time (ps)'] + previous_end
    volume_frames.append(stage_ener)

ener_pd = pd.concat(volume_frames)



In [None]:
# Box volume along the three equilibration stages.
ax = sns.lineplot(x="Time (ps)", y="Volume",
                  hue="label",
                  data=ener_pd)

# GROMACS reports the volume in nm^3, not in Å^3.
# NOTE(review): assumes get_ener() returns the raw GROMACS values without
# unit conversion -- confirm.
ax.set_ylabel('Volume ($nm^3$)')
plt.grid()

### Plot RMSD

In [None]:
# Reference structure for every RMSD computed in this notebook.
ref_sys = md_sys.sys_history[1]

# The three equilibration stages: the two most recent history entries
# followed by the current state of the system.
rmsd_stages = (
    ('HA_constr', md_sys.sys_history[-2]),
    ('CA_constr', md_sys.sys_history[-1]),
    ('CA_LOW_constr', md_sys),
)

rmsd_frames = []
for stage_label, stage_sys in rmsd_stages:
    stage_rmsd = stage_sys.get_rmsd(['C-alpha', 'Protein'], ref_sys=ref_sys)
    stage_rmsd['label'] = stage_label
    if rmsd_frames:
        # Shift the time axis so this stage starts where the previous one ended.
        previous_end = rmsd_frames[-1]['time'].max()
        stage_rmsd['time'] = stage_rmsd['time'] + previous_end
    rmsd_frames.append(stage_rmsd)

rmsd_pd = pd.concat(rmsd_frames)


In [None]:
# RMSD of the 'Protein' column over the equilibration, one colour per stage.
ax = sns.lineplot(data=rmsd_pd, x="time", y="Protein", hue="label")

ax.set_xlabel('Time (ps)')
ax.set_ylabel('RMSD (nm)')
ax.grid()

## Production

In [None]:
# Production run with the step count and time step from the setup cell.
prod_options = {
    'out_folder': prod_folder,
    'nsteps': prod_step,
    'dt': dt,
    'vsite': vsite,
    'maxwarn': 1,
}
md_sys.production(**prod_options)


## Prepare trajectory

In [None]:
# Center trajectory
md_sys.center_mol_box(traj=True)

## Basic Analysis

In [None]:
# RMSD of the production run, against the same reference as the equilibration.
rmsd_prod_pd = md_sys.get_rmsd(['C-alpha', 'Protein'], ref_sys=ref_sys)
rmsd_prod_pd['label'] = 'Production'

# Continue the time axis from the end of the equilibration data.
equi_end_time = rmsd_pd['time'].max()
rmsd_prod_pd['time'] = rmsd_prod_pd['time'] + equi_end_time

rmsd_all_pd = pd.concat([rmsd_pd, rmsd_prod_pd])

In [None]:
# RMSD of the 'Protein' column over equilibration and production.
ax = sns.lineplot(data=rmsd_all_pd, x="time", y="Protein", hue="label")
ax.set_xlabel('Time (ps)')
ax.set_ylabel('RMSD (nm)')
ax.grid()

## Trajectory visualisation

In [None]:
# Align the protein coordinates
md_sys.convert_trj(select='Protein\nSystem\n', fit='rot+trans', pbc='none', skip='10')

In [None]:
# Trajectory viewer: draw the protein as licorice and centre on the CA selection.
view = md_sys.view_traj()
view.add_representation(selection='protein', repr_type='licorice')
view.center(selection='CA')

# Keep the widget as the last expression so that the notebook displays it.
view