In [1]:
import numpy as np
from netCDF4 import Dataset
import matplotlib.pyplot as plt
import mdtraj
from time import time

import dna_analysis_tools as dna_tools
import misc_tools as misc_tools

# Creating ion density plots around biomolecules

## 1. Centering and aligning the biomolecules
Simulations are centered around the biomolecule of interest using `VMD`. All the simulations for a given test system are loaded into the first PDB file (i.e. `out1.pdb`).

If the biomolecule is a protein, the following command is entered in the `tk console` to center the solvent around the biomolecule:
```
package require pbctools
pbc wrap -centersel "protein" -center com -compound residue -all
```
For the DNA dodecamer, the following commands are used:
```
package require pbctools
pbc wrap -centersel "resid 1 to 24" -center com -compound residue -all
```
Using the `RMSD trajectory tool` in `VMD`, the simulations are then aligned to the first PDB file of the first simulation (`out1.pdb`). All the atoms in the macromolecule are used in the alignment. 


## 2. Making the 3D density grid around the macromolecule

## Useful functions

In [2]:
def get_ion_identities(netcdf_filenames, stride=1):
    """
    Extract the molecule identities from a series of netcdf files from saltswap.

    Parameters
    ----------
    netcdf_filenames: iterable of str
        paths of the netcdf files; they are read in the order given
    stride: int
        keep every `stride`-th row of the identities stored in each file

    Returns
    -------
    identities: numpy.ndarray
        the 'identities' variable from the 'Sample state data' group of every
        file, stacked along the first axis in file order
    """
    identities = []
    for filename in netcdf_filenames:
        print(filename)
        ncfile = Dataset(filename, 'r')
        try:
            data = ncfile.groups['Sample state data']['identities'][:,:]
            identities.append(data[::stride, :])
        finally:
            # Release the file handle even when the group/variable lookup or the read fails.
            ncfile.close()
    # np.array() is kept so that the result is a plain ndarray whatever array type the reads returned.
    return np.array(np.vstack(identities))

def get_edges(xyz, spacing=1.5, padding=0):
    """
    Build the bin edges needed for a 3D histogram of a set of coordinates.

    Adapted from the script 'calc_density' from ProtoMS (protoms.org)

    Parameters
    ----------
    xyz: numpy.ndarray
        Cartesian coordinates that the grid must enclose
    spacing: float
        the target grid spacing; the number of edges per axis is rounded, so
        the realised spacing along an axis can differ slightly from this value
    padding: float
        distance subtracted from the lower corner and added to the upper
        corner of the bounding box of the coordinates

    Returns
    -------
    edges: list of numpy arrays
        one array of evenly spaced edges for each of the three axes
    """
    # Bounding box of the coordinates, snapped outwards to whole numbers.
    lower = np.floor(xyz.min(axis=0)) - padding
    upper = np.ceil(xyz.max(axis=0)) + padding

    # Number of edges per axis: box length over spacing, rounded to nearest, plus one.
    n_edges = np.array(
        [int(side / spacing + 0.5) + 1 for side in upper - lower],
        dtype=int,
    )

    edges = []
    for axis in range(3):
        edges.append(np.linspace(lower[axis], upper[axis], n_edges[axis]))
    return edges

### Load the simulation data

In [3]:
# Read the per-frame identities from the saltswap netcdf output of each run.
file_names = ['out1.nc', 'out2.nc', 'out3.nc']
files = ['../testsystems/dhfr/200mM/' + name for name in file_names]
identities = get_ion_identities(files, stride=1)

# Load the trajectory in which the protein has already been centered and aligned,
# using the first PDB file as the topology.
traj = mdtraj.load(
    '../testsystems/dhfr/200mM/out_all_aligned.dcd',
    top='../testsystems/dhfr/200mM/out1.pdb',
)

../testsystems/dhfr/200mM/out1.nc
../testsystems/dhfr/200mM/out2.nc
../testsystems/dhfr/200mM/out3.nc


### Get the cation and anion coordinates

In [4]:
# Atom indices selected by mdtraj's 'water' selection; column j of `identities`
# is used to index into this array.
water_indices = np.asarray(traj.topology.select_atom_indices('water'))

[nframes, nwaters] = identities.shape
cation_xyz = []
anion_xyz = []
# "frame + 1" is used to index the trajectory (instead of just "frame") because the
# loaded trajectory also contains the initial PDB (out1.pdb).
for frame in range(traj.n_frames - 1):
    frame_identities = identities[frame]
    # Cations: entries labelled 1 in the identities array.
    cation_indices = water_indices[np.flatnonzero(frame_identities == 1)]
    cation_xyz.append(traj.xyz[frame + 1, cation_indices, :])
    # Anions: entries labelled 2 in the identities array.
    anion_indices = water_indices[np.flatnonzero(frame_identities == 2)]
    anion_xyz.append(traj.xyz[frame + 1, anion_indices, :])

# Simplifying data structure and converting from nanometers to Ångstroms.
cation_xyz = np.vstack(cation_xyz) * 10.0
anion_xyz = np.vstack(anion_xyz) * 10.0

### 3D histogram of cations and anions

In [18]:
# Histogram each ion species on its own grid and write the result to a DX file.
for ion_xyz, dx_filename in ((cation_xyz, 'cation_density.dx'),
                             (anion_xyz, 'anion_density.dx')):
    edges = get_edges(ion_xyz, spacing=1.5)
    h, edges = np.histogramdd(ion_xyz, edges)
    # Scale the histogram so that the most populated bin is 100 percent.
    h = h / np.max(h) * 100.0
    grid_origin = [axis_edges[0] for axis_edges in edges]
    # NOTE(review): only the spacing of the second axis is handed to writeDX; the
    # edges of the other axes may be spaced slightly differently - confirm this is intended.
    misc_tools.writeDX(h, grid_origin, np.diff(edges[1])[0], dx_filename)

### Using ProtoMS's density tool

In [5]:
# Settings shared by the cation and anion density cells that follow.
# NOTE(review): presumably in Ångstroms, like the ion coordinates - confirm against misc_tools.
padding = 0  # passed to misc_tools._init_grid
extent = 2  # passed to misc_tools._fill_sphere
spacing = 1  # grid spacing passed to _init_grid, _fill_sphere and writeDX

In [6]:
# Cations: build a grid around the cation coordinates and pass every position to _fill_sphere.
# NOTE(review): _fill_sphere's return value is unused, so it presumably updates `grid` in place - confirm.
grid, edges = misc_tools._init_grid(cation_xyz, spacing, padding)
for cation_position in cation_xyz:
    misc_tools._fill_sphere(cation_position, grid, edges, spacing, extent)
# Rescale so that the largest grid value is 100 percent.
peak = np.max(grid)
grid = grid / peak * 100.0
grid_origin = [axis_edges[0] for axis_edges in edges]
misc_tools.writeDX(grid, grid_origin, spacing, 'dhfr_cation_density_pms.dx')

In [7]:
# Percentage of the total grid density held by voxels whose value exceeds 15
# on the 0-100 scale of `grid`.
normed = grid / np.sum(grid)
normed[grid > 15].sum() * 100

6.3833114116321701

In [8]:

# Anions: build a grid around the anion coordinates and pass every position to _fill_sphere.
# NOTE(review): _fill_sphere's return value is unused, so it presumably updates `grid` in place - confirm.
grid, edges = misc_tools._init_grid(anion_xyz, spacing, padding)
for anion_position in anion_xyz:
    misc_tools._fill_sphere(anion_position, grid, edges, spacing, extent)
# Rescale so that the largest grid value is 100 percent.
peak = np.max(grid)
grid = grid / peak * 100.0
grid_origin = [axis_edges[0] for axis_edges in edges]
misc_tools.writeDX(grid, grid_origin, spacing, 'dhfr_anion_density_pms.dx')


In [9]:
# Percentage of the total grid density held by voxels whose value exceeds 60
# on the 0-100 scale of `grid`.
normed = grid / np.sum(grid)
normed[grid > 60].sum() * 100

0.85779107115544795