# Assessing RMSF and RMSD of MD simulation

Usage: the function `plotting` plots the RMSD, the RMSD histogram, the RMSF, and the PCA projection.

In [6]:
from __future__ import print_function
%matplotlib inline
import mdtraj as md
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import scipy.cluster.hierarchy
from scipy.spatial.distance import squareform
import pandas as pd
import matplotlib.gridspec as gridspec
from mpl_toolkits.mplot3d import Axes3D
from sklearn.decomposition import PCA
import warnings
warnings.filterwarnings('ignore')
#XRD Ensemble
#28 4NPQ
#18 4HFI

In [7]:
# Residue-range selection string (resSeq 8 through 316).
# NOTE(review): not referenced by any of the cells shown here - confirm it is still used.
residue_selection = "resSeq 8 to 316"

In [8]:
# Names of the trajectories to process, in order.  Every three consecutive
# entries are the replicates of one system; the order matters because
# downstream cells derive the system number from the position in this list.
traj_notes = [
    # 5NJY, pH70
    '5NJY_pH70_md1',
    '5NJY_pH70_md2',
    '5NJY_pH70_md3',
    '5NJY_F238L_pH70_md1',
    '5NJY_F238L_pH70_md2',
    '5NJY_F238L_pH70_md3',
    '5NJY_I233T_pH70_md1',
    '5NJY_I233T_pH70_md2',
    '5NJY_I233T_pH70_md3',
    '5NJY_F238LI233T_pH70_md1',
    '5NJY_F238LI233T_pH70_md2',
    '5NJY_F238LI233T_pH70_md3',
    # 5NJY, pH46
    '5NJY_pH46_md1',
    '5NJY_pH46_md2',
    '5NJY_pH46_md3',
    '5NJY_F238L_pH46_md1',
    '5NJY_F238L_pH46_md2',
    '5NJY_F238L_pH46_md3',
    '5NJY_I233T_pH46_md1',
    '5NJY_I233T_pH46_md2',
    '5NJY_I233T_pH46_md3',
    '5NJY_F238LI233T_pH46_md1',
    '5NJY_F238LI233T_pH46_md2',
    '5NJY_F238LI233T_pH46_md3',
    # 4HFI, pH46
    '4HFI_pH46_md1',
    '4HFI_pH46_md2',
    '4HFI_pH46_md3',
    '4HFI_F238L_pH46_md1',
    '4HFI_F238L_pH46_md2',
    '4HFI_F238L_pH46_md3',
    '4HFI_I233T_pH46_md1',
    '4HFI_I233T_pH46_md2',
    '4HFI_I233T_pH46_md3',
    '4HFI_F238LI233T_pH46_md1',
    '4HFI_F238LI233T_pH46_md2',
    '4HFI_F238LI233T_pH46_md3',
    # 4NPQ, pH70
    '4NPQ_pH70_md5',
    '4NPQ_pH70_md6',
    '4NPQ_pH70_md7',
    '4NPQ_F238L_pH70_md3',
    '4NPQ_F238L_pH70_md4',
    '4NPQ_F238L_pH70_md5',
    '4NPQ_I233T_pH70_md3',
    '4NPQ_I233T_pH70_md4',
    '4NPQ_I233T_pH70_md5',
    '4NPQ_F238LI233T_pH70_md3',
    '4NPQ_F238LI233T_pH70_md4',
    '4NPQ_F238LI233T_pH70_md5',
]

In [9]:
def create_md_dataframe():
    """Return a new, empty DataFrame holding the per-residue metadata columns.

    The frame has no rows and the columns ``MD_name``, ``pH``, ``replicate``
    and ``resid``, in that order.
    """
    metadata_columns = ['MD_name', 'pH', 'replicate', 'resid']
    return pd.DataFrame(columns=metadata_columns)

In [10]:
def create_metadata(md_data= None):
    """Append one metadata row per residue for every trajectory in ``traj_notes``.

    Each appended row is ``[MD_name, pH, replicate, resid]``.  The first three
    fields are parsed from the trajectory name; for example
    ``'5NJY_F238L_pH70_md1'`` gives ``'5NJY_F238L'``, ``'70'`` and ``'1'``.
    Rows are written into ``md_data`` in place under the label
    ``md_data.shape[0] + 1``, so a frame that starts empty ends up with an
    index that starts at 1.

    Parameters
    ----------
    md_data : pandas.DataFrame
        Frame to extend in place.  It must have exactly the four columns
        listed above, in that order (as returned by ``create_md_dataframe``).

    Returns
    -------
    None
        The result is the mutation of ``md_data``.
    """
    def append_metadata(traj_note, location='/media/scottzhuang/data/MD/', md_data=md_data):
        top_location = traj_note + '/' + traj_note + ".ca.gro"
        # Only the residue count is needed, and it comes from the topology,
        # so read the topology file alone instead of every trajectory frame.
        n_residues = md.load_topology(location + top_location).n_residues
        ph_start = traj_note.find('pH')
        md_name = traj_note[:ph_start - 1]
        pH = traj_note[ph_start + 2:ph_start + 4]
        # Keep everything after the final '_md' so multi-digit replicate
        # numbers (e.g. 'md10') stay whole; names without that suffix fall
        # back to their last character.
        name_parts = traj_note.rsplit('_md', 1)
        if len(name_parts) == 2 and name_parts[1]:
            md_replicate = name_parts[1]
        else:
            md_replicate = traj_note[-1]
        # NOTE(review): resid is the 0-based residue index plus 8, presumably
        # the first residue number of the model - confirm for multi-chain
        # topologies, where the index keeps counting across chains.
        for i in range(n_residues):
            md_data.loc[md_data.shape[0] + 1] = [md_name, pH, md_replicate, i + 8]
    for traj_note in traj_notes:
        append_metadata(traj_note)

In [11]:
def create_system_notation(md_data = None, replicates_per_system = 3):
    """Add a ``system`` column that numbers the simulated systems.

    The trajectories in ``traj_notes`` are taken in order, and every
    ``replicates_per_system`` consecutive entries share one integer id
    (0, 1, 2, ...).  Each id is repeated once per residue of its trajectory,
    and the resulting list is assigned to ``md_data['system']`` in place.

    Parameters
    ----------
    md_data : pandas.DataFrame
        Frame to receive the ``system`` column.  It is expected to hold one
        row per residue per trajectory, in ``traj_notes`` order.
    replicates_per_system : int, optional
        Number of consecutive trajectories that belong to one system.
        Defaults to 3.

    Raises
    ------
    ValueError
        Raised by pandas when ``md_data`` already has rows and their number
        differs from the number of generated values.
    """
    location = '/media/scottzhuang/data/MD/'
    notations = []
    for traj_index, traj_note in enumerate(traj_notes):
        top_location = traj_note + '/' + traj_note + ".ca.gro"
        # Only the residue count is needed, so read the topology file alone
        # instead of loading every frame of the trajectory.
        n_residues = md.load_topology(location + top_location).n_residues
        # Integer division groups consecutive replicates under one system id.
        notations.extend([traj_index // replicates_per_system] * n_residues)
    md_data['system'] = notations

In [19]:
def create_rmsf_data(md_data = None):
    """Add an ``rmsf`` column with the fluctuation of every atom of every trajectory.

    For each name in ``traj_notes`` the trajectory is loaded, superposed onto
    its own first frame, and the root-mean-square deviation of each atom from
    its frame-averaged position is computed.  The values of all trajectories
    are concatenated in ``traj_notes`` order and assigned to
    ``md_data["rmsf"]`` in place.

    Parameters
    ----------
    md_data : pandas.DataFrame
        Frame to receive the ``rmsf`` column.

    Returns
    -------
    None
    """
    data_root = '/media/scottzhuang/data/MD/'
    skip_tag = 10

    def per_atom_rmsf(traj_note):
        # Topology and trajectory share the '<root>/<name>/<name>' prefix.
        prefix = data_root + traj_note + '/' + traj_note
        traj = md.load(prefix + ".skip" + str(skip_tag) + ".ca.xtc", top=prefix + ".ca.gro")
        traj.superpose(traj, 0)
        coords = traj.xyz
        deviation = coords - np.mean(coords, axis=0)
        # Averaging over frames and the three components, then scaling by 3,
        # gives the mean squared displacement per atom.
        return np.sqrt(3 * np.mean(deviation ** 2, axis=(0, 2)))

    all_rmsf = []
    for traj_note in traj_notes:
        all_rmsf.extend(per_atom_rmsf(traj_note))
    md_data["rmsf"] = all_rmsf

In [14]:
# Build the empty per-residue table, then fill it in place with one row per
# residue of every trajectory listed in traj_notes.
md_data_resid = create_md_dataframe()
create_metadata(md_data = md_data_resid)

In [17]:
# Add the 'system' column to md_data_resid in place.
create_system_notation(md_data_resid)

ValueError: Length of values does not match length of index

In [20]:
# Add the 'rmsf' column to md_data_resid in place.
create_rmsf_data(md_data_resid)

In [25]:
# Save the assembled table to a CSV file (relative path, so it lands in the
# current working directory).
md_data_resid.to_csv("glic_gating_resid.csv")

In [23]:
# Show the column labels of the assembled table.
md_data_resid.columns

Index(['MD_name', 'pH', 'replicate', 'resid', 'system', 'rmsf'], dtype='object')

In [24]:
# Preview only the first rows; the table has one row per residue per
# trajectory, which is too long to display in full.
md_data_resid.head(10)

Unnamed: 0,MD_name,pH,replicate,resid,system,rmsf
1,5NJY,70,1,8,0,0.151650
2,5NJY,70,1,9,0,0.135975
3,5NJY,70,1,10,0,0.123123
4,5NJY,70,1,11,0,0.128611
5,5NJY,70,1,12,0,0.146535
6,5NJY,70,1,13,0,0.161250
7,5NJY,70,1,14,0,0.193895
8,5NJY,70,1,15,0,0.179716
9,5NJY,70,1,16,0,0.163906
10,5NJY,70,1,17,0,0.123767
