In [1]:
import analyse
import os
import re
import importlib
import MDAnalysis as mda
import MDAnalysis.analysis.rms as rms
import MDAnalysis.analysis.hbonds
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
importlib.reload(analyse)

<module 'analyse' from '/Volumes/ADATA HV620/rp/code/analyse.py'>

In [3]:
def save_figure(fig, target):
    fig.savefig(target)

In [4]:
PDB_CODE = '6b73'
SEQ = 'B'
folder = '../'+PDB_CODE+SEQ+'/'
results_folder = folder+'results/'
plot_folder = results_folder+'plots/'

In [5]:
dcd_files = sorted([results_folder+'namd/'+i for i in os.listdir(results_folder+'namd/') if re.match(r'(step7.).*\_production.dcd$', i)])

In [6]:
print(dcd_files)

['../6b73B/results/namd/step7.10_production.dcd', '../6b73B/results/namd/step7.11_production.dcd', '../6b73B/results/namd/step7.12_production.dcd', '../6b73B/results/namd/step7.1_production.dcd', '../6b73B/results/namd/step7.2_production.dcd', '../6b73B/results/namd/step7.3_production.dcd', '../6b73B/results/namd/step7.4_production.dcd', '../6b73B/results/namd/step7.5_production.dcd', '../6b73B/results/namd/step7.6_production.dcd', '../6b73B/results/namd/step7.7_production.dcd', '../6b73B/results/namd/step7.8_production.dcd', '../6b73B/results/namd/step7.9_production.dcd']


In [7]:
trajectory =  mda.Universe(results_folder+'step5_assembly.xplor_ext.psf', dcd_files)
# ref = mda.Universe(folder+PDB_CODE+'_inp.pdb')
ref = mda.Universe(results_folder+'step5_assembly.namd.pdb')



In [None]:
title = 'RMSD of C-alpha atoms over production steps'
R, r_fig = analyse.rmsd_traj(trajectory, ref, title)

In [None]:
save_figure(r_fig[0].figure, plot_folder+'prod_rmsd')

In [None]:
pdb_file = folder+PDB_CODE+'_inp.pdb'
struct = analyse.secondary_structure(pdb_file, PDB_CODE)

In [None]:
rmsf_fig = analyse.rmsf_plot([trajectory], title='RMSF of residues over prodeuction step', structure=struct)

In [None]:
save_figure(rmsf_fig, plot_folder+'prod_rmsf')

In [None]:
protein = ref.select_atoms('protein')
print(protein.residues.resnames)

In [None]:
d = {'resId': protein.residues.resids, 'resName': protein.residues.resnames}
df = pd.DataFrame(data=d)
# print(df)

In [None]:
trp = ref.select_atoms('resid 233')
print(trp.residues)

In [None]:
r = rms.RMSD(trajectory.select_atoms('name CA'),
             ref.select_atoms('name CA'), 
             select='name CA', 
             groupselections=['resname TRP and name CA and resid 233', 'resname SER and name CA']).run()

In [None]:
R = r.rmsd.T
print(R)
frame = R[0]
time = R[1]
plt.subplots(figsize=(10,5))
plt.plot(time, R[2], linewidth=0.2)
plt.plot(time, R[3], linewidth=0.2)
plt.plot(time, R[4], linewidth=0.2)
plt.show()

In [None]:
print(ref.select_atoms('resid 233 and name CA')) 

## RMSF of the loops:

In [None]:
print(struct['helix'])

In [None]:
r = rms.RMSD(trajectory.select_atoms('name CA'),
             ref.select_atoms('name CA'), 
             select='name CA', 
             groupselections=['name CA and resid 3-32',
                              'name CA and resid 39-67',
                              'name CA and resid 73-105', 
                              'name CA and resid 116-142',
                              'name CA and resid 165-205',
                              'name CA and resid 213-245',
                              'name CA and resid 255-279']).run()

In [None]:
R = r.rmsd.T
frame = R[0]
time = R[1]
fig, ax = plt.subplots(1, figsize=(18,5))
ax.plot(time, R[2], linewidth=0.5, label='1 TM')
ax.plot(time, R[3], linewidth=0.5, label='2 TM')
ax.plot(time, R[4], linewidth=0.5, label='3 TM')
ax.plot(time, R[5], linewidth=0.5, label='4 TM')
ax.plot(time, R[6], linewidth=0.5, label='5 TM')
ax.plot(time, R[7], linewidth=0.5, label='6 TM')
ax.plot(time, R[8], linewidth=0.5, label='7 TM')
ax.legend()
plt.show()

In [None]:
save_figure(fig, plot_folder+'helix_rmsd')

### RMSD of important residues

In [None]:
r = rms.RMSD(trajectory.select_atoms('name CA'),
             ref.select_atoms('name CA'), 
             select='name CA', 
             groupselections=['name CA and resid 84',
                              'name CA and resid 156',
                              'name CA and resid 173', 
                              'name CA and resid 233']).run()

In [None]:
R = r.rmsd.T
frame = R[0]
time = R[1]
fig, ax = plt.subplots(1, figsize=(18,5))
ax.plot(time, R[2], linewidth=0.5, label='D138')
ax.plot(time, R[3], linewidth=0.5, label='C210')
ax.plot(time, R[4], linewidth=0.5, label='K227')
ax.plot(time, R[5], linewidth=0.5, label='W287')
ax.legend()
plt.show()

In [None]:
save_figure(fig, plot_folder+'residue_rmsd')

### RMSF of helixes:

In [None]:
h_prot = mda.analysis.hbonds.HydrogenBondAnalysis(trajectory, selection1='protein',
                                                  selection2='protein',
                                             selection1_type='both',
                                             distance=3.0)
h_prot.run()

In [None]:
h_prot.generate_table()
df = pd.DataFrame.from_records(h_prot.table)
df.hist(column=["distance"])
print(df)

In [None]:
plt.subplots(figsize=(20,20))
bob = plt.hist2d(h_prot.table['donor_resid'], h_prot.table['acceptor_resid'], bins=280)

In [None]:
inter_hbond = by_t_df.loc[(by_t_df['frequency'] > 1.5) & (by_t_df['donor_resid'] != by_t_df['acceptor_resid'])]
print(inter_hbond)