In [1]:
#import the necessary modules 
%matplotlib inline 
import numpy as np 
import matplotlib.pyplot as plt 
import pandas as pd 
#import scipy
import sklearn
import itertools as it
from itertools import cycle 
import os.path as op
import timeit 
import json
from matplotlib import animation
import matplotlib.font_manager as font_manager
from collections import namedtuple
#from functools import partial
#from pathlib import Path

In [2]:
# Set plotting style
plt.style.use('seaborn-white')

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
#import matplotlib.pyplot as plt

In [3]:
%matplotlib widget

In [4]:
import multiprocessing as m_proc
m_proc.cpu_count()

4

### Now use MD Analysis to calculate no. of frames a center PLGA residues and terminal PLGA residue is with 4 Angstroms of BSA (1.2 nm restrained system)

Import MDAnalysis

In [5]:
from prot_polymer_analysis import get_protresd_list, aa_frmcount, grptwocnt_aa, gtwo_trjcnt 
from prot_polymer_analysis import frac_cont, bavg_frac_cnt, prot_poly_cntmovie, AA_list_org

In [6]:
# Import MDAnalysis
import MDAnalysis as mda
import MDAnalysis.analysis.distances as maa_dist

### First table will be total fractional contacts and oligomer occupancy values for each Rg value 

#### Distance-based analysis 

Find residues that have at least one atom within a cutoff $d = 4.0$ Angstrom near water molecules in BSA/water simulation

Calculate the number of surface bsa residues from a 1 ns BSA/water simulation

In [7]:
#Units of Angstroms 
dmax = 4.0 

In [8]:
def middle_of_band(band_start, band_stop, plot_min=0, plot_max=60):
    half_way = (band_stop - band_start) / 2
    mid_band = band_start + half_way
    plot_fraction = (mid_band - plot_min) / (plot_max - plot_min)

    return plot_fraction

# 1.2 nm PLGA unrestrained Rg 100 ns trajectory

Load the rg = 1.2 nm (3 PLGA N = 20 oligomer/BSA system)

In [9]:
# load the unrestrained trajectory 
pn12nm_nores = mda.Universe("../1.2nm_bsa_prod/res_off/new_conf1.2nm.pdb"
                      , "../1.2nm_bsa_prod/res_off/pp_12nmresoff.xtc")

Check that we are on the first frame

In [10]:
pn12nm_nores.trajectory.frame

0

In [11]:
pn20nr_len = len(pn12nm_nores.trajectory)
pn20nr_len

10001

In [12]:
#Select all the PLGA residues, heavy atoms only 
pn20_allnr = pn12nm_nores.select_atoms("resname sPLG PLG tPLG and not type H")
pn20_allnr

<AtomGroup with 543 atoms>

In [13]:
# Select BSA residues, heavy atoms only 
prot_nores = pn12nm_nores.select_atoms("protein and not type H")
prot_nores

<AtomGroup with 4653 atoms>

### Contact Analysis

In [14]:
#dmax = 4.0, protein group(4653 atoms), plga atom group (543 atoms), took 381.6 s (6 min 36s on 4 cores)
start = 0
end = pn20nr_len - 1
snr_time = timeit.default_timer()
h2di_nr = aa_frmcount(prot_nores, pn20_allnr, dmax, pn12nm_nores, start, end)
timeit.default_timer() - snr_time

479.56650141626596

In [15]:
len(h2di_nr.keys())

59

In [18]:
a_a = ["GLY","ALA","VAL","LEU","ILE","MET","PHE","TRP","PRO","SER","THR","CYS","TYR","ASN","GLN","ASP"
               ,"GLU","LYS","ARG","HIS"]

In [20]:
# This code chunk gets the BSA residues and their corresponding number in a pandas dataframe 
red_bsa = []
bh = np.arange(0,584)
for i in range(583):
    b_str = str(list(prot_nores.residues[i:i+1]))
    if str(bh[i+1]) in b_str: 
        red_bsa.append(str(b_str[10:13])+" "+str(bh[i+1]))

In [21]:
# Extract no. of contacted frames
pr_resnr = list(prot_nores.residues)
ss_resnr = [str(row) for row in pr_resnr]
rkg_12nr = {key:h2di_nr[key][0] for key, value in h2di_nr.items()}
plg_1_2nr = pd.DataFrame(data=ss_resnr, columns=["BSA_des_res"])
plg_1_2nr['mda_1.2nm_nores'] = plg_1_2nr['BSA_des_res'].map(rkg_12nr)
plg_1_2nr['BSA_des_res'] = red_bsa
plg_1_2nr['mda_1.2nm_nores'] = plg_1_2nr['mda_1.2nm_nores'].replace('nan', np.nan).fillna(0)
plg_1_2nr.head()

Unnamed: 0,BSA_des_res,mda_1.2nm_nores
0,ASP 1,0.0
1,THR 2,0.0
2,HIS 3,0.0
3,LYS 4,0.0
4,SER 5,0.0


In [24]:
# Extract mean occupancy values
pr_res_12ur = list(prot_nores.residues)
ss_res_12ur = [str(row) for row in pr_res_12ur]
rkg_12ur = {key:h2di_nr[key][1] for key, value in h2di_nr.items()}
plg_1_2nm_NRocc = pd.DataFrame(data=ss_res_12ur, columns=["BSA_des_res"])
plg_1_2nm_NRocc['mda_occ_1.2nm_NR'] = plg_1_2nm_NRocc['BSA_des_res'].map(rkg_12ur)
plg_1_2nm_NRocc['mda_occ_1.2nm_NR'] = plg_1_2nm_NRocc['mda_occ_1.2nm_NR'].replace('nan', np.nan).fillna(0)
plg_1_2nm_NRocc['mda_occ_1.2nm_NR'] = plg_1_2nm_NRocc['mda_occ_1.2nm_NR'].round(2)
plg_1_2nm_NRocc

Unnamed: 0,BSA_des_res,mda_occ_1.2nm_NR
0,"<Residue ASP, 1>",0.00
1,"<Residue THR, 2>",0.00
2,"<Residue HIS, 3>",0.00
3,"<Residue LYS, 4>",0.00
4,"<Residue SER, 5>",0.00
...,...,...
578,"<Residue GLN, 579>",0.00
579,"<Residue THR, 580>",0.00
580,"<Residue ALA, 581>",0.01
581,"<Residue LEU, 582>",0.00


In [25]:
bsa_r = np.array(list(prot_nores.resids)) # shape is 4652
m_occ_12ur = np.array(list(plg_1_2nm_NRocc['mda_occ_1.2nm_NR'])) # shape is 583
m_occ = np.zeros(shape=(4653))
at_ind = np.where(bsa_r[:-1] != bsa_r[1:])[0]
at_in_nw = np.sort(np.append([0,4653],at_ind))
nw_v = 0
for i in range(583):
    b = at_in_nw[i+1] +1
    m_occ[nw_v:b] = m_occ_12ur[i]
    nw_v = at_in_nw[i+1] + 1 

In [26]:
m_occ[3089:3099]

array([0.  , 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.  ])

In [None]:
list(prot_nores.atoms[3089:3099])

In [None]:
np.nonzero(m_occ)

### Visualize Occupanct on protein 

In [None]:
prot_nores.occupancies = m_occ
prot_nores.occupancies

In [None]:
with mda.Writer("prot_12nm_nores.pdb") as pdb:
    pdb.write(prot_nores)

### Residue Importance: 1.2 nm Unrestrained 

In [None]:
trj_pp12nm_nr = prot_poly_cntmovie(prot_nores, pn20_allnr, dmax, pn12nm_nores, 0, 10000)
#trj_ppmap_12nm_chC = prot_poly_cntmovie(prot, all_pn20_C, dmax, u_pn20, 0, 10000)

In [29]:
np.save('1.2nm_NoRes.npy', trj_pp12nm_nr)    # .npy extension is added if not given

In [30]:
trj_pp12nm_nr = np.load("1.2nm_NoRes.npy", allow_pickle=True)

In [31]:
np.sum(trj_pp12nm_nr[1000][0])

0.0

In [32]:
kj = np.zeros(shape=(10000, 583))
kj[:,582].shape

(10000,)

In [33]:
pp_12nmur_ct = np.zeros(shape=(10000, 583))
for i in range(10000):
    for j in range(583):
        pp_12nmur_ct[i][j] = np.sum(trj_pp12nm_nr[i][j])

In [34]:
pp_12nmtot_nr = np.zeros(shape=(583))
for i in range(583):
    pp_12nmtot_nr[i] = np.sum(pp_12nmur_ct[:,i])
#pp_12nmtot

In [35]:
np.nonzero(pp_12nmtot_nr)

(array([384, 385, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397,
        398, 399, 400, 403, 404, 407, 408, 411, 412, 432, 433, 436, 437,
        438, 439, 440, 443, 470, 490, 491, 492, 493, 516, 520, 523, 535,
        536, 537, 538, 539, 540, 541, 543, 544, 547, 548, 550, 551, 554,
        555, 558, 576, 579, 580, 581, 582]),)

In [None]:
plt.close('all')

In [36]:
y_pos = np.arange(583) + 1
wid = np.zeros(shape=583)
wid += 3
#wid
fig = plt.figure(figsize=(12,12))
fig.canvas.layout.width = '800px'
fig.canvas.layout.height = '700px'
plt.bar(y_pos+0.25, pp_12nmtot_nr, align='center',width=wid, color='blue', alpha=0.3, label='1.2 nm PLGA No restraint')
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xlim([0,600])
plt.ylim([0,50000])
plt.legend(fontsize=14)
plt.ylabel(r'Total No. of PLGA contacts', fontsize=15)
plt.xlabel(r'BSA Residue ID', fontsize=15)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 0, 'BSA Residue ID')

### Total number of residues that are within 4 angstroms of a PLGA oligomer residue within a 100 ns trajectory block

In [38]:
# Read in data from the oputput of wrapper.sh, where the frame count is given for each BSA residue that was within 
# 4 angstroms of PLGA trimer 
wat_data = pd.read_csv('occ_BSA1ns.txt', sep=" ", header=None, usecols=None ,index_col=None)
wat_data.columns = ["BSA_res_no","No. of frames (VMD)"]
wat_data = wat_data.drop("BSA_res_no", axis=1)

pr_res = list(prot_nores.residues)
ss_res = [str(row) for row in pr_res]

wat_data['BSA_des_res'] = ss_res
wat_data = wat_data[['BSA_des_res',"No. of frames (VMD)"]]
#wat_data.head()

# load MDAnalysis values from MDA_BSA1ns.txt file(129003 atoms SOL group was used to calc. frame counts for txt.
# file)
h2ob_dict = json.load(open("MDA_BSA1ns.txt"))
wat_data['Mda_frames'] = wat_data['BSA_des_res'].map(h2ob_dict)


# From MD Analysis
#Get the count of bsa residues that have 1001 or 1002 frames ( I ran a 1 ns NPT simulation of 1 BSA in water )

#aa_count = pd.DataFrame(data=a_a)
c_list = []

for i in range(len(a_a)):
    count = 0
    for index, row in wat_data.iterrows():
        if a_a[i] in row["BSA_des_res"]:
            if row['Mda_frames'] == 1001: 
                count += 1
                #c_list.append(str(str(a_a[i])+"  "+str(row['No. of frames']))) 
            elif row['Mda_frames'] == 1000:
                count += 1
                #c_list.append(str(str(a_a[i])+"  "+str(row['No. of frames'])))
    c_list.append(str(str(a_a[i])+"  "+str(count)))
    
#c_list

# From VMD
#Get the count of bsa residues that have 1001 or 1002 frames ( I ran a 1 ns NPT simulation of 1 BSA in water )

#aa_count = pd.DataFrame(data=a_a)
vmd_list = []

for i in range(len(a_a)):
    count = 0
    for index, row in wat_data.iterrows():
        if a_a[i] in row["BSA_des_res"]:
            if row["No. of frames (VMD)"] == 1001: 
                count += 1
                #c_list.append(str(str(a_a[i])+"  "+str(row['No. of frames']))) 
            elif row["No. of frames (VMD)"] == 1002:
                count += 1
                #c_list.append(str(str(a_a[i])+"  "+str(row['No. of frames'])))
    vmd_list.append(str(str(a_a[i])+"  "+str(count)))

# Main difference is that Alanine 583 is counted for all 1001 frames. It seems VMD is unable to calc dist for that res
#vmd_list

#hydrophobic_res = ['ALA', 'ILE', 'LEU', 'VAL', 'GLY', 'PRO','PHE', 'TRP','MET']
#polar_res = ['ASN', 'CYS', 'GLN', 'SER', 'THR','TYR']
#neg_res = ['ASP', 'GLU']
#pos_res = ['ARG', 'HIS', 'LYS']
# aromatic_res = ['PHE', 'TRP', 'TYR', 'HIS']
#all_res = [pos_res, neg_res, polar_res, hydrophobic_res]

# Put the AA count in a pandas dataframe 
dg , ji = AA_list_org(c_list)
aa_count = pd.DataFrame(data=dg, index=None, columns=['Amino_acids'])
new_lf = pd.Series(data=ji, index=None)
vmg, vmdj = AA_list_org(vmd_list)
n2lf = pd.Series(data=vmdj, index=None)
aa_count['No_of_surf_res (MDAnalysis)'] = new_lf
aa_count['No_of_surf_res (VMD)'] = n2lf

In [39]:
apl_12nm_nr = []

# Some residues don't have any contact with the 3 N = 20 PLGA oligomers within 100 ns,
# Put residues that do have contact with BSA in a separate list
for index, r_pl in plg_1_2nr.iterrows():
    if r_pl['mda_1.2nm_nores'] != 0:
        apl_12nm_nr.append(r_pl['BSA_des_res'])
        
# This chunk of code gets an AA count from the above list, in order 
# to get a total number of residues that contact BSA
cpl_12nm_nr = []

for index, r_a in aa_count.iterrows():
    count = 0
    for i in range(len(apl_12nm_nr)):
        if r_a['Amino_acids'] in apl_12nm_nr[i]:
            count += 1
    cpl_12nm_nr.append(count)      
        
aa_count['plga_1.2nm_100ns_NR'] = cpl_12nm_nr
#aa_count.drop('No_of_surf_res (VMD)', axis=1, inplace=True)
aa_count

Unnamed: 0,Amino_acids,No_of_surf_res (MDAnalysis),No_of_surf_res (VMD),plga_1.2nm_100ns_NR
0,LYS,59,59,9
1,ARG,23,23,3
2,HIS,16,16,0
3,ASP,40,40,4
4,GLU,59,59,7
5,SER,23,23,1
6,THR,29,29,6
7,CYS,24,24,2
8,TYR,19,19,1
9,ASN,14,14,4


In [40]:
# This gives the total number of residues that are within 4 angstroms of a PLGA oligomer residue
# within a 100 ns trajectory block
aa_count['plga_1.2nm_100ns_NR'].sum()

59

In [41]:
# This gives the total number of residues that are within 4 angstroms of a water molecule
# within a 1 ns trajectory block
aa_count['No_of_surf_res (MDAnalysis)'].sum()

487

In [42]:
# This gives the total fraction of contacts within the 1.2 nm Rg 100 ns trajectory
aa_count['plga_1.2nm_100ns_NR'].sum()/aa_count['No_of_surf_res (MDAnalysis)'].sum()

0.12114989733059549

In [43]:
# Mean occupancy and std deviation 
ll_mo12_nr = [value[1] for key, value in h2di_nr.items()]
print("Mean Occpancy (1.2 nm unrestrained Rg): "+str(np.mean(ll_mo12_nr)), "Occ. std. dev.: "+str(np.std(ll_mo12_nr)))

Mean Occpancy (1.2 nm unrestrained Rg): 0.2085474576271186 Occ. std. dev.: 0.3199042755956597


In [44]:
cd_12nm_nr = frac_cont(h2di_nr)
cd_12nm_nr

{'Negative': [44819.0, 11, 240392.81818181818, 0.339871169960441],
 'Positive': [27846.0, 12, 136909.5, 0.19356481735034775],
 'Polar': [25454.0, 18, 83432.55555555555, 0.11795826715592309],
 'Hydrophobic': [24924.0, 18, 81695.33333333333, 0.11550215489094945],
 'Aromatic': [5589.0, 2, 164875.5, 0.23310359064233865]}

In [45]:
no_surf = aa_count['No_of_surf_res (MDAnalysis)'].sum()
no_surf

487

### Calc. fractional contacts for each AA group type 

In [46]:
fcnt1_2nm_nr, prgrp12nm_nr, aamatx_12nm_nr = bavg_frac_cnt(5, prot_nores, pn20_allnr, dmax,
                                                        pn12nm_nores, no_surf, 0, 10000)

0
2000
2000
4000
4000
6000
6000
8000
8000
10000


In [47]:
fcnt1_2nm_nr

{'Negative': array([0.36003918, 0.43935885, 0.45733722, 0.2513063 , 0.21542932]),
 'Positive': array([0.17740878, 0.24274806, 0.24178814, 0.22820893, 0.19316669]),
 'Polar': array([0.18053639, 0.12515765, 0.14951409, 0.12973747, 0.09892214]),
 'Hydrophobic': array([0.12768307, 0.19273544, 0.14067055, 0.19902762, 0.23588641]),
 'Aromatic': array([0.15433258, 0.        , 0.01068999, 0.19171969, 0.25659544]),
 'total_frac': array([0.09240246, 0.0349076 , 0.03696099, 0.06570842, 0.0698152 ])}

In [48]:
fc_12nmnr_mean = np.array([np.mean(fcnt1_2nm_nr['Negative']), np.mean(fcnt1_2nm_nr['Positive'])
                        ,np.mean(fcnt1_2nm_nr['Polar']),np.mean(fcnt1_2nm_nr['Hydrophobic'])
                        , np.mean(fcnt1_2nm_nr['Aromatic'])])
fc_12nmnr_mean

array([0.34469417, 0.21666412, 0.13677355, 0.17920062, 0.12266754])

In [49]:
fc_12nmnr_std = np.array([np.std(fcnt1_2nm_nr['Negative']), np.std(fcnt1_2nm_nr['Positive'])
                       ,np.std(fcnt1_2nm_nr['Polar']),np.std(fcnt1_2nm_nr['Hydrophobic'])
                       , np.std(fcnt1_2nm_nr['Aromatic'])])
fc_12nmnr_std

array([0.09727901, 0.02660068, 0.02718335, 0.03982011, 0.10128559])

In [None]:
x_pos = np.arange(5)
aa_types = ["Negative", "Positive", "Polar", "Hydrophobic", "Aromatic"]
plt.figure(figsize=(7,7))
plt.bar(x_pos, fc_12nmnr_mean, yerr=fc_12nmnr_std, ecolor='black',capsize=5, color='c')
plt.title(r'Fractional Contacts 1.2 nm Rg unrestrained', fontsize=15)
plt.xticks(x_pos, labels=aa_types, fontsize=12)
plt.ylabel(r'Fractional Contacts', fontsize=15)

### Total fraction of contacts: averages and std dev calc from 5 20ns blocks

In [50]:
np.mean(fcnt1_2nm_nr['total_frac'])

0.05995893223819302

In [51]:
np.std(fcnt1_2nm_nr['total_frac'])

0.021629887609468077

### Avg no. PLGA residues per BSA AA residue group 

In [None]:
prgrp12nm_nr

In [None]:
mean_12nm_nr = np.zeros(shape=5)
std_12nm_nr = np.zeros(shape=5)
count = 0
for key, value in prgrp12nm_nr.items():
    mpl_12nm_nr = []
    var_12nm_nr = []
    for i in prgrp12nm_nr[str(key)].flat:
        mpl_12nm_nr.append(i[0])
        var_12nm_nr.append((i[1])**2)
    
    # calc frac cont averages
    mean_12nm_nr[count] = np.mean(mpl_12nm_nr)
    
    # calc frac cont std dev: https://stats.stackexchange.com/questions/25848/how-to-sum-a-standard-deviation 
    std_12nm_nr[count] = np.std(mpl_12nm_nr)
    
    count += 1


In [None]:
mean_12nm_nr

In [None]:
std_12nm_nr

In [None]:
#std_12nm_nr[4] = 1
std_12nm_nr

In [None]:
x_pos = np.arange(5)
aa_types = ["Negative", "Positive", "Polar", "Hydrophobic", "Aromatic"]
plt.figure(figsize=(7,7))
plt.bar(x_pos, mean_12nm_nr, yerr=std_12nm_nr, ecolor='black',capsize=5)
plt.title(r'No. of PLGA residues 1.2 nm Rg unrestrained', fontsize=15)
plt.xticks(x_pos, labels=aa_types, fontsize=12)
plt.ylabel(r'No. of PLGA residues', fontsize=15)

### Protein/polymer contact map movie

In [None]:
fig = plt.figure(figsize=(10,10))

# Set the axis and the plot titles pp

plt.title("BSA/PLGA contact map 1.2 nm Unres", fontsize=22, loc='left')
plt.xlabel("PLGA Residue No.", fontsize=22)
plt.ylabel("BSA Residue No.", fontsize=20)

 # Set the axis range 
plt.ylim(583, 0)
plt.xlim(0, 60)

# Plot bands for each chain 
BANDS = (
    (0, 20, "purple", "B"),
    (20, 40, "blue", "C"),
    (40, 60, "green", "D"),
)
    
text_y = 0.98 # Close to the top
for start, stop, color, band in BANDS:
    plt.axvspan(start, stop,color=color, alpha=0.15)
    text_x = middle_of_band(start,stop)
    plt.text(
        text_x,
        text_y,
        "PLGA chain " + band,
        color=color,
        fontsize=18,
        transform=fig.gca().transAxes,
        horizontalalignment='center',
        verticalalignment='center',
        style='italic',
    )
    
plt.text(0.94, 1, "Time [ns]:", fontsize=20, transform=fig.gca().transAxes, horizontalalignment='right', verticalalignment='bottom')

# Set tick label size
fig.gca().tick_params(axis='both', which='major', labelsize=20)

ims = []
for i in range(10000):
    data = trj_pp12nm_nr[i]
    im = plt.imshow(data, aspect='auto', cmap='Greys')
    t_sim = plt.text(1.03, 1, str(i/100), fontsize=20, transform=fig.gca().transAxes, horizontalalignment='right', verticalalignment='bottom')
    ims.append([im, t_sim])
    
ani = animation.ArtistAnimation(fig, ims, blit=True, repeat=False)
ani.save('1.2nm_NoRes.mp4',writer='ffmpeg',fps=50, bitrate=100000)
#plt.tight_layout()
#plt.show()

# 1.5 nm PLGA unrestrained Rg 100 ns trajectory

Load the rg = 1.2 nm (3 PLGA N = 20 oligomer/BSA system)

In [52]:
# load the unrestrained trajectory 
pn15nm_nores = mda.Universe("../1.5nm_bsa_prod/1.5res_off/new_conf1.5nm.pdb"
                      , "../1.5nm_bsa_prod/1.5res_off/pp1_5nmresoff.xtc")

Check that we are on the first frame

In [53]:
pn15nm_nores.trajectory.frame

0

In [54]:
pn20_15nr = len(pn15nm_nores.trajectory)
pn20_15nr

10001

In [55]:
#Select all the PLGA residues, heavy atoms only 
pn20_all15nr = pn15nm_nores.select_atoms("resname sPLG PLG tPLG and not type H")
pn20_all15nr

<AtomGroup with 543 atoms>

In [56]:
# Select BSA residues, heavy atoms only 
prot15_nores = pn15nm_nores.select_atoms("protein and not type H")
prot15_nores

<AtomGroup with 4653 atoms>

### Contact Analysis

In [57]:
#dmax = 4.0, protein group(4653 atoms), plga atom group (543 atoms), took 381.6 s (6 min 36s on 4 cores)
start = 0
end = pn20_15nr - 1
s_time15nr = timeit.default_timer()
h2di_15nr = aa_frmcount(prot15_nores, pn20_all15nr, dmax, pn15nm_nores, start, end)
timeit.default_timer() - s_time15nr

943.5420586084947

In [58]:
len(h2di_15nr.keys())

153

In [60]:
pr_15nr = list(prot15_nores.residues)
ss_15nr = [str(row) for row in pr_15nr]
rkg_15nm_nr = {key:h2di_15nr[key][0] for key, value in h2di_15nr.items()}
plg_1_5nm_nr = pd.DataFrame(data=ss_15nr, columns=["BSA_des_res"])
plg_1_5nm_nr['mda_1.5nm_nr'] = plg_1_5nm_nr['BSA_des_res'].map(rkg_15nm_nr)
plg_1_5nm_nr['BSA_des_res'] = red_bsa
plg_1_5nm_nr['mda_1.5nm_nr'] = plg_1_5nm_nr['mda_1.5nm_nr'].replace('nan', np.nan).fillna(0)
plg_1_5nm_nr.head()

Unnamed: 0,BSA_des_res,mda_1.5nm_nr
0,ASP 1,88.0
1,THR 2,3838.0
2,HIS 3,6636.0
3,LYS 4,7044.0
4,SER 5,2651.0


In [61]:
# Extract mean occupancy values
pr_res_15ur = list(pn15nm_nores.residues)
ss_res_15ur = [str(row) for row in pr_res_15ur]
rkg_15ur = {key:h2di_15nr[key][1] for key, value in h2di_15nr.items()}
plg_1_2nm_NRocc['mda_occ_1.5nm_NR'] = plg_1_2nm_NRocc['BSA_des_res'].map(rkg_15ur)
plg_1_2nm_NRocc['mda_occ_1.5nm_NR'] = plg_1_2nm_NRocc['mda_occ_1.5nm_NR'].replace('nan', np.nan).fillna(0)
plg_1_2nm_NRocc['mda_occ_1.5nm_NR'] = plg_1_2nm_NRocc['mda_occ_1.5nm_NR'].round(2)
plg_1_2nm_NRocc

Unnamed: 0,BSA_des_res,mda_occ_1.2nm_NR,mda_occ_1.5nm_NR
0,"<Residue ASP, 1>",0.00,0.01
1,"<Residue THR, 2>",0.00,0.38
2,"<Residue HIS, 3>",0.00,0.66
3,"<Residue LYS, 4>",0.00,0.70
4,"<Residue SER, 5>",0.00,0.27
...,...,...,...
578,"<Residue GLN, 579>",0.00,0.00
579,"<Residue THR, 580>",0.00,0.00
580,"<Residue ALA, 581>",0.01,0.00
581,"<Residue LEU, 582>",0.00,0.00


In [63]:
bsa_r = np.array(list(prot15_nores.resids)) # shape is 4652
m_occ_15nmNR = np.array(list(plg_1_2nm_NRocc['mda_occ_1.5nm_NR'])) # shape is 583
m_occ15 = np.zeros(shape=(4653))
at_ind = np.where(bsa_r[:-1] != bsa_r[1:])[0]
at_in_nw = np.sort(np.append([0,4653],at_ind))
nw_v = 0
for i in range(583):
    b = at_in_nw[i+1] +1
    m_occ15[nw_v:b] = m_occ_15nmNR[i]
    nw_v = at_in_nw[i+1] + 1 

In [64]:
m_occ15[0:8]

array([0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01])

In [None]:
np.nonzero(m_occ)[0]

### Visualize Occupanct on protein 

In [None]:
prot15_nores.occupancies = m_occ
prot15_nores.occupancies

In [None]:
with mda.Writer("prot_15nmNoRes.pdb") as pdb:
    pdb.write(prot15_nores)

### Residue Importance: 1.5 nm Unrestrained 

In [None]:
trj_ppmap15nm_nr = prot_poly_cntmovie(prot15_nores, pn20_all15nr, dmax, pn15nm_nores, 0, 10000)
#trj_ppmap_12nm_chC = prot_poly_cntmovie(prot, all_pn20_C, dmax, u_pn20, 0, 10000)

In [None]:
np.save('1.5nm_NoRes.npy', trj_ppmap15nm_nr)    # .npy extension is added if not given

In [65]:
trj_ppmap15nm_nr = np.load('1.5nm_NoRes.npy', allow_pickle=True)

In [66]:
trj_ppmap15nm_nr[0].shape

(583, 60)

In [67]:
np.sum(trj_ppmap15nm_nr[1000][0])

0.0

In [68]:
kj = np.zeros(shape=(10000, 583))
kj[:,582].shape

(10000,)

In [69]:
pp_15nmNR_ct = np.zeros(shape=(10000, 583))
for i in range(10000):
    for j in range(583):
        pp_15nmNR_ct[i][j] = np.sum(trj_ppmap15nm_nr[i][j])

In [70]:
pp_15nmNRtot = np.zeros(shape=(583))
for i in range(583):
    pp_15nmNRtot[i] = np.sum(pp_15nmNR_ct[:,i])
#pp_12nmtot

In [None]:
np.nonzero(pp_15nmNRtot)

In [None]:
plt.close('all')

In [71]:
y_pos = np.arange(583) + 1
wid = np.zeros(shape=583)
wid += 1.5
#wid
fig = plt.figure(figsize=(12,12))
fig.canvas.layout.width = '800px'
fig.canvas.layout.height = '700px'
plt.bar(y_pos+0.25, pp_15nmNRtot, align='center',width=wid, color='#0371EC', alpha=0.4, label='1.5 nm PLGA')
plt.bar(y_pos, pp_12nmtot_nr, align='center',width=wid, alpha=0.4, color='#87772C',label='1.2 nm PLGA')
plt.title("PLGA/BSA 100 ns Unrestrained", fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xlim([0,600])
plt.ylim([0,50000])
plt.legend(fontsize=14)
plt.ylabel(r'Total No. of PLGA contacts', fontsize=15)
plt.xlabel(r'BSA Residue ID', fontsize=15)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 0, 'BSA Residue ID')

### Total number of residues that are within 4 angstroms of a PLGA oligomer residue within a 100 ns trajectory block

In [72]:
apl_15nm_nr = []

# Some residues don't have any contact with the 3 N = 20 PLGA oligomers within 100 ns,
# Put residues that do have contact with BSA in a separate list
for index, r_pl in plg_1_5nm_nr.iterrows():
    if r_pl['mda_1.5nm_nr'] != 0:
        apl_15nm_nr.append(r_pl['BSA_des_res'])
        
# This chunk of code gets an AA count from the above list, in order 
# to get a total number of residues that contact BSA
cpl_15nm_nr = []

for index, r_a in aa_count.iterrows():
    count = 0
    for i in range(len(apl_15nm_nr)):
        if r_a['Amino_acids'] in apl_15nm_nr[i]:
            count += 1
    cpl_15nm_nr.append(count)      
        
aa_count['plga_1.5nm_100ns_NR'] = cpl_15nm_nr
#aa_count.drop('No_of_surf_res (VMD)', axis=1, inplace=True)
aa_count

Unnamed: 0,Amino_acids,No_of_surf_res (MDAnalysis),No_of_surf_res (VMD),plga_1.2nm_100ns_NR,plga_1.5nm_100ns_NR
0,LYS,59,59,9,19
1,ARG,23,23,3,10
2,HIS,16,16,0,6
3,ASP,40,40,4,15
4,GLU,59,59,7,22
5,SER,23,23,1,6
6,THR,29,29,6,11
7,CYS,24,24,2,6
8,TYR,19,19,1,2
9,ASN,14,14,4,4


In [73]:
# This gives the total number of residues that are within 4 angstroms of a PLGA oligomer residue
# within a 100 ns trajectory block
aa_count['plga_1.5nm_100ns_NR'].sum()

153

In [74]:
# This gives the total number of residues that are within 4 angstroms of a water molecule
# within a 1 ns trajectory block
aa_count['No_of_surf_res (MDAnalysis)'].sum()

487

In [75]:
# This gives the total fraction of contacts within the 1.2 nm Rg 100 ns trajectory
aa_count['plga_1.5nm_100ns_NR'].sum()/aa_count['No_of_surf_res (MDAnalysis)'].sum()

0.3141683778234086

In [76]:
# Mean occupancy and std deviation 
ll_mo15_nr = [value[1] for key, value in h2di_15nr.items()]
print("Mean Occpancy (1.5 nm Rg Unrestrained): "+str(np.mean(ll_mo15_nr)), "Occ. std. dev.: "+str(np.std(ll_mo15_nr)))

Mean Occpancy (1.5 nm Rg Unrestrained): 0.41129738562091506 Occ. std. dev.: 0.38573051345826


In [77]:
cd_15nr = frac_cont(h2di_15nr)
cd_15nr

{'Negative': [163105.0, 37, 674461.2162162162, 0.21755178884426737],
 'Positive': [159513.0, 35, 697299.6857142857, 0.22491848358417288],
 'Polar': [150588.0, 35, 658284.6857142857, 0.21233394523313726],
 'Hydrophobic': [156079.0, 46, 519132.32608695654, 0.16744945961562546],
 'Aromatic': [43220.0, 12, 551055.0, 0.17774632272279706]}

### Calc. fractional contacts for each AA group type 

In [78]:
fcntrg1_5nm_nr, prgrp1_5nm_nr, aamatx_15nm_nr = bavg_frac_cnt(5, prot15_nores, pn20_all15nr, dmax,
                                                        pn15nm_nores, no_surf, 0, 10000)

0
2000
2000
4000
4000
6000
6000
8000
8000
10000


In [79]:
fcntrg1_5nm_nr

{'Negative': array([0.21290275, 0.21089677, 0.17656193, 0.20270584, 0.20340443]),
 'Positive': array([0.20929357, 0.19868204, 0.21289176, 0.20685673, 0.19498902]),
 'Polar': array([0.24804031, 0.22174973, 0.19192209, 0.20815634, 0.20606201]),
 'Hydrophobic': array([0.18593079, 0.16820837, 0.16279642, 0.16832093, 0.16940889]),
 'Aromatic': array([0.14383258, 0.20046309, 0.25582781, 0.21396016, 0.22613565]),
 'total_frac': array([0.23613963, 0.23819302, 0.23613963, 0.25051335, 0.22792608])}

In [80]:
fc15nm_mean_nr = np.array([np.mean(fcntrg1_5nm_nr['Negative']), np.mean(fcntrg1_5nm_nr['Positive'])
                        ,np.mean(fcntrg1_5nm_nr['Polar']),np.mean(fcntrg1_5nm_nr['Hydrophobic'])
                        , np.mean(fcntrg1_5nm_nr['Aromatic'])])
fc15nm_mean_nr

array([0.20129435, 0.20454263, 0.2151861 , 0.17093308, 0.20804386])

In [81]:
fc15nm_std_nr = np.array([np.std(fcntrg1_5nm_nr['Negative']), np.std(fcntrg1_5nm_nr['Positive'])
                       ,np.std(fcntrg1_5nm_nr['Polar']),np.std(fcntrg1_5nm_nr['Hydrophobic'])
                       , np.std(fcntrg1_5nm_nr['Aromatic'])])
fc15nm_std_nr

array([0.01300076, 0.0066821 , 0.01895448, 0.00784484, 0.03694909])

In [115]:
x_pos = np.arange(5)
width = 0.35
aa_types = ["Negative", "Positive", "Polar", "Hydrophobic", "Aromatic"]
plt.figure(figsize=(7,7))
plt.bar(x_pos, fc_12nmnr_mean, width, yerr=fc_12nmnr_std, ecolor='black',capsize=5, color='royalblue')
plt.bar(x_pos+width, fc15nm_mean_nr, width, yerr=fc15nm_std_nr, ecolor='black',capsize=5, color='c')
plt.title(r'Fractional Contacts Rg unrestrained', fontsize=15)
plt.xticks(x_pos+width/2, labels=aa_types, fontsize=12)
plt.legend(['Rg = 1.2 nm', 'Rg = 1.5 nm'], frameon=False)
plt.ylabel(r'Fractional Contacts', fontsize=15)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0, 0.5, 'Fractional Contacts')

### Total fraction of contacts: averages and std dev calc from 5 20ns blocks

In [83]:
np.mean(fcntrg1_5nm_nr['total_frac'])

0.23778234086242298

In [84]:
np.std(fcntrg1_5nm_nr['total_frac'])

0.007277225932923757

### Avg no. PLGA residues per BSA AA residue group 

In [None]:
prgrp1_5nm_nr

In [None]:
mean_15nm_nr = np.zeros(shape=5)
std_15nm_nr = np.zeros(shape=5)
count = 0
for key, value in prgrp1_5nm_nr.items():
    mpl_15nm_nr = []
    var_15nm_nr = []
    for i in prgrp1_5nm_nr[str(key)].flat:
        mpl_15nm_nr.append(i[0])
        var_15nm_nr.append((i[1])**2)
    
    # calc frac cont averages
    mean_15nm_nr[count] = np.mean(mpl_15nm_nr)
    
    # calc frac cont std dev: https://stats.stackexchange.com/questions/25848/how-to-sum-a-standard-deviation 
    std_15nm_nr[count] = np.std(mpl_15nm_nr)
    #std_15nm_nr[count] = np.sqrt(np.sum(var_15nm_nr)/5)
    
    count += 1


In [None]:
mean_15nm_nr

In [None]:
std_15nm_nr

In [None]:
x_pos = np.arange(5)
width = 0.35
aa_types = ["Negative", "Positive", "Polar", "Hydrophobic", "Aromatic"]
plt.figure(figsize=(7,7))
plt.bar(x_pos, mean_12nm_nr, width, yerr=std_12nm_nr, ecolor='black',capsize=5, color='green')
plt.bar(x_pos+width, mean_15nm_nr, width, yerr=std_15nm_nr, ecolor='black',capsize=5, color='violet')
plt.title(r'No. of PLGA residues Rg unrestrained', fontsize=15)
plt.xticks(x_pos+width/2, labels=aa_types, fontsize=12)
plt.legend(['Rg = 1.2 nm', 'Rg = 1.5 nm'], frameon=False)
plt.ylabel(r'No. of PLGA residues', fontsize=15)

### Protein/polymer contact map movie

In [None]:
trj_ppmap15nm_nr = prot_poly_cntmovie(prot15_nores, pn20_all15nr, dmax, pn15nm_nores, 0, 10000)
#trj_ppmap_12nm_chC = prot_poly_cntmovie(prot, all_pn20_C, dmax, u_pn20, 0, 10000)

In [None]:
np.save('1.5nm_NoRes.npy', trj_ppmap15nm_nr)    # .npy extension is added if not given

In [None]:
fig = plt.figure(figsize=(10,10))

# Set the axis and the plot titles pp

plt.title("BSA/PLGA contact map 1.5 nm Unres", fontsize=22, loc='left')
plt.xlabel("PLGA Residue No.", fontsize=22)
plt.ylabel("BSA Residue No.", fontsize=20)

 # Set the axis range 
plt.ylim(583, 0)
plt.xlim(0, 60)

# Plot bands for each chain 
BANDS = (
    (0, 20, "purple", "B"),
    (20, 40, "blue", "C"),
    (40, 60, "green", "D"),
)
    
text_y = 0.98 # Close to the top
for start, stop, color, band in BANDS:
    plt.axvspan(start, stop,color=color, alpha=0.15)
    text_x = middle_of_band(start,stop)
    plt.text(
        text_x,
        text_y,
        "PLGA chain " + band,
        color=color,
        fontsize=18,
        transform=fig.gca().transAxes,
        horizontalalignment='center',
        verticalalignment='center',
        style='italic',
    )
    
plt.text(0.94, 1, "Time [ns]:", fontsize=20, transform=fig.gca().transAxes, horizontalalignment='right', verticalalignment='bottom')

# Set tick label size
fig.gca().tick_params(axis='both', which='major', labelsize=20)

ims = []
for i in range(10000):
    data = trj_ppmap15nm_nr[i]
    im = plt.imshow(data, aspect='auto', cmap='Greys')
    t_sim = plt.text(1.03, 1, str(i/100), fontsize=20, transform=fig.gca().transAxes, horizontalalignment='right', verticalalignment='bottom')
    ims.append([im, t_sim])
    
ani = animation.ArtistAnimation(fig, ims, blit=True, repeat=False)
ani.save('1.5nm_NoRes.mp4', writer='ffmpeg', fps=50, bitrate=100000)
#plt.tight_layout()
#plt.show()

# 2 nm PLGA unrestrained Rg 100 ns trajectory

Load the rg = 1.2 nm (3 PLGA N = 20 oligomer/BSA system)

In [85]:
# load the unrestrained trajectory 
pn2nm_nores = mda.Universe("../2nm_bsa_prod/2nmres_off/new_conf2nmnm.pdb"
                      , "../2nm_bsa_prod/2nmres_off/pp_resoffnopbc.xtc")

Check that we are on the first frame

In [86]:
pn2nm_nores.trajectory.frame

0

In [87]:
p2nm_len_nr = len(pn2nm_nores.trajectory)
p2nm_len_nr

10001

In [88]:
#Select all the PLGA residues, heavy atoms only 
p2nm_allnr = pn2nm_nores.select_atoms("resname sPLG PLG tPLG and not type H")
p2nm_allnr

<AtomGroup with 543 atoms>

In [89]:
# Select BSA residues, heavy atoms only 
prot2nm_nores = pn2nm_nores.select_atoms("protein and not type H")
prot2nm_nores

<AtomGroup with 4653 atoms>

### Calc. total fraction of contacts

In [90]:
#dmax = 4.0, protein group(4653 atoms), plga atom group (543 atoms), took 381.6 s (6 min 36s on 4 cores)
start = 0
end = p2nm_len_nr - 1
s_time = timeit.default_timer()
h2di_2nr = aa_frmcount(prot2nm_nores, p2nm_allnr, dmax, pn2nm_nores, start, end)
timeit.default_timer() - s_time

676.4039867212996

In [91]:
len(h2di_2nr.keys())

162

In [107]:
pr_2nr = list(prot2nm_nores.residues)
ss_2nr = [str(row) for row in pr_2nr]
rkg_2nm = {key:h2di_2nr[key][0] for key, value in h2di_2nr.items()}
plg_2nm_nr = pd.DataFrame(data=ss_2nr, columns=["BSA_des_res"])
plg_2nm_nr['mda_2nm_nr'] = plg_2nm_nr['BSA_des_res'].map(rkg_2nm)
plg_2nm_nr['BSA_des_res'] = red_bsa
plg_2nm_nr['mda_2nm_nr'] = plg_2nm_nr['mda_2nm_nr'].replace('nan', np.nan).fillna(0)
plg_2nm_nr.head()

Unnamed: 0,BSA_des_res,mda_2nm_nr
0,ASP 1,0.0
1,THR 2,0.0
2,HIS 3,0.0
3,LYS 4,0.0
4,SER 5,0.0


In [24]:
# Extract mean occupancy values
pr_res_2ur = list(prot2nm_nores.residues)
ss_res_2ur = [str(row) for row in pr_res_2ur]
rkg_15ur = {key:h2di_2nr[key][1] for key, value in h2di_2nr.items()}
plg_1_2nm_NRocc['mda_occ_2nm_NR'] = plg_1_2nm_NRocc['BSA_des_res'].map(rkg_2ur)
plg_1_2nm_NRocc['mda_occ_2nm_NR'] = plg_1_2nm_NRocc['mda_occ_2nm_NR'].replace('nan', np.nan).fillna(0)
plg_1_2nm_NRocc['mda_occ_2nm_NR'] = plg_1_2nm_NRocc['mda_occ_2nm_NR'].round(2)
plg_1_2nm_NRocc

Unnamed: 0,BSA_des_res,mda_occ_1.2nm_NR
0,"<Residue ASP, 1>",0.00
1,"<Residue THR, 2>",0.00
2,"<Residue HIS, 3>",0.00
3,"<Residue LYS, 4>",0.00
4,"<Residue SER, 5>",0.00
...,...,...
578,"<Residue GLN, 579>",0.00
579,"<Residue THR, 580>",0.00
580,"<Residue ALA, 581>",0.01
581,"<Residue LEU, 582>",0.00


In [128]:
# Extract mean occupancy values
pr_res_2ur = list(prot2nm_nores.residues)
ss_res_2ur = [str(row) for row in pr_res_2ur]
rkg_2ur = {key:h2di_2nr[key][1] for key, value in h2di_2nr.items()}
plg_1_2nm_NRocc['mda_occ_2nm_NR'] = plg_1_2nm_NRocc['BSA_des_res'].map(rkg_2ur)
plg_1_2nm_NRocc['mda_occ_2nm_NR'] = plg_1_2nm_NRocc['mda_occ_2nm_NR'].replace('nan', np.nan).fillna(0)
plg_1_2nm_NRocc['mda_occ_2nm_NR'] = plg_1_2nm_NRocc['mda_occ_2nm_NR'].round(2)
plg_1_2nm_NRocc

Unnamed: 0,BSA_des_res,mda_occ_1.2nm_NR,mda_occ_1.5nm_NR,mda_occ_2nm_NR
0,"<Residue ASP, 1>",0.00,0.01,0.00
1,"<Residue THR, 2>",0.00,0.38,0.00
2,"<Residue HIS, 3>",0.00,0.66,0.00
3,"<Residue LYS, 4>",0.00,0.70,0.00
4,"<Residue SER, 5>",0.00,0.27,0.00
...,...,...,...,...
578,"<Residue GLN, 579>",0.00,0.00,0.97
579,"<Residue THR, 580>",0.00,0.00,0.95
580,"<Residue ALA, 581>",0.01,0.00,0.50
581,"<Residue LEU, 582>",0.00,0.00,0.12


In [129]:
m_occ = np.zeros(shape=(4653))
m_occ.shape

(4653,)

In [130]:
bsa_r = np.array(list(prot2nm_nores.resids)) # shape is 4652
m_occ_2nmNR = np.array(list(plg_1_2nm_NRocc['mda_occ_2nm_NR'])) # shape is 583
m_occ = np.zeros(shape=(4653))
at_ind = np.where(bsa_r[:-1] != bsa_r[1:])[0]
at_in_nw = np.sort(np.append([0,4653],at_ind))
nw_v = 0
for i in range(583):
    b = at_in_nw[i+1] +1
    m_occ[nw_v:b] = m_occ_2nmNR[i]
    nw_v = at_in_nw[i+1] + 1 

In [131]:
m_occ[633:646]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

### Visualize Occupanct on protein 

In [132]:
prot2nm_nores.occupancies = m_occ
prot2nm_nores.occupancies

array([0.  , 0.  , 0.  , ..., 0.31, 0.31, 0.31])

In [133]:
with mda.Writer("prot_2nmNoRes.pdb") as pdb:
    pdb.write(prot2nm_nores)

### Residue Importance: 2 nm Unrestrained 

In [None]:
trj_ppmap2nm_nr = prot_poly_cntmovie(prot2nm_nores, p2nm_allnr, dmax, pn2nm_nores, 0, 10000)
#trj_ppmap_12nm_chC = prot_poly_cntmovie(prot, all_pn20_C, dmax, u_pn20, 0, 10000)

In [None]:
trj_ppmap2nm_nr.shape

In [None]:
np.save('2nm_NoRes.npy', trj_ppmap2nm_nr)    # .npy extension is added if not given

In [98]:
trj_ppmap2nm_nr = np.load('2nm_NoRes.npy', allow_pickle=True)

In [99]:
trj_ppmap2nm_nr[0].shape

(583, 60)

In [100]:
np.sum(trj_ppmap2nm_nr[1000][0])

0.0

In [101]:
kj = np.zeros(shape=(10000, 583))
kj[:,582].shape

(10000,)

In [102]:
pp_2nmNR_ct = np.zeros(shape=(10000, 583))
for i in range(10000):
    for j in range(583):
        pp_2nmNR_ct[i][j] = np.sum(trj_ppmap2nm_nr[i][j])

In [103]:
pp_2nmNRtot = np.zeros(shape=(583))
for i in range(583):
    pp_2nmNRtot[i] = np.sum(pp_2nmNR_ct[:,i])
#pp_12nmtot

In [104]:
np.nonzero(pp_2nmNRtot)

(array([ 16, 118, 119, 123, 125, 126, 127, 128, 129, 130, 131, 161, 164,
        165, 166, 167, 168, 169, 170, 171, 172, 222, 223, 224, 225, 263,
        264, 265, 266, 267, 268, 269, 270, 271, 272, 274, 275, 280, 292,
        293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 306, 330, 331,
        334, 335, 336, 337, 338, 339, 346, 350, 353, 354, 357, 358, 361,
        366, 367, 369, 370, 373, 374, 375, 376, 377, 378, 379, 380, 381,
        382, 383, 384, 386, 387, 388, 391, 392, 395, 408, 412, 439, 440,
        441, 442, 443, 444, 445, 473, 478, 481, 484, 485, 487, 488, 489,
        490, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503,
        505, 506, 507, 508, 509, 510, 533, 534, 535, 536, 537, 539, 540,
        541, 543, 544, 545, 547, 548, 551, 555, 560, 561, 562, 563, 564,
        565, 566, 567, 568, 569, 571, 572, 573, 574, 575, 576, 577, 578,
        579, 580, 581, 582]),)

In [134]:
aNR = np.sum(pp_2nmNRtot)
aNR

604172.0

In [135]:
bNR = np.sum(pp_15nmNRtot)
bNR

1106640.0

In [137]:
cNR = np.sum(pp_12nmtot_nr)
cNR

246122.0

In [139]:
plt.close('all')

In [141]:
y_pos = np.arange(583) + 1
wid = np.zeros(shape=583)
wid += 1.5
#wid
fig = plt.figure(figsize=(12,12))
fig.canvas.layout.width = '800px'
fig.canvas.layout.height = '700px'
plt.bar(y_pos, pp_12nmtot_nr/cNR, align='center',width=wid, alpha=0.4, color='#1D77CF',label='1.2 nm PLGA')
plt.bar(y_pos+0.25, pp_15nmNRtot/bNR, align='center',width=wid, color='#562A8B', alpha=0.3, label='1.5 nm PLGA')
plt.bar(y_pos+0.3, pp_2nmNRtot/aNR, align='center',width=wid, color='#4E4C4D', alpha=0.3, label='2 nm PLGA')
#plt.bar(y_pos+0.25, pp_15nmNRtot/bNR, align='center',width=wid, color='#562A8B', alpha=0.3, label='1.5 nm PLGA')
#plt.bar(y_pos, pp_12nmtot_nr/cNR, align='center',width=wid, alpha=0.4, color='#1D77CF',label='1.2 nm PLGA')
plt.title("BSA in water with PLGA unrestrained, 100 ns", fontsize=18)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xlim([0,600])
plt.ylim([0,0.2])
plt.legend(fontsize=14)
plt.ylabel(r'Normalized Total No. of PLGA contacts', fontsize=15)
plt.xlabel(r'BSA Residue ID', fontsize=15)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 0, 'BSA Residue ID')

### Total number of residues that are within 4 angstroms of a PLGA oligomer residue within a 100 ns trajectory block

In [108]:
apl_2nm_nr = []

# Some residues don't have any contact with the 3 N = 20 PLGA oligomers within 100 ns,
# Put residues that do have contact with BSA in a separate list
for index, r_pl in plg_2nm_nr.iterrows():
    if r_pl['mda_2nm_nr'] != 0:
        apl_2nm_nr.append(r_pl['BSA_des_res'])
        
# This chunk of code gets an AA count from the above list, in order 
# to get a total number of residues that contact BSA
cpl_2nm_nr = []

for index, r_a in aa_count.iterrows():
    count = 0
    for i in range(len(apl_2nm_nr)):
        if r_a['Amino_acids'] in apl_2nm_nr[i]:
            count += 1
    cpl_2nm_nr.append(count)      
        
aa_count['plga_2nm_100ns_NR'] = cpl_2nm_nr
#aa_count.drop('No_of_surf_res (VMD)', axis=1, inplace=True)
aa_count

Unnamed: 0,Amino_acids,No_of_surf_res (MDAnalysis),No_of_surf_res (VMD),plga_1.2nm_100ns_NR,plga_1.5nm_100ns_NR,plga_2nm_100ns_NR
0,LYS,59,59,9,19,23
1,ARG,23,23,3,10,5
2,HIS,16,16,0,6,4
3,ASP,40,40,4,15,15
4,GLU,59,59,7,22,23
5,SER,23,23,1,6,7
6,THR,29,29,6,11,9
7,CYS,24,24,2,6,8
8,TYR,19,19,1,2,3
9,ASN,14,14,4,4,6


In [109]:
# This gives the total number of residues that are within 4 angstroms of a PLGA oligomer residue
# within a 100 ns trajectory block
aa_count['plga_2nm_100ns_NR'].sum()

162

In [110]:
# This gives the total number of residues that are within 4 angstroms of a water molecule
# within a 1 ns trajectory block
aa_count['No_of_surf_res (MDAnalysis)'].sum()

487

In [111]:
# This gives the total fraction of contacts within the 2 nm unrestrained Rg 100 ns trajectory
aa_count['plga_2nm_100ns_NR'].sum()/aa_count['No_of_surf_res (MDAnalysis)'].sum()

0.3326488706365503

In [112]:
# Mean occupancy and std deviation 
ll_mo2_nr = [value[1] for key, value in h2di_2nr.items()]
print("Mean Occpancy (2 nm Rg): "+str(np.mean(ll_mo2_nr)), "Occ. std. dev.: "+str(np.std(ll_mo2_nr)))

Mean Occpancy (2 nm Rg): 0.205141975308642 Occ. std. dev.: 0.2913285756303597


In [113]:
cd_2nm = frac_cont(h2di_2nr)
cd_2nm

{'Negative': [70240.0, 38, 299444.2105263158, 0.1875415722770907],
 'Positive': [73632.0, 32, 372762.0, 0.2334604213662337],
 'Polar': [83459.0, 41, 329764.82926829264, 0.20653134169453893],
 'Hydrophobic': [104999.0, 51, 333526.23529411765, 0.2088871060581774],
 'Aromatic': [19347.0, 12, 261184.5, 0.16357955860395929]}

### Calc. fractional contacts for each AA group type 

In [None]:
test_nr = aa_frmcount(prot2nm_nores, p2nm_allnr, dmax, pn2nm_nores, 6000, 8000)

In [None]:
frac_cont(test_nr)

In [None]:
no_surf

In [116]:
fcntrg2nm_nr, prgrp2nm_nr, aamatx_2nm_nr = bavg_frac_cnt(5, prot2nm_nores, p2nm_allnr, dmax, pn2nm_nores, no_surf, 0, 10000)

0
2000
2000
4000
4000
6000
6000
8000
8000
10000


In [117]:
fcntrg2nm_nr

{'Negative': array([0.19869776, 0.18394886, 0.21732898, 0.21216704, 0.16586192]),
 'Positive': array([0.24566516, 0.21907163, 0.17058257, 0.24513383, 0.23071696]),
 'Polar': array([0.19818883, 0.23535233, 0.25968944, 0.25791225, 0.20380805]),
 'Hydrophobic': array([0.16966378, 0.18041806, 0.20033888, 0.20394694, 0.24491677]),
 'Aromatic': array([0.18778448, 0.18120912, 0.15206013, 0.08083994, 0.1546963 ]),
 'total_frac': array([0.24024641, 0.19301848, 0.1724846 , 0.1724846 , 0.1724846 ])}

In [118]:
fc2nm_mean_nr = np.array([np.mean(fcntrg2nm_nr['Negative']), np.mean(fcntrg2nm_nr['Positive'])
                        ,np.mean(fcntrg2nm_nr['Polar']),np.mean(fcntrg2nm_nr['Hydrophobic'])
                        , np.mean(fcntrg2nm_nr['Aromatic'])])
fc2nm_mean_nr

array([0.19560091, 0.22223403, 0.23099018, 0.19985689, 0.15131799])

In [119]:
fc2nm_std_nr = np.array([np.std(fcntrg2nm_nr['Negative']), np.std(fcntrg2nm_nr['Positive'])
                       ,np.std(fcntrg2nm_nr['Polar']),np.std(fcntrg2nm_nr['Hydrophobic']), np.std(fcntrg2nm_nr['Aromatic'])])
#fc2nm_std_nr[4] = 0.05
fc2nm_std_nr

array([0.01884841, 0.02765231, 0.02600873, 0.02583349, 0.03795366])

In [120]:
x_pos = np.arange(5)
width = 0.3
aa_types = ["Negative", "Positive", "Polar", "Hydrophobic", "Aromatic"]
plt.figure(figsize=(7,7))
plt.bar(x_pos, fc_12nmnr_mean, width, yerr=fc_12nmnr_std, ecolor='black',capsize=5, color='royalblue')
plt.bar(x_pos+width, fc15nm_mean_nr, width, yerr=fc15nm_std_nr, ecolor='black',capsize=5, color='c')
plt.bar(x_pos+(2*width), fc2nm_mean_nr, width, yerr=fc2nm_std_nr, ecolor='black',capsize=5, color='lightslategray')
plt.title(r'Fractional Contacts Rg unrestrained', fontsize=15)
plt.xticks(x_pos+width, labels=aa_types, fontsize=14)
plt.legend(['Rg = 1.2 nm', 'Rg = 1.5 nm', 'Rg = 2 nm'], frameon=False)
plt.ylabel(r'Fractional Contacts', fontsize=15)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0, 0.5, 'Fractional Contacts')

### Total fraction of contacts: averages and std dev calc from 5 20ns blocks

In [121]:
np.mean(fcntrg2nm_nr['total_frac'])

0.19014373716632443

In [122]:
np.std(fcntrg2nm_nr['total_frac'])

0.026283367556468176

### Avg no. PLGA residues per BSA AA residue group 

In [None]:
prgrp2nm_nr

In [None]:
mean_2nm_nr = np.zeros(shape=5)
std_2nm_nr = np.zeros(shape=5)
count = 0
for key, value in prgrp2nm_nr.items():
    mpl_2nm_nr = []
    var_2nm_nr = []
    for i in prgrp2nm_nr[str(key)].flat:
        mpl_2nm_nr.append(i[0])
        var_2nm_nr.append((i[1]))
    
    # calc frac cont averages
    mean_2nm_nr[count] = np.mean(mpl_2nm_nr)
    
    # calc frac cont std dev: https://stats.stackexchange.com/questions/25848/how-to-sum-a-standard-deviation 
    std_2nm_nr[count] = np.std(mpl_2nm_nr)
    # std_2nm_nr[count] = np.sqrt(np.sum(var_2nm_nr)/5)
    
    count += 1


In [None]:
mean_2nm_nr

In [None]:
#np.std()
#std_2nm_nr[4] = 1
std_2nm_nr

In [None]:
x_pos = np.arange(5)
width = 0.3
aa_types = ["Negative", "Positive", "Polar", "Hydrophobic", "Aromatic"]
plt.figure(figsize=(7,7))
plt.bar(x_pos, mean_12nm_nr, width, yerr=std_12nm_nr, ecolor='black',capsize=5, color='royalblue')
plt.bar(x_pos+width, mean_15nm_nr, width, yerr=std_15nm_nr, ecolor='black',capsize=5, color='c')
plt.bar(x_pos+(2*width), mean_2nm_nr, width, yerr=std_2nm_nr, ecolor='black',capsize=5, color='lightslategray')
plt.title(r'No. of PLGA residues Rg unrestrained', fontsize=15)
plt.xticks(x_pos+width, labels=aa_types, fontsize=14)
plt.legend(['Rg = 1.2 nm', 'Rg = 1.5 nm', 'Rg = 2 nm'], frameon=False)
plt.ylabel(r'No. of PLGA residues', fontsize=15)

### Protein/polymer contact map movie

In [None]:
for i in range(1000):
    print(np.nonzero(trj_load2nm[i]))

In [None]:
trj_load2nm[0].shape

In [None]:
fig = plt.figure(figsize=(10,10))

# Set the axis and the plot titles pp

plt.title("BSA/PLGA contact map 2 nm Unres.", fontsize=22, loc='left')
plt.xlabel("PLGA Residue No.", fontsize=22)
plt.ylabel("BSA Residue No.", fontsize=20)

 # Set the axis range 
plt.ylim(583, 0)
plt.xlim(0, 60)

# Plot bands for each chain 
BANDS = (
    (0, 20, "purple", "B"),
    (20, 40, "blue", "C"),
    (40, 60, "green", "D"),
)
    
text_y = 0.98 # Close to the top
for start, stop, color, band in BANDS:
    plt.axvspan(start, stop,color=color, alpha=0.15)
    text_x = middle_of_band(start,stop)
    plt.text(
        text_x,
        text_y,
        "PLGA chain " + band,
        color=color,
        fontsize=18,
        transform=fig.gca().transAxes,
        horizontalalignment='center',
        verticalalignment='center',
        style='italic',
    )
    
plt.text(0.94, 1, "Time [ns]:", fontsize=20, transform=fig.gca().transAxes, horizontalalignment='right', verticalalignment='bottom')

# Set tick label size
fig.gca().tick_params(axis='both', which='major', labelsize=20)

ims = []
for i in range(10000):
    data = trj_ppmap2nm_nr[i]
    im = plt.imshow(data, aspect='auto', cmap='Greys')
    t_sim = plt.text(1.03, 1, str(i/100), fontsize=20, transform=fig.gca().transAxes, horizontalalignment='right', verticalalignment='bottom')
    ims.append([im, t_sim])
    
ani = animation.ArtistAnimation(fig, ims, blit=True, repeat=False)
ani.save('2nm_NoRes.mp4', writer='ffmpeg', fps=50, bitrate=100000)
#plt.tight_layout()
#plt.show()

In [None]:
plt.close('all')