In [1]:
#import the necessary modules 
%matplotlib inline 
import numpy as np 
import matplotlib.pyplot as plt 
import pandas as pd 
#import scipy
import sklearn
import itertools as it
from itertools import cycle 
import os.path as op
import timeit 
import json
from matplotlib import animation
import matplotlib.font_manager as font_manager
from collections import namedtuple
#from functools import partial
#from pathlib import Path

In [2]:
# Set plotting style.
# Matplotlib 3.6 renamed the bundled seaborn style sheets to "seaborn-v0_8-*" and
# newer releases no longer ship the old aliases, so use the new name when it is
# available and fall back to the legacy name on older installations.
plotting_style = 'seaborn-v0_8-white' if 'seaborn-v0_8-white' in plt.style.available else 'seaborn-white'
plt.style.use(plotting_style)

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
#import matplotlib.pyplot as plt

In [3]:
%matplotlib widget

In [4]:
import multiprocessing as m_proc
m_proc.cpu_count()

4

### Now use MDAnalysis to calculate the no. of frames in which a center PLGA residue and a terminal PLGA residue are within 4 Angstroms of BSA (1.2 nm restrained system)

Import MDAnalysis

In [5]:
from prot_polymer_analysis import get_protresd_list, aa_frmcount, grptwocnt_aa, gtwo_trjcnt 
from prot_polymer_analysis import frac_cont, bavg_frac_cnt, prot_poly_cntmovie, AA_list_org

In [6]:
# Import MDAnalysis
import MDAnalysis as mda
import MDAnalysis.analysis.distances as maa_dist

### First table will be total fractional contacts and oligomer occupancy values for each Rg value 

#### Distance-based analysis 

Find residues that have at least one atom within a cutoff $d = 4.0$ Angstrom near water molecules in BSA/water simulation

Calculate the number of surface bsa residues from a 1 ns BSA/water simulation

In [7]:
#Units of Angstroms 
dmax = 4.0 

In [8]:
def middle_of_band(band_start, band_stop, plot_min=0, plot_max=60):
    """Return the centre of a band as a fraction of the plotted range.

    Parameters
    ----------
    band_start, band_stop : number
        Lower and upper edge of the band, in data coordinates.
    plot_min, plot_max : number, optional
        Data-coordinate limits of the plot axis (defaults: 0 and 60).

    Returns
    -------
    float
        Position of the band centre, where 0 corresponds to ``plot_min`` and
        1 to ``plot_max``.
    """
    band_centre = band_start + (band_stop - band_start) / 2
    plot_span = plot_max - plot_min
    return (band_centre - plot_min) / plot_span

# 1.2 nm PLGA unrestrained Rg 100 ns trajectory C2

Load the rg = 1.2 nm (3 PLGA N = 20 oligomer/BSA system)

In [9]:
# Structure (PDB) and trajectory (XTC) files of the unrestrained 1.2 nm, Config. 2 run
topology_12nm_C2 = "../C2_bsa_n20plga_inwater/C2_plgaNoRes/1.2nmNoRes_C2/1.2nmC2NR_bplga.pdb"
trajectory_12nm_C2 = "../C2_bsa_n20plga_inwater/C2_plgaNoRes/1.2nmNoRes_C2/nowat_12nmC2NR.xtc"

# Build the MDAnalysis universe from the two files
pn12nm_noresC2 = mda.Universe(topology_12nm_C2, trajectory_12nm_C2)

Check that we are on the first frame

In [10]:
pn12nm_noresC2.trajectory.frame

0

In [11]:
pn20nr_lenC2 = len(pn12nm_noresC2.trajectory)
pn20nr_lenC2

10001

In [12]:
#Select all the PLGA residues, heavy atoms only 
pn20_allnrC2 = pn12nm_noresC2.select_atoms("resname sPLG PLG tPLG and not type H")
pn20_allnrC2

<AtomGroup with 543 atoms>

In [13]:
# Select BSA residues, heavy atoms only 
prot_noresC2 = pn12nm_noresC2.select_atoms("protein and not type H")
prot_noresC2

<AtomGroup with 4653 atoms>

### Contact Analysis

In [14]:
# Run the protein/PLGA contact analysis over the whole 1.2 nm trajectory and time it.
# Inputs: dmax = 4.0, protein group (4653 atoms), PLGA atom group (543 atoms).
# Later cells read h2di_C2nr[key][0] as the number of contacted frames and
# h2di_C2nr[key][1] as the mean occupancy of each contacted BSA residue.
# NOTE(review): an earlier note quoted 381.6 s on 4 cores for this call; the
# elapsed time recorded in the output below is ~941 s.
start = 0
end = pn20nr_lenC2 - 1
snr_time = timeit.default_timer()
h2di_C2nr = aa_frmcount(prot_noresC2, pn20_allnrC2, dmax, pn12nm_noresC2, start, end)
timeit.default_timer() - snr_time

941.2504487065598

In [15]:
len(h2di_C2nr.keys())

151

In [16]:
a_a = ["GLY","ALA","VAL","LEU","ILE","MET","PHE","TRP","PRO","SER","THR","CYS","TYR","ASN","GLN","ASP"
               ,"GLU","LYS","ARG","HIS"]

In [17]:
# Build a "RESNAME RESID" label (e.g. "ASP 1") for every BSA residue, in residue order.
# The residue name and number are read directly from the MDAnalysis Residue objects
# rather than sliced out of their repr string, so the labels do not depend on the
# repr format or on a hard-coded residue count.
red_bsa = [res.resname + " " + str(res.resid) for res in prot_noresC2.residues]

In [18]:
# Table of BSA residues with the number of frames each one is in contact with PLGA (1.2 nm)
pr_resnr = list(prot_noresC2.residues)
ss_resnr = list(map(str, pr_resnr))

# First entry of every h2di_C2nr value = no. of contacted frames
rkg_12nr = {res_label: counts[0] for res_label, counts in h2di_C2nr.items()}

plgC2_1_2nr = pd.DataFrame({"BSA_des_res": ss_resnr})
# Residues absent from the contact dictionary map to NaN and are set to 0 frames
plgC2_1_2nr['mda_1.2nm_noresC2'] = (
    plgC2_1_2nr['BSA_des_res']
    .map(rkg_12nr)
    .replace('nan', np.nan)
    .fillna(0)
)
# Swap the repr-style residue strings for the short "RESNAME RESID" labels
plgC2_1_2nr['BSA_des_res'] = red_bsa
plgC2_1_2nr.head()

Unnamed: 0,BSA_des_res,mda_1.2nm_noresC2
0,ASP 1,9987.0
1,THR 2,9923.0
2,HIS 3,9909.0
3,LYS 4,9940.0
4,SER 5,9502.0


In [20]:
# Table of BSA residues with their mean occupancy (1.2 nm), rounded to 2 decimals
pr_res_12ur = list(prot_noresC2.residues)
ss_res_12ur = list(map(str, pr_res_12ur))

# Second entry of every h2di_C2nr value = mean occupancy
rkg_12urC2 = {res_label: entry[1] for res_label, entry in h2di_C2nr.items()}

plgC2_1_2nm_NRocc = pd.DataFrame({"BSA_des_res": ss_res_12ur})
# Residues absent from the contact dictionary map to NaN and are set to 0
plgC2_1_2nm_NRocc['mda_occ_1.2nm_NRC2'] = (
    plgC2_1_2nm_NRocc['BSA_des_res']
    .map(rkg_12urC2)
    .replace('nan', np.nan)
    .fillna(0)
    .round(2)
)
plgC2_1_2nm_NRocc

Unnamed: 0,BSA_des_res,mda_occ_1.2nm_NRC2
0,"<Residue ASP, 1>",1.00
1,"<Residue THR, 2>",0.99
2,"<Residue HIS, 3>",0.99
3,"<Residue LYS, 4>",0.99
4,"<Residue SER, 5>",0.95
...,...,...
578,"<Residue GLN, 579>",0.00
579,"<Residue THR, 580>",0.00
580,"<Residue ALA, 581>",0.00
581,"<Residue LEU, 582>",0.00


In [23]:
# Expand the per-residue occupancy (one value per BSA residue) into a per-atom array,
# so that every heavy atom of a residue carries that residue's occupancy.
bsa_C2r = np.asarray(prot_noresC2.resids)  # residue id of every selected atom
mC2_occ_12ur = np.asarray(plgC2_1_2nm_NRocc['mda_occ_1.2nm_NRC2'], dtype=float)  # one value per residue

# Index of the last atom of each residue except the final one (where the resid changes)
atC2_indNR = np.where(bsa_C2r[:-1] != bsa_C2r[1:])[0]

# Start offset of every residue, closed with the total atom count, gives the
# number of atoms per residue without hard-coding the system size.
res_offsets_12nm = np.concatenate(([0], atC2_indNR + 1, [len(bsa_C2r)]))
atoms_per_res_12nm = np.diff(res_offsets_12nm)

# np.repeat raises if the number of residues and occupancy values disagree,
# instead of silently writing values to the wrong atoms.
mC2_occ = np.repeat(mC2_occ_12ur, atoms_per_res_12nm)

In [24]:
np.nonzero(mC2_occ)

(array([   0,    1,    2,    3,    4,    5,    6,    7,    8,    9,   10,
          11,   12,   13,   14,   15,   16,   17,   18,   19,   20,   21,
          22,   23,   24,   25,   26,   27,   28,   29,   30,   31,   32,
          33,   34,   35,   36,   37,   38,   39,   40,   41,   42,   43,
          44,   45,   46,   47,   48,   57,   58,   59,   60,   61,   83,
          84,   85,   86,   87,   88,   89,   90,   91,   92,   93,   94,
          95,   96,   97,   98,   99,  100,  101,  102,  103,  104,  105,
         106,  107,  108,  109,  110,  111,  112,  113,  114,  115,  116,
         117,  118,  119,  120,  121,  122,  123,  124,  125,  126,  127,
         128,  129,  130,  131,  132,  133,  134,  135,  136,  137,  138,
         139,  140,  141,  142,  143,  144,  145,  146,  147,  148,  149,
         150,  389,  390,  391,  392,  393,  394,  395,  396,  397,  414,
         415,  416,  417,  418,  419,  420,  421,  422,  423,  424,  425,
         426,  427,  428,  429,  430, 

### Visualize Occupancy on protein 

In [25]:
prot_noresC2.occupancies = mC2_occ
prot_noresC2.occupancies

array([1., 1., 1., ..., 0., 0., 0.])

In [26]:
with mda.Writer("prot_12nm_noresC2.pdb") as pdb:
    pdb.write(prot_noresC2)

### Residue Importance: 1.2 nm Unrestrained C2

In [27]:
trj_pp12nmC2_nr = prot_poly_cntmovie(prot_noresC2, pn20_allnrC2, dmax, pn12nm_noresC2, 0, 10000)
#trj_ppmap_12nm_chC = prot_poly_cntmovie(prot, all_pn20_C, dmax, u_pn20, 0, 10000)

In [28]:
np.save('1.2nmPLGA_NoResC2.npy', trj_pp12nmC2_nr)    # .npy extension is added if not given

In [None]:
# NOTE(review): this reads "1.2nm_NoRes.npy", not the "1.2nmPLGA_NoResC2.npy" file written
# by the np.save call in the previous cell, and binds it to trj_pp12nm_nr (no "C2" in the
# name) -- confirm which data set is intended here.
# allow_pickle=True unpickles objects on load; only use it on files you created yourself.
trj_pp12nm_nr = np.load("1.2nm_NoRes.npy", allow_pickle=True)

In [29]:
np.sum(trj_pp12nmC2_nr[1000][0])

4.0

In [30]:
kj = np.zeros(shape=(10000, 583))
kj[:,582].shape

(10000,)

In [31]:
# Number of PLGA contacts of every BSA residue in every frame
# (rows = 10000 frames, columns = 583 BSA residues).
# Each frame's contact map is summed along its second axis in a single vectorised
# call instead of one np.sum call per residue (10 000 calls rather than 5.83 million).
# Assumes every trj_pp12nmC2_nr[i] is a 2-D (BSA residue x PLGA residue) map -- TODO confirm.
ppC2_12nmur_ct = np.zeros(shape=(10000, 583))
for i in range(10000):
    ppC2_12nmur_ct[i] = np.sum(trj_pp12nmC2_nr[i], axis=1)

In [32]:
# Total number of PLGA contacts of every BSA residue, summed over all frames.
# A single axis-0 reduction replaces the per-column Python loop and no longer
# hard-codes the number of residues.
ppC2_12nmtot_nr = np.sum(ppC2_12nmur_ct, axis=0)

In [33]:
np.nonzero(ppC2_12nmtot_nr)

(array([  0,   1,   2,   3,   4,   5,   7,   8,  10,  11,  12,  13,  14,
         15,  16,  17,  19,  47,  50,  51,  52,  53,  54,  55,  56,  57,
         58,  61,  62,  63,  64,  65, 118, 119, 120, 122, 123, 125, 126,
        127, 128, 129, 130, 133, 153, 156, 157, 158, 160, 161, 162, 163,
        164, 165, 166, 167, 168, 169, 170, 171, 172, 174, 175, 178, 179,
        231, 250, 257, 260, 263, 266, 274, 275, 277, 278, 279, 280, 281,
        282, 283, 284, 349, 350, 353, 354, 357, 358, 366, 367, 370, 371,
        373, 374, 375, 376, 377, 378, 380, 381, 383, 384, 385, 387, 388,
        391, 392, 393, 394, 395, 396, 397, 399, 400, 403, 404, 405, 407,
        408, 409, 411, 412, 470, 473, 474, 477, 478, 481, 483, 484, 485,
        487, 488, 489, 490, 491, 493, 527, 536, 537, 538, 539, 540, 542,
        543, 544, 546, 547, 550, 551, 581, 582]),)

In [34]:
plt.close('all')

In [None]:
# NOTE(review): unexecuted cell. pp_12nmtot_nr and cNR are not defined anywhere in the
# visible part of this notebook, and y_pos / wid are first assigned in the following cell,
# so this line raises NameError on a fresh "Restart & Run All" -- confirm whether it can be removed.
plt.bar(y_pos, pp_12nmtot_nr/cNR, align='center',width=wid, alpha=0.4, color='#1D77CF',label='1.2 nm PLGA')

In [35]:
# Bar positions (BSA residue numbers 1..583) and a constant bar width
y_pos = np.arange(1, 584)
wid = np.full(583, 1.5)

fig = plt.figure(figsize=(12,12))
fig.canvas.layout.width = '800px'
fig.canvas.layout.height = '700px'

# Per-residue contact totals normalised by the overall number of contacts
norm_contacts_12nm = ppC2_12nmtot_nr / np.sum(ppC2_12nmtot_nr)
plt.bar(y_pos + 0.25, norm_contacts_12nm, align='center', width=wid,
        color='#1D77CF', alpha=0.3, label='1.2 nm PLGA')

plt.title("BSA in water with PLGA unrestrained, 100 ns, Config. 2", fontsize=18)
plt.xlim([0,600])
plt.ylim([0,0.2])
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.legend(fontsize=14)
plt.ylabel(r'Normalized Total No. of PLGA contacts', fontsize=15)
plt.xlabel(r'BSA Residue ID', fontsize=15)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 0, 'BSA Residue ID')

### Total number of residues that are within 4 angstroms of a PLGA oligomer residue within a 100 ns trajectory block C2

In [36]:
# Read the output of wrapper.sh (VMD): one frame count per BSA residue.
# NOTE(review): the original comment described these as contacts with a PLGA trimer, while
# the file name and the notes below refer to the 1 ns BSA/water simulation -- confirm.
wat_data = pd.read_csv('occ_BSA1ns.txt', sep=" ", header=None, usecols=None, index_col=None)
wat_data.columns = ["BSA_res_no", "No. of frames (VMD)"]
wat_data = wat_data.drop("BSA_res_no", axis=1)

# Label every row with the repr string of the matching BSA residue
pr_res = list(prot_noresC2.residues)
ss_res = [str(row) for row in pr_res]

wat_data['BSA_des_res'] = ss_res
wat_data = wat_data[['BSA_des_res', "No. of frames (VMD)"]]

# Load the MDAnalysis frame counts from MDA_BSA1ns.txt (129003 atoms SOL group was used to
# calc. frame counts for the txt file). The file is opened in a with-block so the handle
# is closed as soon as the JSON has been parsed.
with open("MDA_BSA1ns.txt") as mda_counts_file:
    h2ob_dict = json.load(mda_counts_file)
wat_data['Mda_frames'] = wat_data['BSA_des_res'].map(h2ob_dict)


def _count_full_contact_residues(res_labels, frame_counts, resnames, full_counts):
    """Count, per amino-acid name, the residues whose frame count is in ``full_counts``.

    Parameters
    ----------
    res_labels : pandas.Series of str
        Residue description strings; a residue belongs to an amino acid when the
        three-letter name occurs in its string (plain substring match).
    frame_counts : pandas.Series
        Frame count of each residue, aligned with ``res_labels``.
    resnames : sequence of str
        Amino-acid names to count, in output order.
    full_counts : sequence of int
        Frame counts that qualify a residue for counting.

    Returns
    -------
    list of str
        One "NAME  count" entry (two spaces) per amino-acid name.
    """
    qualifies = frame_counts.isin(list(full_counts))
    summary = []
    for resname in resnames:
        is_resname = res_labels.str.contains(resname, regex=False)
        summary.append(str(resname) + "  " + str(int((is_resname & qualifies).sum())))
    return summary


# From MDAnalysis (I ran a 1 ns NPT simulation of 1 BSA in water).
# NOTE(review): residues with 1001 or 1000 frames are counted here, whereas the VMD
# count below uses 1001 or 1002 -- confirm the two thresholds are meant to differ.
c_list = _count_full_contact_residues(wat_data["BSA_des_res"], wat_data['Mda_frames'],
                                      a_a, (1001, 1000))

# From VMD: residues with 1001 or 1002 frames.
# Main difference is that Alanine 583 is counted for all 1001 frames. It seems VMD is
# unable to calc dist for that res.
vmd_list = _count_full_contact_residues(wat_data["BSA_des_res"], wat_data["No. of frames (VMD)"],
                                        a_a, (1001, 1002))

# Amino-acid groups, kept for reference:
#hydrophobic_res = ['ALA', 'ILE', 'LEU', 'VAL', 'GLY', 'PRO','PHE', 'TRP','MET']
#polar_res = ['ASN', 'CYS', 'GLN', 'SER', 'THR','TYR']
#neg_res = ['ASP', 'GLU']
#pos_res = ['ARG', 'HIS', 'LYS']
# aromatic_res = ['PHE', 'TRP', 'TYR', 'HIS']
#all_res = [pos_res, neg_res, polar_res, hydrophobic_res]

# Put the AA count in a pandas dataframe 
dg, ji = AA_list_org(c_list)
aa_count = pd.DataFrame(data=dg, index=None, columns=['Amino_acids'])
new_lf = pd.Series(data=ji, index=None)
vmg, vmdj = AA_list_org(vmd_list)
n2lf = pd.Series(data=vmdj, index=None)
aa_count['No_of_surf_res (MDAnalysis)'] = new_lf
aa_count['No_of_surf_res (VMD)'] = n2lf

In [37]:
# Some residues never come into contact with the 3 N = 20 PLGA oligomers within 100 ns;
# keep only the labels of the BSA residues with a non-zero contact frame count.
has_contact_12nm = plgC2_1_2nr['mda_1.2nm_noresC2'] != 0
apl_12nm_nrC2 = plgC2_1_2nr.loc[has_contact_12nm, 'BSA_des_res'].tolist()

# For every amino acid in aa_count, count the contacted residues whose label
# contains that amino-acid name.
cpl_12nm_nrC2 = [
    sum(aa_name in res_label for res_label in apl_12nm_nrC2)
    for aa_name in aa_count['Amino_acids']
]

aa_count['plgaC2_1.2nm_100ns_NR'] = cpl_12nm_nrC2
aa_count

Unnamed: 0,Amino_acids,No_of_surf_res (MDAnalysis),No_of_surf_res (VMD),plgaC2_1.2nm_100ns_NR
0,LYS,59,59,25
1,ARG,23,23,3
2,HIS,16,16,5
3,ASP,40,40,11
4,GLU,59,59,21
5,SER,23,23,5
6,THR,29,29,8
7,CYS,24,24,12
8,TYR,19,19,2
9,ASN,14,14,6


In [38]:
# This gives the total number of residues that are within 4 angstroms of a PLGA oligomer residue
# within a 100 ns trajectory block
aa_count['plgaC2_1.2nm_100ns_NR'].sum()

151

In [39]:
# This gives the total number of residues that are within 4 angstroms of a water molecule
# within a 1 ns trajectory block
aa_count['No_of_surf_res (MDAnalysis)'].sum()

487

In [40]:
# This gives the total fraction of contacts within the 1.2 nm Rg 100 ns trajectory
aa_count['plgaC2_1.2nm_100ns_NR'].sum()/aa_count['No_of_surf_res (MDAnalysis)'].sum()

0.31006160164271046

In [41]:
# Mean and standard deviation of the occupancy over all contacted BSA residues
ll_mo12_nr = [entry[1] for entry in h2di_C2nr.values()]
occ_mean_12nr = np.mean(ll_mo12_nr)
occ_std_12nr = np.std(ll_mo12_nr)
print("Mean Occpancy (1.2 nm unrestrained Rg): "+str(occ_mean_12nr), "Occ. std. dev.: "+str(occ_std_12nr))

Mean Occpancy (1.2 nm unrestrained Rg): 0.338282119205298 Occ. std. dev.: 0.3640055969088105


In [42]:
cd_12nm_nr = frac_cont(h2di_C2nr)
cd_12nm_nr

{'Negative': [134400.0, 32, 634200.0, 0.2451762139185758],
 'Positive': [115600.0, 33, 528957.5757575757, 0.2044904064140502],
 'Polar': [118578.0, 39, 459109.6923076923, 0.17748782108691624],
 'Hydrophobic': [142228.0, 47, 456945.2765957447, 0.17665107676398786],
 'Aromatic': [40331.0, 12, 507498.4166666666, 0.19619448181646984]}

In [43]:
no_surf = aa_count['No_of_surf_res (MDAnalysis)'].sum()
no_surf

487

### Calc. fractional contacts for each AA group type 

In [44]:
fcnt1_2nm_C2nr, prgrp12nm_C2nr, aamatx_12nm_C2nr = bavg_frac_cnt(5, prot_noresC2, pn20_allnrC2, dmax,
                                                        pn12nm_noresC2, no_surf, 0, 10000)

0
2000
2000
4000
4000
6000
6000
8000
8000
10000


In [45]:
fcnt1_2nm_C2nr

{'Negative': array([0.22257848, 0.21647773, 0.21419209, 0.24343697, 0.23579261]),
 'Positive': array([0.21751018, 0.17421094, 0.19761702, 0.21030902, 0.21419333]),
 'Polar': array([0.17846708, 0.1960051 , 0.17706685, 0.18798461, 0.19501763]),
 'Hydrophobic': array([0.1753968 , 0.17877725, 0.17439348, 0.1949802 , 0.19726609]),
 'Aromatic': array([0.20604746, 0.23452898, 0.23673056, 0.1632892 , 0.15773035]),
 'total_frac': array([0.16837782, 0.22997947, 0.22587269, 0.23613963, 0.23408624])}

In [46]:
# Mean fractional contact of each amino-acid group over the trajectory blocks
fc_12nmnrC2_mean = np.array([
    np.mean(fcnt1_2nm_C2nr[grp])
    for grp in ('Negative', 'Positive', 'Polar', 'Hydrophobic', 'Aromatic')
])
fc_12nmnrC2_mean

array([0.22649558, 0.2027681 , 0.18690825, 0.18416277, 0.19966531])

In [47]:
# Standard deviation of the fractional contact of each amino-acid group over the blocks
fc_12nmnrC2_std = np.array([
    np.std(fcnt1_2nm_C2nr[grp])
    for grp in ('Negative', 'Positive', 'Polar', 'Hydrophobic', 'Aromatic')
])
fc_12nmnrC2_std

array([0.01131832, 0.01579098, 0.0079722 , 0.00989948, 0.03379882])

In [48]:
# Bar chart of the mean fractional contacts per amino-acid group, with std-dev error bars
aa_types = ["Negative", "Positive", "Polar", "Hydrophobic", "Aromatic"]
x_pos = np.arange(5)

fig = plt.figure(figsize=(12,12))
fig.canvas.layout.width = '800px'
fig.canvas.layout.height = '700px'

bar_style = dict(ecolor='black', capsize=5, color='c')
plt.bar(x_pos, fc_12nmnrC2_mean, yerr=fc_12nmnrC2_std, **bar_style)
plt.xticks(x_pos, labels=aa_types, fontsize=12)
plt.title(r'Fractional Contacts 1.2 nm Rg unrestrained, Config 2', fontsize=15)
plt.ylabel(r'Fractional Contacts', fontsize=15)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0, 0.5, 'Fractional Contacts')

### Total fraction of contacts: averages and std dev calc from 5 20ns blocks

In [49]:
np.mean(fcnt1_2nm_C2nr['total_frac'])

0.21889117043121148

In [50]:
np.std(fcnt1_2nm_C2nr['total_frac'])

0.025501724347039054

### Avg no. PLGA residues per BSA AA residue group 

In [51]:
prgrp12nm_C2nr

{'Negative': array([array([31.84      ,  4.78533175]),
        array([36.4715    ,  5.52450792]),
        array([42.2165    ,  5.14913854]),
        array([46.2195    ,  6.23605001]), array([44.9485  ,  5.245841])],
       dtype=object), 'Positive': array([array([23.146     ,  4.17165243]),
        array([32.439     ,  6.07620597]),
        array([34.3275    ,  6.02604711]),
        array([43.5465    ,  5.52700984]),
        array([41.7765    ,  4.94070316])], dtype=object), 'Polar': array([array([23.819     ,  5.31923293]),
        array([41.22      ,  9.24557191]),
        array([39.2205    ,  6.81673527]),
        array([46.53      ,  6.43094861]),
        array([47.7235    ,  6.21265223])], dtype=object), 'Hydrophobic': array([array([29.1065    ,  5.49610387]),
        array([32.9565    ,  6.45682645]),
        array([37.04      ,  5.87855424]),
        array([40.1055    ,  5.97163041]),
        array([45.241     ,  7.11462712])], dtype=object), 'Aromatic': array([array([8.062     

In [52]:
# Average number of PLGA residues near each BSA amino-acid group.
# Every element of prgrp12nm_C2nr[group] is a two-value array for one trajectory block;
# the first value is used as that block's mean (the second is presumably the block's
# std. dev. -- TODO confirm against bavg_frac_cnt).
n_groups_12nm = len(prgrp12nm_C2nr)
mean_12nm_C2nr = np.zeros(shape=n_groups_12nm)
std_12nm_C2nr = np.zeros(shape=n_groups_12nm)

for grp_idx, block_stats in enumerate(prgrp12nm_C2nr.values()):
    mpl_12nm_C2nr = [blk[0] for blk in block_stats.flat]

    # Average of the block means
    mean_12nm_C2nr[grp_idx] = np.mean(mpl_12nm_C2nr)

    # Spread of the block means. The per-block std. devs. are NOT pooled here; the
    # unused list of squared per-block std. devs. from the earlier version was dropped.
    std_12nm_C2nr[grp_idx] = np.std(mpl_12nm_C2nr)


In [53]:
mean_12nm_C2nr

array([40.3392, 35.0471, 39.7026, 36.8899, 11.8722])

In [54]:
std_12nm_C2nr

array([5.41374794, 7.30042212, 8.55303338, 5.58849596, 2.68426224])

In [55]:
# Bar chart of the mean no. of PLGA residues per amino-acid group, with std-dev error bars
aa_types = ["Negative", "Positive", "Polar", "Hydrophobic", "Aromatic"]
x_pos = np.arange(5)

fig = plt.figure(figsize=(12,12))
fig.canvas.layout.width = '800px'
fig.canvas.layout.height = '700px'

bar_style = dict(ecolor='black', capsize=5)
plt.bar(x_pos, mean_12nm_C2nr, yerr=std_12nm_C2nr, **bar_style)
plt.xticks(x_pos, labels=aa_types, fontsize=12)
plt.title(r'No. of PLGA residues 1.2 nm Rg unrestrained, Config 2', fontsize=15)
plt.ylabel(r'No. of PLGA residues', fontsize=15)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0, 0.5, 'No. of PLGA residues')

### Protein/polymer contact map movie

In [None]:
# Render the BSA/PLGA contact map of every frame as one movie frame and save it as an mp4.
# NOTE(review): the frames are read from trj_pp12nm_nr, which in the visible notebook is only
# assigned by the unexecuted np.load("1.2nm_NoRes.npy") cell, not from the Config. 2 array
# trj_pp12nmC2_nr computed above -- confirm which data set this movie should show.
fig = plt.figure(figsize=(10,10))
ax = fig.gca()

# Set the axis and the plot titles
plt.title("BSA/PLGA contact map 1.2 nm Unres", fontsize=22, loc='left')
plt.xlabel("PLGA Residue No.", fontsize=22)
plt.ylabel("BSA Residue No.", fontsize=20)

# Set the axis range (y axis inverted so residue 0 is at the top)
plt.ylim(583, 0)
plt.xlim(0, 60)

# Plot bands for each chain: (first residue, last residue, colour, chain label)
BANDS = (
    (0, 20, "purple", "B"),
    (20, 40, "blue", "C"),
    (40, 60, "green", "D"),
)

text_y = 0.98 # Close to the top
# The loop variables are deliberately NOT called start/stop: `start` is also the
# notebook-level first-frame index passed to aa_frmcount and must not be overwritten here.
for band_start, band_stop, band_color, band_label in BANDS:
    plt.axvspan(band_start, band_stop, color=band_color, alpha=0.15)
    text_x = middle_of_band(band_start, band_stop)
    plt.text(
        text_x,
        text_y,
        "PLGA chain " + band_label,
        color=band_color,
        fontsize=18,
        transform=ax.transAxes,
        horizontalalignment='center',
        verticalalignment='center',
        style='italic',
    )

plt.text(0.94, 1, "Time [ns]:", fontsize=20, transform=ax.transAxes, horizontalalignment='right', verticalalignment='bottom')

# Set tick label size
ax.tick_params(axis='both', which='major', labelsize=20)

# One [image, time label] artist pair per frame; the label shows i/100 as the time in ns
ims = []
for i in range(10000):
    data = trj_pp12nm_nr[i]
    im = plt.imshow(data, aspect='auto', cmap='Greys')
    t_sim = plt.text(1.03, 1, str(i/100), fontsize=20, transform=ax.transAxes, horizontalalignment='right', verticalalignment='bottom')
    ims.append([im, t_sim])

ani = animation.ArtistAnimation(fig, ims, blit=True, repeat=False)
ani.save('1.2nm_NoRes.mp4',writer='ffmpeg',fps=50, bitrate=100000)
#plt.tight_layout()
#plt.show()

# 1.5 nm PLGA unrestrained Rg 100 ns trajectory

Load the rg = 1.5 nm (3 PLGA N = 20 oligomer/BSA system)

In [56]:
# Structure (PDB) and trajectory (XTC) files of the unrestrained 1.5 nm, Config. 2 run
topology_15nm_C2 = "../C2_bsa_n20plga_inwater/C2_plgaNoRes/1.5nmNoRes_C2/1.5nm_NoResplga.pdb"
trajectory_15nm_C2 = "../C2_bsa_n20plga_inwater/C2_plgaNoRes/1.5nmNoRes_C2/C2plga_NoRes.xtc"

# Build the MDAnalysis universe from the two files
pn15nm_noresC2 = mda.Universe(topology_15nm_C2, trajectory_15nm_C2)

Check that we are on the first frame

In [57]:
pn15nm_noresC2.trajectory.frame

0

In [58]:
pn20_15nrC2 = len(pn15nm_noresC2.trajectory)
pn20_15nrC2

10001

In [59]:
#Select all the PLGA residues, heavy atoms only 
pn20_all15nrC2 = pn15nm_noresC2.select_atoms("resname sPLG PLG tPLG and not type H")
pn20_all15nrC2

<AtomGroup with 543 atoms>

In [60]:
# Select BSA residues, heavy atoms only 
prot15_noresC2 = pn15nm_noresC2.select_atoms("protein and not type H")
prot15_noresC2

<AtomGroup with 4653 atoms>

### Contact Analysis

In [61]:
# Run the protein/PLGA contact analysis over the whole 1.5 nm trajectory and time it.
# Inputs: dmax = 4.0, protein group (4653 atoms), PLGA atom group (543 atoms).
# Later cells read h2diC2_15nr[key][0] as the number of contacted frames and
# h2diC2_15nr[key][1] as the mean occupancy of each contacted BSA residue.
# NOTE(review): an earlier note quoted 381.6 s on 4 cores for this call; the
# elapsed time recorded in the output below is ~773 s.
start = 0
end = pn20_15nrC2 - 1
s_time15nr = timeit.default_timer()
h2diC2_15nr = aa_frmcount(prot15_noresC2, pn20_all15nrC2, dmax, pn15nm_noresC2, start, end)
timeit.default_timer() - s_time15nr

772.9708434622735

In [62]:
len(h2diC2_15nr.keys())

106

In [64]:
# Table of BSA residues with the number of frames each one is in contact with PLGA (1.5 nm)
pr_15nr = list(prot15_noresC2.residues)
ss_15nr = list(map(str, pr_15nr))

# First entry of every h2diC2_15nr value = no. of contacted frames
rkg_15nm_nrC2 = {res_label: counts[0] for res_label, counts in h2diC2_15nr.items()}

plgC2_1_5nm_nr = pd.DataFrame({"BSA_des_res": ss_15nr})
# Residues absent from the contact dictionary map to NaN and are set to 0 frames
plgC2_1_5nm_nr['mda_1.5nm_nrC2'] = (
    plgC2_1_5nm_nr['BSA_des_res']
    .map(rkg_15nm_nrC2)
    .replace('nan', np.nan)
    .fillna(0)
)
# Swap the repr-style residue strings for the short "RESNAME RESID" labels
plgC2_1_5nm_nr['BSA_des_res'] = red_bsa
plgC2_1_5nm_nr.head()

Unnamed: 0,BSA_des_res,mda_1.5nm_nrC2
0,ASP 1,10000.0
1,THR 2,9980.0
2,HIS 3,9675.0
3,LYS 4,9890.0
4,SER 5,8074.0


In [65]:
# Extract mean occupancy values for the 1.5 nm run and add them as a new column next to
# the 1.2 nm values in plgC2_1_2nm_NRocc.
# The residue list is taken from the protein selection (prot15_noresC2), matching the
# 1.2 nm cell, instead of from the whole universe.
pr_res_15ur = list(prot15_noresC2.residues)
ss_res_15ur = [str(row) for row in pr_res_15ur]

# Second entry of every h2diC2_15nr value = mean occupancy
rkgC2_15ur = {key: value[1] for key, value in h2diC2_15nr.items()}

# The mapping is keyed on the residue strings already stored in BSA_des_res;
# residues without any contact map to NaN and are set to 0.
plgC2_1_2nm_NRocc['mda_occ_1.5nm_NRC2'] = plgC2_1_2nm_NRocc['BSA_des_res'].map(rkgC2_15ur)
plgC2_1_2nm_NRocc['mda_occ_1.5nm_NRC2'] = plgC2_1_2nm_NRocc['mda_occ_1.5nm_NRC2'].replace('nan', np.nan).fillna(0)
plgC2_1_2nm_NRocc['mda_occ_1.5nm_NRC2'] = plgC2_1_2nm_NRocc['mda_occ_1.5nm_NRC2'].round(2)
plgC2_1_2nm_NRocc

Unnamed: 0,BSA_des_res,mda_occ_1.2nm_NRC2,mda_occ_1.5nm_NRC2
0,"<Residue ASP, 1>",1.00,1.00
1,"<Residue THR, 2>",0.99,1.00
2,"<Residue HIS, 3>",0.99,0.97
3,"<Residue LYS, 4>",0.99,0.99
4,"<Residue SER, 5>",0.95,0.81
...,...,...,...
578,"<Residue GLN, 579>",0.00,0.00
579,"<Residue THR, 580>",0.00,0.00
580,"<Residue ALA, 581>",0.00,0.00
581,"<Residue LEU, 582>",0.00,0.00


In [66]:
# Expand the per-residue occupancy (one value per BSA residue) into a per-atom array,
# so that every heavy atom of a residue carries that residue's occupancy.
bsa_15C2nr = np.asarray(prot15_noresC2.resids)  # residue id of every selected atom
mC2_occ_15nmNR = np.asarray(plgC2_1_2nm_NRocc['mda_occ_1.5nm_NRC2'], dtype=float)  # one value per residue

# Index of the last atom of each residue except the final one (where the resid changes)
at15C2_ind = np.where(bsa_15C2nr[:-1] != bsa_15C2nr[1:])[0]

# Start offset of every residue, closed with the total atom count, gives the
# number of atoms per residue without hard-coding the system size.
res_offsets_15nm = np.concatenate(([0], at15C2_ind + 1, [len(bsa_15C2nr)]))
atoms_per_res_15nm = np.diff(res_offsets_15nm)

# np.repeat raises if the number of residues and occupancy values disagree,
# instead of silently writing values to the wrong atoms.
mC2_occ15 = np.repeat(mC2_occ_15nmNR, atoms_per_res_15nm)

In [67]:
np.nonzero(mC2_occ15)

(array([   0,    1,    2,    3,    4,    5,    6,    7,    8,    9,   10,
          11,   12,   13,   14,   15,   16,   17,   18,   19,   20,   21,
          22,   23,   24,   25,   26,   27,   28,   29,   30,   31,   32,
          33,   34,   35,   36,   37,   38,   39,   40,   41,   42,   43,
          44,   45,   46,   47,   48,   57,   58,   59,   60,   61,   62,
          63,   64,   65,   66,   67,   68,   69,   70,   71,   83,   84,
          85,   86,   87,   88,   89,   90,   91,   92,   93,   94,   95,
          96,   97,   98,   99,  100,  101,  102,  103,  104,  105,  106,
         107,  108,  109,  110,  111,  112,  113,  114,  115,  116,  117,
         118,  119,  120,  121,  122,  123,  124,  125,  126,  127,  128,
         129,  130,  131,  132,  133,  134,  135,  136,  137,  138,  139,
         140,  141,  142,  143,  144,  145,  146,  147,  148,  149,  150,
         414,  415,  416,  417,  418,  419,  420,  421,  422,  430,  431,
         432,  433,  434,  435,  436, 

### Visualize Occupancy on protein 

In [68]:
prot15_noresC2.occupancies = mC2_occ15
prot15_noresC2.occupancies

array([1., 1., 1., ..., 0., 0., 0.])

In [69]:
with mda.Writer("prot_15nm_noresC2.pdb") as pdb:
    pdb.write(prot15_noresC2)

### Residue Importance: 1.5 nm Unrestrained C2

In [70]:
trj_ppmap15nmC2_nr = prot_poly_cntmovie(prot15_noresC2, pn20_all15nrC2, dmax, pn15nm_noresC2, 0, 10000)
#trj_ppmap_12nm_chC = prot_poly_cntmovie(prot, all_pn20_C, dmax, u_pn20, 0, 10000)

In [71]:
np.save('1.5nmPLGA_NoResC2.npy', trj_ppmap15nmC2_nr)    # .npy extension is added if not given

In [None]:
trj_ppmap15nm_nr = np.load('1.5nm_NoRes.npy', allow_pickle=True)

In [72]:
trj_ppmap15nmC2_nr[0].shape

(583, 60)

In [73]:
np.sum(trj_ppmap15nmC2_nr[1000][0])

4.0

In [74]:
kj = np.zeros(shape=(10000, 583))
kj[:,582].shape

(10000,)

In [75]:
# Number of PLGA contacts of every BSA residue in every frame
# (rows = 10000 frames, columns = 583 BSA residues).
# Each frame's (583, 60) contact map is summed along its second axis in a single
# vectorised call instead of one np.sum call per residue.
ppC2_15nmNR_ct = np.zeros(shape=(10000, 583))
for i in range(10000):
    ppC2_15nmNR_ct[i] = np.sum(trj_ppmap15nmC2_nr[i], axis=1)

In [76]:
# Total number of PLGA contacts of every BSA residue, summed over all frames.
# A single axis-0 reduction replaces the per-column Python loop and no longer
# hard-codes the number of residues.
ppC2_15nmNRtot = np.sum(ppC2_15nmNR_ct, axis=0)

In [77]:
np.nonzero(ppC2_15nmNRtot)

(array([  0,   1,   2,   3,   4,   5,   7,   8,  10,  11,  12,  13,  14,
         15,  16,  17,  19,  50,  52,  53,  54,  55,  56, 128, 129, 130,
        131, 133, 157, 160, 164, 259, 260, 263, 264, 266, 267, 274, 277,
        278, 279, 280, 281, 282, 283, 284, 293, 296, 297, 298, 299, 300,
        301, 302, 303, 304, 332, 334, 335, 336, 337, 338, 339, 344, 347,
        372, 373, 375, 376, 377, 379, 380, 381, 383, 384, 385, 386, 387,
        388, 390, 391, 392, 394, 395, 396, 408, 409, 411, 412, 436, 439,
        440, 441, 442, 443, 444, 445, 483, 484, 486, 487, 488, 489, 490,
        491, 539]),)

In [None]:
plt.close('all')

In [78]:
# Bar positions (BSA residue numbers 1..583) and a constant bar width
y_pos = np.arange(1, 584)
wid = np.full(583, 1.5)

fig = plt.figure(figsize=(12,12))
fig.canvas.layout.width = '800px'
fig.canvas.layout.height = '700px'

# Per-residue contact totals normalised by the overall number of contacts
norm_contacts_15nm = ppC2_15nmNRtot / np.sum(ppC2_15nmNRtot)
plt.bar(y_pos + 0.25, norm_contacts_15nm, align='center', width=wid,
        color='#562A8B', alpha=0.4, label='1.5 nm PLGA')

plt.title("BSA in water with PLGA unrestrained, 100 ns, Config 2", fontsize=18)
plt.xlim([0,600])
plt.ylim([0,0.2])
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.legend(fontsize=14)
plt.ylabel(r'Normalized Total No. of PLGA contacts', fontsize=15)
plt.xlabel(r'BSA Residue ID', fontsize=15)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 0, 'BSA Residue ID')

### Total number of residues that are within 4 angstroms of a PLGA oligomer residue within a 100 ns trajectory block

In [80]:
# Some residues never come into contact with the 3 N = 20 PLGA oligomers within 100 ns;
# keep only the labels of the BSA residues with a non-zero contact frame count.
has_contact_15nm = plgC2_1_5nm_nr['mda_1.5nm_nrC2'] != 0
aplC2_15nm_nr = plgC2_1_5nm_nr.loc[has_contact_15nm, 'BSA_des_res'].tolist()

# For every amino acid in aa_count, count the contacted residues whose label
# contains that amino-acid name.
cplC2_15nm_nr = [
    sum(aa_name in res_label for res_label in aplC2_15nm_nr)
    for aa_name in aa_count['Amino_acids']
]

aa_count['plgaC2_1.5nm_100ns_NR'] = cplC2_15nm_nr
aa_count

Unnamed: 0,Amino_acids,No_of_surf_res (MDAnalysis),No_of_surf_res (VMD),plgaC2_1.2nm_100ns_NR,plgaC2_1.5nm_100ns_NR
0,LYS,59,59,25,15
1,ARG,23,23,3,6
2,HIS,16,16,5,5
3,ASP,40,40,11,10
4,GLU,59,59,21,13
5,SER,23,23,5,3
6,THR,29,29,8,3
7,CYS,24,24,12,5
8,TYR,19,19,2,3
9,ASN,14,14,6,4


In [81]:
# This gives the total number of residues that are within 4 angstroms of a PLGA oligomer residue
# within a 100 ns trajectory block
aa_count['plgaC2_1.5nm_100ns_NR'].sum()

106

In [82]:
# This gives the total number of residues that are within 4 angstroms of a water molecule
# within a 1 ns trajectory block
aa_count['No_of_surf_res (MDAnalysis)'].sum()

487

In [83]:
# This gives the total fraction of contacts within the 1.5 nm Rg unrestrained 100 ns trajectory
aa_count['plgaC2_1.5nm_100ns_NR'].sum()/aa_count['No_of_surf_res (MDAnalysis)'].sum()

0.21765913757700206

In [84]:
# Mean occupancy and std deviation 
# Second entry of every h2diC2_15nr value is treated as that residue's occupancy
ll_mo15_nr = [entry[1] for entry in h2diC2_15nr.values()]
occ_mean_15nr, occ_std_15nr = np.mean(ll_mo15_nr), np.std(ll_mo15_nr)
print("Mean Occpancy (1.5 nm Rg Unrestrained): " + str(occ_mean_15nr), "Occ. std. dev.: " + str(occ_std_15nr))

Mean Occpancy (1.5 nm Rg Unrestrained): 0.40043490566037737 Occ. std. dev.: 0.3950122185935502


In [85]:
cd_15nr = frac_cont(h2diC2_15nr)
cd_15nr

{'Negative': [88396.0, 23, 407390.2608695652, 0.17502027830217248],
 'Positive': [118599.0, 26, 483519.0, 0.2077261978815023],
 'Polar': [107827.0, 23, 496941.8260869565, 0.2134928226219326],
 'Hydrophobic': [109639.0, 34, 341815.70588235295, 0.14684857670354842],
 'Aromatic': [67699.0, 12, 598007.8333333334, 0.2569121244908441]}

### Calc. fractional contacts for each AA group type 

In [86]:
fcntrg1_5nmC2_nr, prgrp1_5nmC2_nr, aamatx_15nmC2_nr = bavg_frac_cnt(5, prot15_noresC2, pn20_all15nrC2, dmax,
                                                        pn15nm_noresC2, no_surf, 0, 10000)

0
2000
2000
4000
4000
6000
6000
8000
8000
10000


In [87]:
fcntrg1_5nmC2_nr

{'Negative': array([0.20659401, 0.16341879, 0.18081786, 0.1612494 , 0.15671605]),
 'Positive': array([0.2304329 , 0.17606894, 0.21554762, 0.19410702, 0.18304269]),
 'Polar': array([0.20387499, 0.216717  , 0.20983534, 0.20707696, 0.17109373]),
 'Hydrophobic': array([0.14539661, 0.17467522, 0.14669947, 0.17962185, 0.21540316]),
 'Aromatic': array([0.21370149, 0.26912005, 0.2470997 , 0.25794477, 0.27374437]),
 'total_frac': array([0.18480493, 0.17453799, 0.18480493, 0.15400411, 0.14373717])}

In [88]:
# Mean fractional contact over the trajectory blocks, one value per AA group
fc15nmC2_mean_nr = np.array([
    np.mean(fcntrg1_5nmC2_nr[grp])
    for grp in ('Negative', 'Positive', 'Polar', 'Hydrophobic', 'Aromatic')
])
fc15nmC2_mean_nr

array([0.17375922, 0.19983984, 0.2017196 , 0.17235926, 0.25232208])

In [89]:
# Std. dev. of the fractional contact across trajectory blocks, one value per AA group
fc15nmC2_std_nr = np.array([
    np.std(fcntrg1_5nmC2_nr[grp])
    for grp in ('Negative', 'Positive', 'Polar', 'Hydrophobic', 'Aromatic')
])
fc15nmC2_std_nr

array([0.01834032, 0.02031125, 0.0158876 , 0.02567604, 0.02140805])

In [90]:
# Grouped bar chart: fractional contacts per AA group, 1.2 nm vs 1.5 nm
aa_types = ["Negative", "Positive", "Polar", "Hydrophobic", "Aromatic"]
x_pos = np.arange(5)
width = 0.35

fig = plt.figure(figsize=(12, 12))
fig.canvas.layout.width = '500px'
fig.canvas.layout.height = '400px'

# One (mean, std, colour) triple per Rg value; bars are shifted by k * width
for k, (avg, err, shade) in enumerate([
    (fc_12nmnrC2_mean, fc_12nmnrC2_std, 'royalblue'),
    (fc15nmC2_mean_nr, fc15nmC2_std_nr, 'c'),
]):
    plt.bar(x_pos + k * width, avg, width, yerr=err, ecolor='black', capsize=5, color=shade)

plt.title(r'Fractional Contacts Rg unrestrained', fontsize=15)
# Tick labels sit midway between the two bars of each group
plt.xticks(x_pos + width / 2, labels=aa_types, fontsize=12)
plt.legend(['Rg = 1.2 nm', 'Rg = 1.5 nm'], frameon=False)
plt.ylabel(r'Fractional Contacts', fontsize=15)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0, 0.5, 'Fractional Contacts')

### Total fraction of contacts: averages and std dev calc from 5 20ns blocks

In [91]:
np.mean(fcntrg1_5nmC2_nr['total_frac'])

0.16837782340862423

In [92]:
np.std(fcntrg1_5nmC2_nr['total_frac'])

0.016681803705617986

### Avg no. PLGA residues per BSA AA residue group 

In [93]:
prgrp1_5nmC2_nr

{'Negative': array([array([25.175     ,  3.39976102]),
        array([24.2115    ,  3.30102526]),
        array([24.1165    ,  3.10965074]),
        array([20.4885    ,  2.95936949]),
        array([20.6875    ,  3.02784474])], dtype=object),
 'Positive': array([array([42.2895    ,  6.33961274]), array([41.9745   ,  5.5297242]),
        array([42.358     ,  5.79550136]),
        array([40.09      ,  5.29319374]),
        array([35.6485    ,  4.27749316])], dtype=object),
 'Polar': array([array([32.5285   ,  5.1803656]), array([36.3645    ,  4.67189894]),
        array([38.255     ,  4.69190526]),
        array([32.8485    ,  6.16608042]), array([25.497   ,  5.775378])],
       dtype=object),
 'Hydrophobic': array([array([24.412     ,  4.77715983]),
        array([23.9185    ,  3.73307618]), array([24.275    ,  4.5443784]),
        array([19.9505    ,  3.71053227]),
        array([23.607     ,  3.82786507])], dtype=object),
 'Aromatic': array([array([19.5855    ,  4.09886445]),
        

In [94]:
# Block-averaged number of PLGA residues per BSA amino-acid group (1.5 nm run).
# Every element of prgrp1_5nmC2_nr[group] is a two-entry array; entry [0] is the
# per-block value averaged here, entry [1] is presumably the within-block std
# (TODO confirm against bavg_frac_cnt).
grp_15nmC2_nr = list(prgrp1_5nmC2_nr.keys())
mean_15nmC2_nr = np.zeros(shape=len(grp_15nmC2_nr))
std_15nmC2_nr = np.zeros(shape=len(grp_15nmC2_nr))

for count, key in enumerate(grp_15nmC2_nr):
    # One value per trajectory block for this amino-acid group
    mpl_15nmC2_nr = [blk[0] for blk in prgrp1_5nmC2_nr[key].flat]

    # Average over the blocks
    mean_15nmC2_nr[count] = np.mean(mpl_15nmC2_nr)

    # Spread of the block values. Pooling the per-block entries [1] instead is
    # discussed at https://stats.stackexchange.com/questions/25848/how-to-sum-a-standard-deviation
    std_15nmC2_nr[count] = np.std(mpl_15nmC2_nr)


In [95]:
mean_15nmC2_nr

array([22.9358, 40.4721, 33.0987, 23.2326, 20.5404])

In [96]:
std_15nmC2_nr

array([1.9534385 , 2.55068166, 4.36867143, 1.66495626, 0.749139  ])

In [97]:
# Grouped bar chart: no. of PLGA residues per AA group, 1.2 nm vs 1.5 nm
aa_types = ["Negative", "Positive", "Polar", "Hydrophobic", "Aromatic"]
x_pos = np.arange(5)
width = 0.35

fig = plt.figure(figsize=(12, 12))
fig.canvas.layout.width = '500px'
fig.canvas.layout.height = '400px'

# One (mean, std, colour) triple per Rg value; bars are shifted by k * width
for k, (avg, err, shade) in enumerate([
    (mean_12nm_C2nr, std_12nm_C2nr, 'green'),
    (mean_15nmC2_nr, std_15nmC2_nr, 'violet'),
]):
    plt.bar(x_pos + k * width, avg, width, yerr=err, ecolor='black', capsize=5, color=shade)

plt.title(r'No. of PLGA residues Rg unrestrained, Config 2', fontsize=15)
# Tick labels sit midway between the two bars of each group
plt.xticks(x_pos + width / 2, labels=aa_types, fontsize=12)
plt.legend(['Rg = 1.2 nm', 'Rg = 1.5 nm'], frameon=False)
plt.ylabel(r'No. of PLGA residues', fontsize=15)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0, 0.5, 'No. of PLGA residues')

### Protein/polymer contact map movie

In [None]:
# Render the per-frame BSA/PLGA contact maps as an MP4 movie: one greyscale
# image per frame, with coloured background bands marking the three PLGA chains.
fig = plt.figure(figsize=(10,10))

# Set the plot title and the axis labels

plt.title("BSA/PLGA contact map 1.5 nm Unres", fontsize=22, loc='left')
plt.xlabel("PLGA Residue No.", fontsize=22)
plt.ylabel("BSA Residue No.", fontsize=20)

# Set the axis range (y axis is inverted: 583 at the bottom, 0 at the top)
plt.ylim(583, 0)
plt.xlim(0, 60)

# Plot bands for each chain: (x start, x stop, colour, chain label)
BANDS = (
    (0, 20, "purple", "B"),
    (20, 40, "blue", "C"),
    (40, 60, "green", "D"),
)
    
text_y = 0.98 # Close to the top
for start, stop, color, band in BANDS:
    plt.axvspan(start, stop,color=color, alpha=0.15)
    # middle_of_band returns the band centre as a fraction of the 0-60 x range,
    # which is what the axes-coordinate transform below expects
    text_x = middle_of_band(start,stop)
    plt.text(
        text_x,
        text_y,
        "PLGA chain " + band,
        color=color,
        fontsize=18,
        transform=fig.gca().transAxes,
        horizontalalignment='center',
        verticalalignment='center',
        style='italic',
    )
    
# Static label; the per-frame time value is drawn just to its right
plt.text(0.94, 1, "Time [ns]:", fontsize=20, transform=fig.gca().transAxes, horizontalalignment='right', verticalalignment='bottom')

# Set tick label size
fig.gca().tick_params(axis='both', which='major', labelsize=20)

# Build one [image, time-text] artist pair per frame; all 10000 pairs are kept
# in memory until the animation is saved.
# NOTE(review): trj_ppmap15nm_nr is not defined in this part of the notebook —
# confirm it holds the Config 2 contact maps.
ims = []
for i in range(10000):
    data = trj_ppmap15nm_nr[i]
    im = plt.imshow(data, aspect='auto', cmap='Greys')
    # i/100 is shown as the time in ns — assumes 100 frames per ns, TODO confirm
    t_sim = plt.text(1.03, 1, str(i/100), fontsize=20, transform=fig.gca().transAxes, horizontalalignment='right', verticalalignment='bottom')
    ims.append([im, t_sim])
    
# Play the artist pairs once, in order, and encode with ffmpeg at 50 fps
ani = animation.ArtistAnimation(fig, ims, blit=True, repeat=False)
ani.save('1.5nm_NoRes.mp4', writer='ffmpeg', fps=50, bitrate=100000)
#plt.tight_layout()
#plt.show()

# 2 nm PLGA unrestrained Rg 100 ns trajectory

Load the rg = 2 nm (3 PLGA N = 20 oligomer/BSA system)

In [98]:
# load the unrestrained trajectory 
pn2nm_noresC2 = mda.Universe("../C2_bsa_n20plga_inwater/C2_plgaNoRes/2nmNoRes_C2/2nm_NoResplga.pdb"
                      , "../C2_bsa_n20plga_inwater/C2_plgaNoRes/2nmNoRes_C2/C2_2nmbplga.xtc")

Check that we are on the first frame

In [99]:
pn2nm_noresC2.trajectory.frame

0

In [100]:
p2nm_len_nrC2 = len(pn2nm_noresC2.trajectory)
p2nm_len_nrC2

10001

In [101]:
#Select all the PLGA residues, heavy atoms only 
p2nm_allnrC2 = pn2nm_noresC2.select_atoms("resname sPLG PLG tPLG and not type H")
p2nm_allnrC2

<AtomGroup with 543 atoms>

In [102]:
# Select BSA residues, heavy atoms only 
prot2nm_noresC2 = pn2nm_noresC2.select_atoms("protein and not type H")
prot2nm_noresC2

<AtomGroup with 4653 atoms>

### Calc. total fraction of contacts

In [103]:
#dmax = 4.0, protein group(4653 atoms), plga atom group (543 atoms), took 381.6 s (6 min 36s on 4 cores)
start = 0
end = p2nm_len_nrC2 - 1
s_time = timeit.default_timer()
h2diC2_2nr = aa_frmcount(prot2nm_noresC2, p2nm_allnrC2, dmax, pn2nm_noresC2, start, end)
timeit.default_timer() - s_time

667.2053520092741

In [104]:
len(h2diC2_2nr.keys())

122

In [106]:
# Build a one-row-per-BSA-residue table for the 2 nm unrestrained run.
prC2_2nr = list(prot2nm_noresC2.residues)
ssC2_2nr = list(map(str, prC2_2nr))          # residue reprs, used as lookup keys

# First entry of every aa_frmcount value, keyed by residue repr
rkgC2_2nm = {res_key: entry[0] for res_key, entry in h2diC2_2nr.items()}

plgC2_2nm_nr = pd.DataFrame({"BSA_des_res": ssC2_2nr})
# Residues absent from h2diC2_2nr map to NaN and are set to 0
plgC2_2nm_nr['mda_2nm_nrC2'] = (
    plgC2_2nm_nr['BSA_des_res']
    .map(rkgC2_2nm)
    .replace('nan', np.nan)
    .fillna(0)
)
# Swap the residue reprs for the labels in red_bsa (only after the lookup above)
plgC2_2nm_nr['BSA_des_res'] = red_bsa
plgC2_2nm_nr.head()

Unnamed: 0,BSA_des_res,mda_2nm_nrC2
0,ASP 1,0.0
1,THR 2,0.0
2,HIS 3,0.0
3,LYS 4,0.0
4,SER 5,0.0


In [107]:
# Add the 2 nm occupancy column to the shared per-residue occupancy table
prC2_res_2ur = list(prot2nm_noresC2.residues)
ssC2_res_2ur = list(map(str, prC2_res_2ur))

# Second entry of every aa_frmcount value, keyed by residue repr
rkgC2_2ur = {res_key: entry[1] for res_key, entry in h2diC2_2nr.items()}

# Residues absent from h2diC2_2nr map to NaN and are set to 0; round to 2 decimals
plgC2_1_2nm_NRocc['mda_occ_2nm_NRC2'] = (
    plgC2_1_2nm_NRocc['BSA_des_res']
    .map(rkgC2_2ur)
    .replace('nan', np.nan)
    .fillna(0)
    .round(2)
)
plgC2_1_2nm_NRocc

Unnamed: 0,BSA_des_res,mda_occ_1.2nm_NRC2,mda_occ_1.5nm_NRC2,mda_occ_2nm_NRC2
0,"<Residue ASP, 1>",1.00,1.00,0.00
1,"<Residue THR, 2>",0.99,1.00,0.00
2,"<Residue HIS, 3>",0.99,0.97,0.00
3,"<Residue LYS, 4>",0.99,0.99,0.00
4,"<Residue SER, 5>",0.95,0.81,0.00
...,...,...,...,...
578,"<Residue GLN, 579>",0.00,0.00,0.80
579,"<Residue THR, 580>",0.00,0.00,0.67
580,"<Residue ALA, 581>",0.00,0.00,0.23
581,"<Residue LEU, 582>",0.00,0.00,0.38


In [109]:
# Expand the per-residue occupancy values to one value per atom of the protein
# selection, so they can be assigned to the AtomGroup's occupancies.
bsaC2_2ur = np.array(list(prot2nm_noresC2.resids))  # residue ID of every atom in the selection
mC2_occ_2nmNR = np.array(list(plgC2_1_2nm_NRocc['mda_occ_2nm_NRC2']))  # one occupancy per residue

# Index of the last atom of every residue except the final one
at_ind = np.where(bsaC2_2ur[:-1] != bsaC2_2ur[1:])[0]

# 0-based residue index of every atom: mark the first atom of each new residue
# with 1 and take the running sum. No atom/residue counts are hard-coded.
atom_res_idx = np.zeros(bsaC2_2ur.size, dtype=int)
atom_res_idx[at_ind + 1] = 1
atom_res_idx = np.cumsum(atom_res_idx)

# The occupancy table must have exactly one row per residue in the selection
if bsaC2_2ur.size and atom_res_idx[-1] + 1 != mC2_occ_2nmNR.size:
    raise ValueError("occupancy table has %d rows but the selection has %d residues"
                     % (mC2_occ_2nmNR.size, atom_res_idx[-1] + 1))

mC2_occ2nm = mC2_occ_2nmNR[atom_res_idx]

In [110]:
mC2_occ2nm[633:646]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

### Visualize Occupancy on protein

In [111]:
prot2nm_noresC2.occupancies = mC2_occ2nm
prot2nm_noresC2.occupancies

array([0.  , 0.  , 0.  , ..., 0.54, 0.54, 0.54])

In [112]:
with mda.Writer("prot_2nm_noresC2.pdb") as pdb:
    pdb.write(prot2nm_noresC2)

### Residue Importance: 2 nm Unrestrained 

In [113]:
trj_ppmap2nmC2_nr = prot_poly_cntmovie(prot2nm_noresC2, p2nm_allnrC2, dmax, pn2nm_noresC2, 0, 10000)
#trj_ppmap_12nm_chC = prot_poly_cntmovie(prot, all_pn20_C, dmax, u_pn20, 0, 10000)

In [114]:
np.save('2nmPLGA_NoResC2.npy', trj_ppmap2nmC2_nr)    # .npy extension is added if not given

In [None]:
# Reload the contact maps written by the np.save call in the preceding cell
# ('2nmPLGA_NoResC2.npy'); the original file name '2nm_NoRes.npy' did not match it.
# NOTE(review): the variable name has no C2 suffix — confirm that the movie cell
# that reads trj_ppmap2nm_nr is meant to use these Config 2 maps.
trj_ppmap2nm_nr = np.load('2nmPLGA_NoResC2.npy', allow_pickle=True)

In [115]:
trj_ppmap2nmC2_nr[0].shape

(583, 60)

In [117]:
np.sum(trj_ppmap2nmC2_nr[1000][0])

0.0

In [118]:
kj = np.zeros(shape=(10000, 583))
kj[:,582].shape

(10000,)

In [119]:
# Per-frame contact count for every BSA residue: each contact map is
# (583 BSA residues, 60 PLGA residues), so summing along axis 1 gives one
# count per BSA residue. One row-wise reduction per frame replaces the
# 583 separate np.sum calls per frame of the nested loop.
ppC2_2nmNR_ct = np.zeros(shape=(10000, 583))
for i in range(10000):
    ppC2_2nmNR_ct[i] = np.sum(trj_ppmap2nmC2_nr[i], axis=1)

In [120]:
# Total contact count per BSA residue, summed over all frames (rows)
ppC2_2nmNRtot = np.sum(ppC2_2nmNR_ct, axis=0)

In [121]:
np.nonzero(ppC2_2nmNRtot)

(array([ 31,  32,  83,  85,  86, 102, 103, 104, 105, 106, 107, 108, 109,
        110, 111, 113, 146, 195, 202, 206, 207, 210, 211, 214, 224, 225,
        226, 227, 228, 229, 230, 231, 233, 234, 300, 301, 302, 303, 306,
        307, 310, 312, 315, 316, 317, 319, 320, 321, 322, 323, 324, 325,
        326, 327, 328, 329, 330, 331, 334, 335, 414, 415, 416, 417, 419,
        420, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 476, 492,
        493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 506,
        532, 533, 534, 535, 536, 537, 538, 540, 541, 543, 544, 548, 555,
        563, 564, 566, 567, 568, 569, 570, 571, 572, 574, 575, 576, 577,
        578, 579, 580, 581, 582]),)

In [122]:
aNR = np.sum(ppC2_2nmNRtot)
aNR

649949.0

In [123]:
bNR = np.sum(ppC2_15nmNRtot)
bNR

776434.0

In [124]:
cNR = np.sum(ppC2_12nmtot_nr)
cNR

925587.0

In [125]:
plt.close('all')

In [131]:
# Normalized total number of PLGA contacts per BSA residue, all three Rg values
y_pos = np.arange(1, 584)       # x positions 1..583, one per plotted residue
wid = np.full(583, 1.5)         # same bar width for every residue

fig = plt.figure(figsize=(12, 12))
fig.canvas.layout.width = '800px'
fig.canvas.layout.height = '700px'

# (x offset, contact totals, normalisation, colour, alpha, legend label)
for shift, totals, norm, shade, opacity, tag in (
    (0, ppC2_12nmtot_nr, cNR, '#1D77CF', 0.4, '1.2 nm PLGA'),
    (0.25, ppC2_15nmNRtot, bNR, '#562A8B', 0.3, '1.5 nm PLGA'),
    (0.3, ppC2_2nmNRtot, aNR, '#4E4C4D', 0.3, '2 nm PLGA'),
):
    plt.bar(y_pos + shift, totals / norm, width=wid, align='center',
            color=shade, alpha=opacity, label=tag)

plt.title("BSA in water with PLGA unrestrained, 100 ns", fontsize=16)
for set_tick_font in (plt.xticks, plt.yticks):
    set_tick_font(fontsize=16)
plt.xlim([0, 600])
plt.ylim([0, 0.2])
plt.legend(fontsize=16)
plt.ylabel(r'Normalized Total No. of PLGA contacts', fontsize=15)
plt.xlabel(r'BSA Residue ID', fontsize=16)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 0, 'BSA Residue ID')

### Total number of residues that are within 4 angstroms of a PLGA oligomer residue within a 100 ns trajectory block

In [133]:
# Some BSA residues never contact the 3 N = 20 PLGA oligomers within 100 ns.
# Keep only the residue labels whose 'mda_2nm_nrC2' entry is non-zero.
aplC2_2nm_nr = plgC2_2nm_nr.loc[
    plgC2_2nm_nr['mda_2nm_nrC2'] != 0, 'BSA_des_res'
].tolist()

# For every amino-acid name in aa_count, count how many of the contacted
# residue labels contain that name (substring match on the label).
cplC2_2nm_nr = [
    sum(1 for res_label in aplC2_2nm_nr if aa_name in res_label)
    for aa_name in aa_count['Amino_acids']
]

aa_count['plgaC2_2nm_100ns_NR'] = cplC2_2nm_nr
aa_count

Unnamed: 0,Amino_acids,No_of_surf_res (MDAnalysis),No_of_surf_res (VMD),plgaC2_1.2nm_100ns_NR,plgaC2_1.5nm_100ns_NR,plgaC2_2nm_100ns_NR
0,LYS,59,59,25,15,15
1,ARG,23,23,3,6,4
2,HIS,16,16,5,5,3
3,ASP,40,40,11,10,10
4,GLU,59,59,21,13,12
5,SER,23,23,5,3,6
6,THR,29,29,8,3,11
7,CYS,24,24,12,5,0
8,TYR,19,19,2,3,5
9,ASN,14,14,6,4,2


In [134]:
# This gives the total number of residues that are within 4 angstroms of a PLGA oligomer residue
# within a 100 ns trajectory block
aa_count['plgaC2_2nm_100ns_NR'].sum()

122

In [135]:
# This gives the total number of residues that are within 4 angstroms of a water molecule
# within a 1 ns trajectory block
aa_count['No_of_surf_res (MDAnalysis)'].sum()

487

In [136]:
# This gives the total fraction of contacts within the 2 nm unrestrained Rg 100 ns trajectory
aa_count['plgaC2_2nm_100ns_NR'].sum()/aa_count['No_of_surf_res (MDAnalysis)'].sum()

0.25051334702258726

In [137]:
# Mean occupancy and std deviation 
# Second entry of every h2diC2_2nr value is treated as that residue's occupancy
ll_mo2_nr = [entry[1] for entry in h2diC2_2nr.values()]
occ_mean_2nr, occ_std_2nr = np.mean(ll_mo2_nr), np.std(ll_mo2_nr)
print("Mean Occpancy (2 nm Rg): " + str(occ_mean_2nr), "Occ. std. dev.: " + str(occ_std_2nr))

Mean Occpancy (2 nm Rg): 0.30424918032786885 Occ. std. dev.: 0.35103234222249863


In [138]:
cd_2nm = frac_cont(h2diC2_2nr)
cd_2nm

{'Negative': [44630.0, 22, 247493.63636363635, 0.12986941363924406],
 'Positive': [53921.0, 22, 299016.45454545453, 0.15690541458305354],
 'Polar': [101337.0, 30, 412103.8, 0.21624668678700554],
 'Hydrophobic': [171296.0, 48, 435377.3333333333, 0.22845920332570346],
 'Aromatic': [58722.0, 14, 511720.2857142857, 0.26851928166499345]}

### Calc. fractional contacts for each AA group type 

In [None]:
# Spot check of a single block (frames 6000 to 8000) of the 2 nm run.
# Uses the C2-suffixed selections and Universe defined in this section; the
# un-suffixed names (prot2nm_nores, p2nm_allnr, pn2nm_nores) are not defined here.
test_nr = aa_frmcount(prot2nm_noresC2, p2nm_allnrC2, dmax, pn2nm_noresC2, 6000, 8000)

In [None]:
frac_cont(test_nr)

In [139]:
no_surf

487

In [140]:
fcntrg2nmC2_nr, prgrp2nmC2_nr, aamatx_2nmC2_nr = bavg_frac_cnt(5, prot2nm_noresC2, p2nm_allnrC2
                                                               , dmax, pn2nm_noresC2, no_surf, 0, 10000)

0
2000
2000
4000
4000
6000
6000
8000
8000
10000


In [141]:
fcntrg2nmC2_nr

{'Negative': array([0.14513798, 0.13315851, 0.13274341, 0.1541703 , 0.11694215]),
 'Positive': array([0.16370246, 0.20579459, 0.16567423, 0.14091961, 0.09304844]),
 'Polar': array([0.20976739, 0.18589637, 0.20758789, 0.20763904, 0.25394417]),
 'Hydrophobic': array([0.25050754, 0.2232402 , 0.2181323 , 0.21523998, 0.20981455]),
 'Aromatic': array([0.23088462, 0.25191034, 0.27586216, 0.28203107, 0.32625068]),
 'total_frac': array([0.20123203, 0.18275154, 0.17453799, 0.16837782, 0.13141684])}

In [142]:
# Mean fractional contact over the trajectory blocks, one value per AA group
fc2nmC2_mean_nr = np.array([
    np.mean(fcntrg2nmC2_nr[grp])
    for grp in ('Negative', 'Positive', 'Polar', 'Hydrophobic', 'Aromatic')
])
fc2nmC2_mean_nr

array([0.13643047, 0.15382787, 0.21296697, 0.22338692, 0.27338777])

In [143]:
# Std. dev. of the fractional contact across trajectory blocks, one value per AA group
fc2nmC2_std_nr = np.array([
    np.std(fcntrg2nmC2_nr[grp])
    for grp in ('Negative', 'Positive', 'Polar', 'Hydrophobic', 'Aromatic')
])
fc2nmC2_std_nr

array([0.01260648, 0.03687586, 0.02226884, 0.01423898, 0.03207215])

In [160]:
# Grouped bar chart: fractional contacts per AA group for all three Rg values
aa_types = ["Negative", "Positive", "Polar", "Hydrophobic", "Aromatic"]
x_pos = np.arange(5)
width = 0.3

fig = plt.figure(figsize=(12, 12))
fig.canvas.layout.width = '800px'
fig.canvas.layout.height = '700px'

# One (mean, std, colour) triple per Rg value; bars are shifted by k * width
for k, (avg, err, shade) in enumerate([
    (fc_12nmnrC2_mean, fc_12nmnrC2_std, 'royalblue'),
    (fc15nmC2_mean_nr, fc15nmC2_std_nr, 'c'),
    (fc2nmC2_mean_nr, fc2nmC2_std_nr, 'lightslategray'),
]):
    plt.bar(x_pos + k * width, avg, width, yerr=err, ecolor='black', capsize=5, color=shade)

plt.title(r'Fractional Contacts Rg unrestrained, Config 2', fontsize=16)
plt.yticks(fontsize=16)
# Tick labels are centred on the middle bar of each group
plt.xticks(x_pos + width, labels=aa_types, fontsize=16)
plt.legend(['Rg = 1.2 nm', 'Rg = 1.5 nm', 'Rg = 2 nm'], frameon=False, fontsize=16)
plt.ylabel(r'Fractional Contacts', fontsize=16)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0, 0.5, 'Fractional Contacts')

### Total fraction of contacts: averages and std dev calc from 5 20ns blocks

In [145]:
np.mean(fcntrg2nmC2_nr['total_frac'])

0.17166324435318275

In [146]:
np.std(fcntrg2nmC2_nr['total_frac'])

0.02296124968951846

### Avg no. PLGA residues per BSA AA residue group 

In [147]:
prgrp2nmC2_nr

{'Negative': array([array([9.917     , 3.37818161]), array([11.365    ,  3.7141318]),
        array([10.464     ,  2.97114523]), array([8.4025    , 2.42991641]),
        array([5.123     , 1.79607099])], dtype=object),
 'Positive': array([array([13.992     ,  3.55667485]),
        array([17.525     ,  4.02111614]),
        array([16.015     ,  4.43427277]), array([9.0345    , 3.57397674]),
        array([3.8       , 1.86252517])], dtype=object),
 'Polar': array([array([36.4215    ,  6.21512975]),
        array([39.2085    ,  5.41230337]),
        array([43.3455    ,  6.36027749]), array([35.6335   ,  6.4582643]),
        array([28.971     ,  4.18463368])], dtype=object),
 'Hydrophobic': array([array([51.5075    ,  7.69337012]),
        array([56.3235    ,  7.90650667]),
        array([49.9685    ,  7.43885124]),
        array([43.4205    ,  6.82229285]),
        array([36.0115    ,  6.05213745])], dtype=object),
 'Aromatic': array([array([21.2185    ,  3.55158524]),
        array([23.9

In [149]:
# Block-averaged number of PLGA residues per BSA amino-acid group (2 nm run).
# Every element of prgrp2nmC2_nr[group] is a two-entry array; entry [0] is the
# per-block value averaged here, entry [1] is presumably the within-block std
# (TODO confirm against bavg_frac_cnt).
grp_2nmC2_nr = list(prgrp2nmC2_nr.keys())
mean_2nmC2_nr = np.zeros(shape=len(grp_2nmC2_nr))
std_2nmC2_nr = np.zeros(shape=len(grp_2nmC2_nr))

for count, key in enumerate(grp_2nmC2_nr):
    # One value per trajectory block for this amino-acid group
    mpl_2nmC2_nr = [blk[0] for blk in prgrp2nmC2_nr[key].flat]

    # Average over the blocks
    mean_2nmC2_nr[count] = np.mean(mpl_2nmC2_nr)

    # Spread of the block values. Pooling the per-block entries [1] instead is
    # discussed at https://stats.stackexchange.com/questions/25848/how-to-sum-a-standard-deviation
    std_2nmC2_nr[count] = np.std(mpl_2nmC2_nr)


In [150]:
mean_2nmC2_nr

array([ 9.0543, 12.0733, 36.716 , 47.4463, 22.0737])

In [151]:
#np.std()
#std_2nm_nr[4] = 1
std_2nmC2_nr

array([2.18864354, 5.03232034, 4.72030482, 7.05154217, 1.74460011])

In [157]:
# Grouped bar chart: no. of PLGA residues per AA group for all three Rg values
aa_types = ["Negative", "Positive", "Polar", "Hydrophobic", "Aromatic"]
x_pos = np.arange(5)
width = 0.3

fig = plt.figure(figsize=(12, 12))
fig.canvas.layout.width = '800px'
fig.canvas.layout.height = '700px'

# One (mean, std, colour) triple per Rg value; bars are shifted by k * width
for k, (avg, err, shade) in enumerate([
    (mean_12nm_C2nr, std_12nm_C2nr, 'royalblue'),
    (mean_15nmC2_nr, std_15nmC2_nr, 'c'),
    (mean_2nmC2_nr, std_2nmC2_nr, 'lightslategray'),
]):
    plt.bar(x_pos + k * width, avg, width, yerr=err, ecolor='black', capsize=5, color=shade)

plt.title(r'No. of PLGA residues Rg unrestrained, Config 2', fontsize=16)
# Tick labels are centred on the middle bar of each group
plt.xticks(x_pos + width, labels=aa_types, fontsize=16)
plt.yticks(fontsize=16)
plt.legend(['Rg = 1.2 nm', 'Rg = 1.5 nm', 'Rg = 2 nm'], frameon=False, fontsize=16)
plt.ylabel(r'No. of PLGA residues', fontsize=16)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0, 0.5, 'No. of PLGA residues')

### Protein/polymer contact map movie

In [None]:
# Render the per-frame BSA/PLGA contact maps as an MP4 movie: one greyscale
# image per frame, with coloured background bands marking the three PLGA chains.
fig = plt.figure(figsize=(10,10))

# Set the plot title and the axis labels

plt.title("BSA/PLGA contact map 2 nm Unres.", fontsize=22, loc='left')
plt.xlabel("PLGA Residue No.", fontsize=22)
plt.ylabel("BSA Residue No.", fontsize=20)

# Set the axis range (y axis is inverted: 583 at the bottom, 0 at the top)
plt.ylim(583, 0)
plt.xlim(0, 60)

# Plot bands for each chain: (x start, x stop, colour, chain label)
BANDS = (
    (0, 20, "purple", "B"),
    (20, 40, "blue", "C"),
    (40, 60, "green", "D"),
)
    
text_y = 0.98 # Close to the top
for start, stop, color, band in BANDS:
    plt.axvspan(start, stop,color=color, alpha=0.15)
    # middle_of_band returns the band centre as a fraction of the 0-60 x range,
    # which is what the axes-coordinate transform below expects
    text_x = middle_of_band(start,stop)
    plt.text(
        text_x,
        text_y,
        "PLGA chain " + band,
        color=color,
        fontsize=18,
        transform=fig.gca().transAxes,
        horizontalalignment='center',
        verticalalignment='center',
        style='italic',
    )
    
# Static label; the per-frame time value is drawn just to its right
plt.text(0.94, 1, "Time [ns]:", fontsize=20, transform=fig.gca().transAxes, horizontalalignment='right', verticalalignment='bottom')

# Set tick label size
fig.gca().tick_params(axis='both', which='major', labelsize=20)

# Build one [image, time-text] artist pair per frame; all 10000 pairs are kept
# in memory until the animation is saved.
# NOTE(review): this reads trj_ppmap2nm_nr (no C2 suffix), not the
# trj_ppmap2nmC2_nr array computed earlier in this section — confirm which
# contact map is intended.
ims = []
for i in range(10000):
    data = trj_ppmap2nm_nr[i]
    im = plt.imshow(data, aspect='auto', cmap='Greys')
    # i/100 is shown as the time in ns — assumes 100 frames per ns, TODO confirm
    t_sim = plt.text(1.03, 1, str(i/100), fontsize=20, transform=fig.gca().transAxes, horizontalalignment='right', verticalalignment='bottom')
    ims.append([im, t_sim])
    
# Play the artist pairs once, in order, and encode with ffmpeg at 50 fps
ani = animation.ArtistAnimation(fig, ims, blit=True, repeat=False)
ani.save('2nm_NoRes.mp4', writer='ffmpeg', fps=50, bitrate=100000)
#plt.tight_layout()
#plt.show()

In [None]:
plt.close('all')