# Proof of concept

In [None]:
import os
import numpy 
import isambard_dev
import pandas
import seaborn
import MDAnalysis as mda
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  
%matplotlib inline

In [None]:
wd = '/projects/s21/ba13026/Wza_Modeling/L-structures/rosetta/bg_test/md_relax/'

# Tests

## Comparison $R_g$ calculation

`isambard` implementation

In [None]:
# Select a single docked model used for the Rg comparison below.
mutant = 'cWza-K375C'
C = 'conformation0'
model_dir = mutant+'/'+C
model_name = 'refined1_0001_INPUT_0900_ignorechain'
# Build the path from the working directory `wd`. The previous version used
# `path0`, which is only defined much later in the notebook (and points to a
# frames folder), so this cell failed on a fresh kernel.
model_pdb = wd +model_dir+'/'+model_name+'/'+model_name+'.pdb'

In [None]:
model_pdb

In [None]:
p = isambard_dev.ampal.convert_pdb_to_ampal(model_pdb)
p.radius_of_gyration

`MDAnalysis` implementation

In [None]:
u = mda.Universe(model_pdb)
protein = u.select_atoms("protein")
protein.radius_of_gyration()

<b><span style="color:red"> NOTE </span></b>

* `isambard` does not have a way to compute the components of $R_g$

# Radius of gyration  components

Proof of concept

In [None]:
u = mda.Universe(model_pdb)
protein = u.select_atoms("protein")
protein_mass = numpy.sum(protein.masses)
protein_inertia = protein.moment_of_inertia() # tensor of inertia

Reference: Moment of Inertia

https://en.wikipedia.org/wiki/Moment_of_inertia#Inertia_tensor

https://www.mdanalysis.org/MDAnalysisTutorial/atomgroups.html

In [None]:
protein_inertia

In [None]:
# sqrt(I_ii / M) for each diagonal element of the inertia tensor.
# The previous comprehension iterated over an empty list (`for i,j in []`),
# so it always produced [] instead of three values.
protein_Rg_n = [numpy.sqrt(protein_inertia[i][i]/float(protein_mass)) for i in range(3)]

In [None]:
protein_Rg_n

Implementation

In [None]:
import MDAnalysis as mda

def get_Rg_components(model_pdb):
    """Return three radius-of-gyration-like values for the protein in a PDB file.

    The file is loaded with MDAnalysis, the "protein" selection is taken, and
    for each diagonal element I_ii of its moment-of-inertia tensor the value
    sqrt(I_ii / M) is computed, where M is the summed atomic mass.

    Parameters
    ----------
    model_pdb : str
        Path of the structure file passed to ``mda.Universe``.

    Returns
    -------
    tuple
        ``(Rg_x, Rg_y, Rg_z)`` in the order of the tensor diagonal.
    """
    universe = mda.Universe(model_pdb)
    protein_atoms = universe.select_atoms("protein")
    total_mass = float(numpy.sum(protein_atoms.masses))
    inertia_tensor = protein_atoms.moment_of_inertia() # tensor of inertia
    Rg_x = numpy.sqrt(inertia_tensor[0][0] / total_mass)
    Rg_y = numpy.sqrt(inertia_tensor[1][1] / total_mass)
    Rg_z = numpy.sqrt(inertia_tensor[2][2] / total_mass)
    return Rg_x, Rg_y, Rg_z

In [None]:
get_Rg_components(model_pdb)

# Docked structures

## Conformational Groups: All models

### Scatter dataplots

In [None]:
MUTANTS = ['cWza','cWza-K375C','cWza-S355C','cWza-Y373C']

CONFORMATIONS = {
    'cWza':['conformation0', 'conformation1'],
    'cWza-K375C':['conformation0', 'conformation1'],
    'cWza-S355C':['conformation0', 'conformation1'],
    'cWza-Y373C':['conformation1'],
}

fig, ax = plt.subplots(2,2,figsize=(10,10))
seaborn.set_style('white')

axes = {
    'cWza':ax[0,0],
    'cWza-K375C':ax[0,1],
    'cWza-S355C':ax[1,0],
    'cWza-Y373C':ax[1,1]
}

COLORS = {
    'conformation0':'blue',
    'conformation1':'green'
}

for i in range(len(MUTANTS)):
    mutant = MUTANTS[i]
    for C in CONFORMATIONS[mutant]:
        X = []
        Z = []
        mutant_dir = mutant+'/'+C 
        for model_dir in [mutant_dir+'/'+model for model in os.listdir(wd+mutant_dir)]:
            model_name = model_dir.split('/')[-1]
            model_pdb = wd+'/'+model_dir+'/'+model_name+'.pdb'
            Rg_x, Rg_y, Rg_z = get_Rg_components(model_pdb)
            X.append(Rg_x)
            Z.append(Rg_z)
        seaborn.scatterplot(X,Z,color=COLORS[C],ax=axes[mutant])

plt.show()

### Density dataplots

In [None]:
MUTANTS = ['cWza','cWza-K375C','cWza-S355C','cWza-Y373C']

CONFORMATIONS = {
    'cWza':['conformation0', 'conformation1'],
    'cWza-K375C':['conformation0', 'conformation1'],
    'cWza-S355C':['conformation0', 'conformation1'],
    'cWza-Y373C':['conformation1'],
}

fig, ax = plt.subplots(2,2,figsize=(10,10))
seaborn.set_style('darkgrid')

axes = {
    'cWza':ax[0,0],
    'cWza-K375C':ax[0,1],
    'cWza-S355C':ax[1,0],
    'cWza-Y373C':ax[1,1]
}

CMAPS = {
    'conformation0':'Blues',
    'conformation1':'Greens'
}

for i in range(len(MUTANTS)):
    mutant = MUTANTS[i]
    for C in CONFORMATIONS[mutant]:
        X = []
        Z = []
        mutant_dir = mutant+'/'+C 
        for model_dir in [mutant_dir+'/'+model for model in os.listdir(wd+mutant_dir)]:
            model_name = model_dir.split('/')[-1]
            model_pdb = wd+'/'+model_dir+'/'+model_name+'.pdb'
            Rg_x, Rg_y, Rg_z = get_Rg_components(model_pdb)
            X.append(Rg_x)
            Z.append(Rg_z)
        seaborn.kdeplot(X,Z,n_levels=20,cmap=CMAPS[C],ax=axes[mutant])

plt.show()

## Conformational Groups: Sample models

In [None]:
import json

with open(wd+'EMMD_Sample.json','r') as fp:
    Sample = json.load(fp)

In [None]:
MyTags  = [
    ['cWza', 'conformation0'],
    ['cWza', 'conformation1'],
    ['cWza-K375C', 'conformation0'],
    ['cWza-K375C', 'conformation1'],
    ['cWza-S355C', 'conformation0'],
    ['cWza-S355C', 'conformation1'],
    ['cWza-Y373C', 'conformation1']
]

CONFORMATIONS = {
    'cWza':['conformation0', 'conformation1'],
    'cWza-K375C':['conformation0', 'conformation1'],
    'cWza-S355C':['conformation0', 'conformation1'],
    'cWza-Y373C':['conformation1'],
}

fig, ax = plt.subplots(2,2,figsize=(10,10))
seaborn.set_style('darkgrid')

axes = {
    'cWza':ax[0,0],
    'cWza-K375C':ax[0,1],
    'cWza-S355C':ax[1,0],
    'cWza-Y373C':ax[1,1]
}

CMAPS = {
    'conformation0':'Blues',
    'conformation1':'Greens'
}

for tag in MyTags:
    mutant, C = tag
    X = []; Z = []
    for i in range(len(Sample)):
        model_dir = Sample[i]
        mutant_sample, C_sample, model_name = model_dir.split('/')
        comparison = [mutant_sample, C_sample] == tag
        if comparison:
            model_pdb = wd+model_dir+'/'+model_name+'.pdb'
            Rg_x, Rg_y, Rg_z = get_Rg_components(model_pdb)
            X.append(Rg_x)
            Z.append(Rg_z)
    seaborn.kdeplot(X,Z,n_levels=20,cmap=CMAPS[C],ax=axes[mutant])

plt.show()

### Scatter dataplots

### Density plots

# MD trajectories for docked structures

## Proof of concept

In [None]:
import os

In [None]:
mutant = 'cWza-K375C'
C = 'conformation0'
model_dir = mutant+'/'+C 
model_name = 'refined1_0001_INPUT_0900_ignorechain'

name = "md_100ns"
tpr_path = wd + model_dir+ '/' +model_name+'/complex/mdf/'+ name + ".tpr"
xtc_path = wd + model_dir+ '/' +model_name+'/complex/mdf/'+ name + ".xtc"

In [None]:
if all(map(os.path.isfile, [tpr_path,xtc_path])):
    u = mda.Universe(tpr_path, xtc_path, in_memory=True)
    print("Trajectory loaded and ready to analyse!")
else:
    print("MD traj not available")

In [None]:
u.trajectory.dt # Time-step (ps)

In [None]:
u.trajectory.n_frames

In [None]:
protein = u.select_atoms("protein", updating=True)

In [None]:
# Collect (Rg_x, Rg_z) for every 10th frame; the slice [:-1:10] stops before
# the last frame of the trajectory.
data = []
for ts in u.trajectory[:-1:10]:
    # Mass and inertia tensor of the protein selection at the current frame
    # (these statements were previously duplicated).
    protein_mass = float(numpy.sum(protein.masses))
    protein_inertia = protein.moment_of_inertia() # tensor of inertia
    # sqrt(I_ii / M) for each diagonal element of the inertia tensor
    Rg_x, Rg_y, Rg_z = [numpy.sqrt(protein_inertia[i][i]/protein_mass) for i in range(3)]
    data.append([Rg_x, Rg_z])
    print(Rg_x, Rg_y, Rg_z)

In [None]:
df = pandas.DataFrame(data)

In [None]:
# Pass the columns by keyword: recent seaborn releases no longer accept x/y as
# positional arguments, while keyword arguments work in old and new versions.
# Column 0 holds Rg_x and column 1 holds Rg_z (see the loop that fills `data`).
seaborn.scatterplot(x=df[0], y=df[1])

## Implementation

In [None]:
import os
import MDAnalysis as mda

def is_traj_available(dir_path, md_name):
    """Check whether all trajectory files of an MD run are available.

    The presence of all three files is taken as an indication that the
    simulation has finished successfully.

    Parameters
    ----------
    dir_path : str
        Directory holding the MD output. It is concatenated directly with the
        file names, so it must end with a path separator.
    md_name : str
        Base name of the run (e.g. 'md_100ns'). The files checked are
        ``<md_name>.gro``, ``<md_name>.tpr`` and ``<md_name>.xtc``.

    Returns
    -------
    bool
        True if all three files exist, False otherwise.
    """
    # Use the `md_name` argument: the previous version read the notebook-level
    # global `name`, so the argument was silently ignored.
    test_files = [dir_path + md_name + ext for ext in (".gro", ".tpr", ".xtc")]
    # Always return a real boolean (previously None was returned on failure).
    return all(map(os.path.isfile, test_files))

def get_Rg_components_traj(dir_path, md_name):
    """Compute per-frame radius-of-gyration-like values along an MD trajectory.

    For every frame, sqrt(I_ii / M) is computed for each diagonal element I_ii
    of the moment-of-inertia tensor of the "protein" selection, where M is the
    summed atomic mass.

    Parameters
    ----------
    dir_path : str
        Directory holding the MD output; must end with a path separator since
        it is concatenated directly with the file names.
    md_name : str
        Base name of the run; ``<md_name>.tpr`` and ``<md_name>.xtc`` are loaded.

    Returns
    -------
    list
        One ``[Rg_x, Rg_y, Rg_z]`` entry per frame, or an empty list when the
        trajectory files are missing or the analysis fails.
    """
    # Bail out early when files are missing. The previous version fell through
    # and relied on a swallowed NameError to end up returning [].
    if not is_traj_available(dir_path, md_name):
        print("MD traj files (.tpr/.xtc) not available")
        return []
    # Use the `md_name` argument rather than the notebook-level global `name`.
    tpr_path = dir_path + md_name + ".tpr"
    xtc_path = dir_path + md_name + ".xtc"
    u = mda.Universe(tpr_path, xtc_path, in_memory=True)
    try:
        protein = u.select_atoms("protein", updating=True)
        data = []
        for ts in u.trajectory:
            protein_mass = float(numpy.sum(protein.masses))
            protein_inertia = protein.moment_of_inertia() # tensor of inertia
            data.append([numpy.sqrt(protein_inertia[i][i]/protein_mass) for i in range(3)])
        return data
    except Exception as err:
        # Best effort, as before: a failed analysis yields an empty result,
        # but the reason is now reported instead of being silently dropped.
        print("Rg analysis failed for "+dir_path+md_name+": "+repr(err))
        return []

In [None]:
mutant = 'cWza-K375C'
C = 'conformation0'
model_dir = mutant+'/'+C 
model_name = 'refined1_0001_INPUT_0900_ignorechain'

name = "md_100ns"
dir_path = wd + model_dir+ '/' +model_name+'/complex/mdf/'

In [None]:
is_traj_available(dir_path, name)

In [None]:
Rg_traj = get_Rg_components_traj(dir_path, name)

In [None]:
Rg_traj = numpy.array(Rg_traj)

In [None]:
fig,ax = plt.subplots(1,1)
Rg_x, Rg_y, Rg_z = Rg_traj.T

# First 100 frames in black, last 100 frames in red.
# x/y are passed by keyword: recent seaborn releases no longer accept them
# as positional arguments.
seaborn.scatterplot(x=Rg_x[:100], y=Rg_z[:100],color='black',ax=ax)
seaborn.scatterplot(x=Rg_x[-100:], y=Rg_z[-100:],color='red',ax=ax)
ax.set_xlabel("Rg_x")
ax.set_ylabel("Rg_z")

plt.show()

## Alternative Implementation

### Proof of concept

In [None]:
path0 = wd+"cWza/conformation0/refined1_0001_INPUT_0908_ignorechain/complex/mdf/md_100ns/"

In [None]:
!ls $path0

In [None]:
# Compute directory size
import os
size_frames = sum(os.path.getsize(path0+f) for f in os.listdir(path0) if os.path.isfile(path0+f))

In [None]:
import json
with open(wd+'EMMD_Sample.json','r') as fp:
    Sample = json.load(fp)

In [None]:
# Size of all 100 frames in GBs
len(Sample)*size_frames/1e9

**Advantages**:

* Can be parallelised, _unlike_ MD trajectory analysis (u.trajectory)
* Overall time per trajectory 15 secs (9 secs traj loading + 6 secs data analysis)

**Caveats**:

* Requires frame extraction, which takes time. However, this step is already parallelised (use `~/mpmodeling/tools/protein_frame_extractor.py`)
* Increased storage 

Serial implementation

In [None]:
for i in range(100):
    model_pdb = path0+'Protein_'+str(i)+'.pdb'
    Rg_components = get_Rg_components(model_pdb)
    print(Rg_components)

Parallel implementation

In [None]:
func = get_Rg_components

param_list = []
for i in range(100):
    model_pdb = path0+'Protein_'+str(i)+'.pdb'
    param_list.append(model_pdb)

import concurrent.futures     
n_threads = 10

data = []

with concurrent.futures.ProcessPoolExecutor(max_workers = n_threads) as executor:
    out = executor.map(func, param_list)
    data.append(out)

output = list(data[0])

output

## Sample completeness

In [None]:
import json

with open(wd+'EMMD_Sample.json','r') as fp:
    Sample = json.load(fp)

In [None]:
md_name = 'md_100ns'
for i in range(len(Sample)):
    model_dir = Sample[i]
    dir_path = wd+model_dir+'/complex/mdf/'
    print(model_dir, is_traj_available(dir_path, md_name))

## First 10 ns

Serial implementation

In [None]:
MyTags  = [
    ['cWza', 'conformation0'],
    ['cWza', 'conformation1'],
    ['cWza-K375C', 'conformation0'],
    ['cWza-K375C', 'conformation1'],
    ['cWza-S355C', 'conformation0'],
    ['cWza-S355C', 'conformation1'],
    ['cWza-Y373C', 'conformation1']
]

CONFORMATIONS = {
    'cWza':['conformation0', 'conformation1'],
    'cWza-K375C':['conformation0', 'conformation1'],
    'cWza-S355C':['conformation0', 'conformation1'],
    'cWza-Y373C':['conformation1'],
}

fig, ax = plt.subplots(2,2,figsize=(10,10))
seaborn.set_style('darkgrid')

axes = {
    'cWza':ax[0,0],
    'cWza-K375C':ax[0,1],
    'cWza-S355C':ax[1,0],
    'cWza-Y373C':ax[1,1]
}

CMAPS = {
    'conformation0':'Blues',
    'conformation1':'Greens'
}

name = 'md_100ns'
for tag in MyTags:
    data = []
    mutant, C = tag
    for i in range(2):
        model_dir = Sample[i]
        mutant_sample, C_sample, model_name = model_dir.split('/')
        comparison = [mutant_sample, C_sample] == tag
        if comparison:
            dir_path = wd+model_dir+'/complex/mdf/'
            Rg_traj = get_Rg_components_traj(dir_path, name)
            data = data + Rg_traj[:100]
    data = numpy.array(data)
    try:
        Rg_x, Rg_y, Rg_z = data.T
        seaborn.scatterplot(Rg_x, Rg_z,color='black',ax=axes[mutant])
    except:
        pass
    
#     seaborn.kdeplot(X,Z,n_levels=20,cmap=CMAPS[C],ax=axes[mutant])

plt.show()

Parallel implementation

In [None]:
import concurrent.futures     
n_threads = 4

data = []

with concurrent.futures.ThreadPoolExecutor(max_workers = n_threads) as executor:
    out = executor.map(func, param_list)
    data.append(out)

In [None]:
def func(dir_path):
    """Single-argument wrapper around get_Rg_components_traj for executor.map.

    The run name is fixed to 'md_100ns'; `dir_path` is forwarded unchanged.
    """
    return get_Rg_components_traj(dir_path, 'md_100ns')

In [None]:
n_cores = 20
DATA = {}

for tag in MyTags:
    data = []
    param_list = []
    mutant, C = tag
    for i in range(100):
        model_dir = Sample[i]
        mutant_sample, C_sample, model_name = model_dir.split('/')
        comparison = [mutant_sample, C_sample] == tag
        if comparison:
            dir_path = wd+model_dir+'/complex/mdf/'
            param_list.append( dir_path )
    print(len(param_list))
    with concurrent.futures.ProcessPoolExecutor(max_workers = n_cores) as executor:
        out = executor.map(func, param_list)
        data.append(out)
    
    DATA[json.dumps(tag)] = list(data)

In [None]:
DATA

In [None]:
[list(DATA[k][0]) for k in DATA.keys()]

In [None]:
MyTags  = [
    ['cWza', 'conformation0'],
    ['cWza', 'conformation1'],
    ['cWza-K375C', 'conformation0'],
    ['cWza-K375C', 'conformation1'],
    ['cWza-S355C', 'conformation0'],
    ['cWza-S355C', 'conformation1'],
    ['cWza-Y373C', 'conformation1']
]

DATA = {}
name = 'md_100ns'

for tag in MyTags:
    data = []
    mutant, C = tag
    for i in range(len(Sample)):
        model_dir = Sample[i]
        mutant_sample, C_sample, model_name = model_dir.split('/')
        comparison = [mutant_sample, C_sample] == tag
        if comparison:
            dir_path = wd+model_dir+'/complex/mdf/'
            Rg_traj = get_Rg_components_traj(dir_path, name)
            data = data + Rg_traj[:100]
    DATA[json.dumps(tag)] = data

In [None]:
import concurrent.futures     

In [None]:
import concurrent.futures     
n_threads = 4

data = []

with concurrent.futures.ThreadPoolExecutor(max_workers = n_threads) as executor:
    out = executor.map(func, param_list)
    data.append(out)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation

fig, ax = plt.subplots()

x = np.arange(0, 2*np.pi, 0.01)
line, = ax.plot(x, np.sin(x))


def init():
    """Blank the curve; only required for blitting to give a clean slate."""
    blank_values = [np.nan] * len(x)
    line.set_ydata(blank_values)
    return (line,)


def animate(i):
    """Shift the sine curve for animation step `i` and return the updated artist."""
    phase = i / 100
    line.set_ydata(np.sin(x + phase))  # update the data.
    return (line,)


ani = animation.FuncAnimation(
    fig, animate, init_func=init, interval=2, blit=True, save_count=50)

# To save the animation, use e.g.
#
# ani.save("movie.mp4")
#
# or
#
# from matplotlib.animation import FFMpegWriter
# writer = FFMpegWriter(fps=15, metadata=dict(artist='Me'), bitrate=1800)
# ani.save("movie.mp4", writer=writer)

plt.show()

## Last 10 ns

# HOLE metrics

* HOLE wrapper implemented with `MDAnalysis`
* Needs HOLE to be compiled and located in `~/hole2/exe/hole`, although this path can be changed.

https://www.mdanalysis.org/docs/documentation_pages/analysis/hole.html

## Single PDB

In [None]:
%matplotlib notebook

In [None]:
from MDAnalysis.analysis.hole import HOLE
from MDAnalysis.tests.datafiles import PDB_HOLE

Test model

In [None]:
H = HOLE(PDB_HOLE, executable="~/hole2/exe/hole")  # set path to your hole binary
H.run()
H.collect()
H.plot(linewidth=3, color="black", label=False)

In [None]:
H.min_radius()[0][-1]

Docked model

In [None]:
wd = '/projects/s21/ba13026/Wza_Modeling/L-structures/rosetta/bg_test/md_relax/'

In [None]:
mutant = 'cWza-K375C'
C = 'conformation0'
model_dir = mutant+'/'+C 
model_name = 'refined1_0001_INPUT_0900_ignorechain'
model_pdb = wd +model_dir+'/'+model_name+'/'+model_name+'.pdb'

In [None]:
model_pdb[:-4]+'.hole_out'

In [None]:
wd

In [None]:
param_list[0]

In [None]:
H = HOLE(
        model_pdb, 
        executable="~/hole2/exe/hole",
        logfile=model_pdb[:-4]+'.hole_out'
        )  

H.check_and_fix_long_filename('hi',tmpdir='/home/ba13026/tmp_hole')

In [None]:
H.run()

In [None]:
H.min_radius()[0][-1]

In [None]:
fig=plt.figure()
H.collect()
H.plot(linewidth=3, color="black")
plt.show()

In [None]:
H.min_radius()[0][-1]

In [None]:
Z_coords = [x[1] for x in numpy.array(list(H.profiles[0]))]
HOLE_L = max(Z_coords) - min(Z_coords)

In [None]:
HOLE_L

##    My own wrapper

In [1]:
import os
import sys
sys.path.append('/home/ba13026/mpmodeling/analysis')
import analyse_HOLE

In [2]:
model_pdb = '/projects/s21/ba13026/Wza_Modeling/L-structures/rosetta/bg_test/md_relax/cWza/conformation0/refined1_0001_INPUT_0736_ignorechain/complex/mdf/md_100ns/Protein_0.pdb'

In [3]:
tmp_wd = os.path.dirname(model_pdb)
os.chdir(tmp_wd)

analyse_HOLE.hole(os.path.basename(model_pdb))

In [8]:
def get_HOLE_Rmin(model_pdb):
    """Run the local `analyse_HOLE.hole` wrapper on a PDB and return its first dimension.

    The function changes into the folder that contains the PDB and passes only
    the file's base name to `analyse_HOLE.hole` (presumably to keep the path
    handed to HOLE short; HOLE is noted elsewhere in this notebook to crash on
    long path names). The previous working directory is restored afterwards.

    Parameters
    ----------
    model_pdb : str
        Path of the PDB file to analyse.

    Returns
    -------
    The first element of the HOLE dimensions returned by `analyse_HOLE.hole`
    (presumably the minimum pore radius, going by the function name; confirm
    against `analyse_HOLE`), or the string 'Fail' if the analysis raises.
    """
    start_wd = os.getcwd()
    tmp_wd = os.path.dirname(model_pdb)
    # A bare file name has no directory part; stay where we are in that case.
    if tmp_wd:
        os.chdir(tmp_wd)
    try:
        HOLE_dimensions,HOLE_conductance_estimates = analyse_HOLE.hole(os.path.basename(model_pdb))
        return HOLE_dimensions[0]
    except Exception:
        # Best effort, as before; KeyboardInterrupt/SystemExit now propagate.
        return 'Fail'
    finally:
        # Do not leave the kernel (or worker process) in a different directory.
        os.chdir(start_wd)

In [9]:
get_HOLE_Rmin(model_pdb)

6.07572

## MD trajectory

Test model

In [None]:
import MDAnalysis as mda
from MDAnalysis.analysis.hole import HOLEtraj
from MDAnalysis.tests.datafiles import MULTIPDB_HOLE

u = mda.Universe(MULTIPDB_HOLE)
H = HOLEtraj(u, executable="~/hole2/exe/hole")
H.run()
H.plot3D()

Docked model

In [None]:
import MDAnalysis as mda

In [None]:
mutant = 'cWza-K375C'
C = 'conformation0'
model_dir = mutant+'/'+C 
model_name = 'refined1_0001_INPUT_0900_ignorechain'

name = "md_100ns"
tpr_path = wd + model_dir+ '/' +model_name+'/complex/mdf/'+ name + ".tpr"
xtc_path = wd + model_dir+ '/' +model_name+'/complex/mdf/'+ name + ".xtc"

In [None]:
u = mda.Universe(tpr_path, xtc_path, in_memory=True)

<span style="color:red">Execution: 30 mins</span>

In [None]:
import MDAnalysis as mda
from MDAnalysis.analysis.hole import HOLEtraj
# from MDAnalysis.tests.datafiles import MULTIPDB_HOLE

# u = mda.Universe(MULTIPDB_HOLE)
H = HOLEtraj(u, executable="~/hole2/exe/hole")
H.run()
H.plot3D()

In [None]:
x = H.min_radius()

In [None]:
import numpy

In [None]:
import matplotlib.pyplot as plt

plt.plot(numpy.array(x).T[-1])
plt.show()

## Alternative Implementation

### Proof of concept

Single PDB

In [None]:
import MDAnalysis as mda

def get_HOLE_Rmin(model_pdb):
    """Run HOLE on a single PDB file and return ``H.min_radius()[0][-1]``.

    Parameters
    ----------
    model_pdb : str
        Path of the PDB file handed to the MDAnalysis HOLE wrapper.

    Returns
    -------
    The last entry of the first row of ``H.min_radius()`` (presumably the
    minimum pore radius of the only profile; confirm against the MDAnalysis
    HOLE documentation).
    """
    # The unused Universe/selection and the unreachable clean-up code that
    # followed the return statement have been removed; HOLE reads the file itself.
    H = HOLE(model_pdb, executable="~/hole2/exe/hole")  # set path to your hole binary
    H.run()
    H.collect()
    return H.min_radius()[0][-1]

In [None]:
mutant = 'cWza-K375C'
C = 'conformation0'
model_dir = mutant+'/'+C 
model_name = 'refined1_0001_INPUT_0900_ignorechain'
model_pdb = wd +model_dir+'/'+model_name+'/'+model_name+'.pdb'

In [None]:
model_pdb

In [None]:
model_Rmin = get_HOLE_Rmin(model_pdb)

In [None]:
import sys
sys.path.append('/home/ba13026/mpmodeling/')

from analyse_HOLE import hole

In [None]:
HOLE_data = hole(model_pdb)

<span style="color:red">ISSUE</span>: 
* Need to create a soft link to the PDB to prevent HOLE from crashing due to a long path name.
* `MDAnalysis` already implements this solution.

Multiple frames, parallel implementation

In [None]:
path0 = wd+"cWza/conformation0/refined1_0001_INPUT_0908_ignorechain/complex/mdf/md_100ns/"

In [None]:
func = get_HOLE_Rmin

In [None]:
param_list = []
for i in range(100):
    model_pdb = path0+'Protein_'+str(i)+'.pdb'
    param_list.append(model_pdb)

**Serial**

In [None]:
for i in range(3):
    model_pdb = param_list[i]
    R_min = func(model_pdb)
    print(R_min)

**Parallel**

In [None]:
import concurrent.futures     
n_threads = 2

data = []

with concurrent.futures.ProcessPoolExecutor(max_workers = n_threads) as executor:
    out = executor.map(func, param_list[:3])
    data.append(out)

In [None]:
list(data[0])

In [None]:
output = list(data[0])

# Metrics

In [None]:
import os
import numpy 
import isambard_dev
import pandas
import seaborn
import MDAnalysis as mda
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  
%matplotlib notebook

In [None]:
def get_Rg_components(model_pdb):
    """Return three radius-of-gyration-like values for the protein in a PDB file.

    For each diagonal element I_ii of the moment-of-inertia tensor of the
    "protein" selection, sqrt(I_ii / M) is computed, where M is the summed
    atomic mass.

    Parameters
    ----------
    model_pdb : str
        Path of the structure file passed to ``mda.Universe``.

    Returns
    -------
    tuple
        ``(Rg_x, Rg_y, Rg_z)`` in the order of the tensor diagonal.
    """
    u = mda.Universe(model_pdb)
    protein = u.select_atoms("protein")
    protein_mass = float(numpy.sum(protein.masses))
    protein_inertia = protein.moment_of_inertia() # tensor of inertia
    Rg_x, Rg_y, Rg_z = [numpy.sqrt(protein_inertia[i][i]/protein_mass) for i in range(3)]
    # The clean-up code that used to follow the return was unreachable and
    # has been removed.
    return Rg_x, Rg_y, Rg_z

In [None]:
wd = '/projects/s21/ba13026/Wza_Modeling/L-structures/rosetta/bg_test/md_relax/'

## Radius of gyration decomposition

## Docked structures: All conformation classified models

**STANDARD LAYOUT**

In [None]:
MUTANTS = ['cWza','cWza-K375C','cWza-S355C','cWza-Y373C']

CONFORMATIONS = {
    'cWza':['conformation0', 'conformation1'],
    'cWza-K375C':['conformation0', 'conformation1'],
    'cWza-S355C':['conformation0', 'conformation1'],
    'cWza-Y373C':['conformation1'],
}

fig, ax = plt.subplots(2,2,figsize=(14,12))
seaborn.set_style('darkgrid')

axes = {
    'cWza':ax[0,0],
    'cWza-K375C':ax[0,1],
    'cWza-S355C':ax[1,0],
    'cWza-Y373C':ax[1,1]
}

CMAPS = {
    'conformation0':'Blues',
    'conformation1':'Greens'
}

for i in range(len(MUTANTS)):
    mutant = MUTANTS[i]
    for C in CONFORMATIONS[mutant]:
        X1 = []
        X2 = []
        mutant_dir = mutant+'/'+C 
        for model_dir in [mutant_dir+'/'+model for model in os.listdir(wd+mutant_dir)]:
            model_name = model_dir.split('/')[-1]
            model_pdb = wd+'/'+model_dir+'/'+model_name+'.pdb'
            Rg_x, Rg_y, Rg_z = get_Rg_components(model_pdb)
            X1.append(Rg_x)
            X2.append(Rg_z)
        seaborn.kdeplot(X1,X2,n_levels=20,cmap=CMAPS[C],ax=axes[mutant],zorder=0)
        for maximum in find_maxima(X1, X2):
            x,y = maximum
            axes[mutant].scatter(x,y,color='red',zorder=1)
    ###############################################                            
    # Customise plot
    ###############################################
    axes[mutant].set_title("Docked "+mutant+": Whole Conformational Group", fontsize=15)
    axes[mutant].tick_params(axis='both',direction='in',labelsize=20)
    axes[mutant].set_xlabel("$R_g$",fontsize=25)
    axes[mutant].set_ylabel("$R_g$",fontsize=25)

plt.tight_layout()
plt.show()

**CUSTOMISED LAYOUT**

In [None]:
import random
import pandas as pd
import numpy
from scipy.stats import gaussian_kde
from scipy import ndimage as ndi
from skimage.feature import peak_local_max
from shapely.geometry.polygon import Polygon
from shapely.ops import cascaded_union
import matplotlib.pyplot as plt
%matplotlib notebook

In [None]:
def density_estimation(m1,m2,n_grid=400):
    """Estimate the 2D data density on a regular grid with a Gaussian kernel.

    Parameters
    ----------
    m1, m2 : sequence of float
        Coordinates of the data points along the first and second axis.
    n_grid : int, optional
        Number of grid points per axis (default 400, the previously
        hard-coded value).

    Returns
    -------
    X, Y, Z : numpy.ndarray
        Arrays of shape ``(n_grid, n_grid)``. X and Y hold the grid
        coordinates spanning [min, max] of `m1` and `m2` (X varies along the
        first index, Y along the second); Z holds the kernel density
        evaluated at each grid point.
    """
    xmin, xmax = min(m1), max(m1)
    ymin, ymax = min(m2), max(m2)
    # A complex "step" tells mgrid to produce that many points, end inclusive.
    n_points = complex(0, n_grid)
    X, Y = numpy.mgrid[xmin:xmax:n_points, ymin:ymax:n_points]
    positions = numpy.vstack([X.ravel(), Y.ravel()])
    values = numpy.vstack([m1, m2])
    kernel = gaussian_kde(values)
    Z = numpy.reshape(kernel(positions).T, X.shape)
    return X,Y,Z

def find_maxima(X1,X2):
    """Return the (x, y) coordinates of the density peaks of the data.

    The density grid comes from density_estimation(X1, X2); peaks are located
    with peak_local_max and mapped from grid indices back to coordinates.
    """
    grid_x, grid_y, density = density_estimation(X1,X2)
    # Find and filter peaks by minimum pixel distance and by height relative
    # to the global maximum.
    peak_indices = peak_local_max(density, threshold_rel=0.5, min_distance=20)
    return [(grid_x[row][0], grid_y[row][col]) for row, col in peak_indices]

def find_boundary_coords(contour_plot):
    """Return the coordinates of the boundary of the union of all contour lines.

    Every segment of every level in ``contour_plot.allsegs`` is converted to a
    shapely Polygon, the polygons are merged with ``cascaded_union``, and the
    boundary coordinates of the merged shape are returned transposed, so the
    result can be unpacked directly, e.g. ``ax.fill(*result)``.

    NOTE(review): segments are not filtered, so open or very short contour
    segments are also passed to ``Polygon``; presumably only closed contours
    are expected. Confirm.
    NOTE(review): ``.boundary.coords`` presumably requires the union to be a
    single polygon without holes. Confirm against shapely.
    """
    polygons_list = []
    for l in range(len(contour_plot.allsegs)):
        prob = contour_plot.levels[l]  # contour level; read but not used below
        for k in range(len(contour_plot.allsegs[l])):
            contour_plot_coords = contour_plot.allsegs[l][k]
            polygons_list.append(Polygon(contour_plot_coords))

    # Merge all contour polygons into one shape and take its outline
    polygons_union = cascaded_union(polygons_list)
    polygons_union_boundary = numpy.array(list(polygons_union.boundary.coords)).T
    contour_plot_boundary = polygons_union_boundary
    
    return contour_plot_boundary

In [None]:
fig, ax = plt.subplots(1,1)

Colours = ['blue','green','red']
for i in range(len(DATA)):
    XY = DATA[i]
    X1,X2 = XY[0],XY[1]
    Xc,Yc,Zc = list( density_estimation(X1,X2) )
    contour_plot = ax.contour(Xc,Yc,Zc,20,colors='k',alpha=0.05)
    contour_plot_boundary = find_boundary_coords(contour_plot)  

    ax.fill(*contour_plot_boundary,color=Colours[i],alpha=0.2,zorder=0)

    for maximum in find_maxima(X1, X2):
        x,y = maximum
        ax.scatter(x,y,color=Colours[i],zorder=1)

ax.set_title("Overlapping Transparent Contour Plots")
ax.set_xlabel("X")
ax.set_ylabel("Y")

plt.show()

## Docked structures: Sample conformation classified models

In [None]:
import json

with open(wd+'EMMD_Sample.json','r') as fp:
    Sample = json.load(fp)

In [None]:
MyTags  = [
    ['cWza', 'conformation0'],
    ['cWza', 'conformation1'],
    ['cWza-K375C', 'conformation0'],
    ['cWza-K375C', 'conformation1'],
    ['cWza-S355C', 'conformation0'],
    ['cWza-S355C', 'conformation1'],
    ['cWza-Y373C', 'conformation1']
]

CONFORMATIONS = {
    'cWza':['conformation0', 'conformation1'],
    'cWza-K375C':['conformation0', 'conformation1'],
    'cWza-S355C':['conformation0', 'conformation1'],
    'cWza-Y373C':['conformation1'],
}

fig, ax = plt.subplots(2,2,figsize=(10,10))
seaborn.set_style('darkgrid')

axes = {
    'cWza':ax[0,0],
    'cWza-K375C':ax[0,1],
    'cWza-S355C':ax[1,0],
    'cWza-Y373C':ax[1,1]
}

CMAPS = {
    'conformation0':'Blues',
    'conformation1':'Greens'
}

for tag in MyTags:
    mutant, C = tag
    X = []; Z = []
    for i in range(len(Sample)):
        model_dir = Sample[i]
        mutant_sample, C_sample, model_name = model_dir.split('/')
        comparison = [mutant_sample, C_sample] == tag
        if comparison:
            model_pdb = wd+model_dir+'/'+model_name+'.pdb'
            Rg_x, Rg_y, Rg_z = get_Rg_components(model_pdb)
            X.append(Rg_x)
            Z.append(Rg_z)
    seaborn.kdeplot(X,Z,n_levels=20,cmap=CMAPS[C],ax=axes[mutant])

plt.show()

## MD Protein frames: Sample classified models

### Generate Protein frames for all Sample MD trajectories

NOTES:

* Had to modify time length of trajectory to analyse (`0-100000 # (=100ns) ps`)
* Also, modified timestep of frames for extraction (`dt=100 # ps`)

**FIRST**: Remove old frames folder and back-up files

```bash
cat EMMD_Sample.txt | awk '{system("rm -r "$1"/complex/mdf/md_100ns")}'                               

cat EMMD_Sample.txt | awk '{system("rm -r "$1"/complex/mdf/md_100ns/#Protein*")}'
```

**SECOND**: Re-do frame extraction for increased sampling
```bash
nohup python parallel_protein_extraction_urmd.py 20 EMMD_Sample.json > parallel_protein_extraction_urmd.out &
```

### Extract Radius of Gyration for frames

In [None]:
func = get_Rg_components

param_list = []
for i in range(100):
    model_pdb = path0+'Protein_'+str(i)+'.pdb'
    param_list.append(model_pdb)

In [None]:
import concurrent.futures     
n_threads = 10

data = []

with concurrent.futures.ProcessPoolExecutor(max_workers = n_threads) as executor:
    out = executor.map(func, param_list)
    data.append(out)

In [None]:
output = list(data[0])

### Extract HOLE dimensions 

In [None]:
import MDAnalysis as mda
from MDAnalysis.analysis.hole import HOLE

def get_HOLE_dimensions(model_pdb):
    """Run HOLE on a single PDB file and return the pore length and minimum radius.

    Parameters
    ----------
    model_pdb : str
        Path of the PDB file handed to the MDAnalysis HOLE wrapper.

    Returns
    -------
    tuple
        ``(HOLE_L, HOLE_Rmin)``: the span (max - min) of the second field of
        the first HOLE profile, and ``H.min_radius()[0][-1]``.
    """
    # The unused Universe/selection and the unreachable clean-up code that
    # followed the return statement have been removed; HOLE reads the file itself.
    H = HOLE(model_pdb, executable="~/hole2/exe/hole")  # set path to your hole binary
    H.run()
    H.collect()
    # HOLE pore length
    Z_coords = [x[1] for x in numpy.array(list(H.profiles[0]))]
    HOLE_L = max(Z_coords) - min(Z_coords)
    # HOLE pore minimum radius
    HOLE_Rmin = H.min_radius()[0][-1]
    return HOLE_L, HOLE_Rmin

In [None]:
mutant = 'cWza-K375C'
C = 'conformation0'
model_dir = mutant+'/'+C 
model_name = 'refined1_0001_INPUT_0900_ignorechain'
model_pdb = wd +model_dir+'/'+model_name+'/'+model_name+'.pdb'

In [None]:
get_HOLE_dimensions(model_pdb)

Trajectory

In [None]:
func = get_HOLE_dimensions

In [None]:
path0 = wd+"cWza/conformation0/refined1_0001_INPUT_0908_ignorechain/complex/mdf/md_100ns/"

In [None]:
param_list = []
for i in range(1000):
    model_pdb = path0+'Protein_'+str(i)+'.pdb'
    param_list.append(model_pdb)

In [None]:
for i in range(990,1000):
    model_pdb = param_list[i]
    L, Rmin = func(model_pdb)
    print(L, Rmin)

In [None]:
MyTags  = [
    ['cWza', 'conformation0'],
    ['cWza', 'conformation1'],
    ['cWza-K375C', 'conformation0'],
    ['cWza-K375C', 'conformation1'],
    ['cWza-S355C', 'conformation0'],
    ['cWza-S355C', 'conformation1'],
    ['cWza-Y373C', 'conformation1']
]

CONFORMATIONS = {
    'cWza':['conformation0', 'conformation1'],
    'cWza-K375C':['conformation0', 'conformation1'],
    'cWza-S355C':['conformation0', 'conformation1'],
    'cWza-Y373C':['conformation1'],
}

fig, ax = plt.subplots(2,2,figsize=(10,10),sharex=True)
seaborn.set_style('darkgrid')

axes = {
    'cWza':ax[0,0],
    'cWza-K375C':ax[0,1],
    'cWza-S355C':ax[1,0],
    'cWza-Y373C':ax[1,1]
}

CMAPS = {
    'conformation0':'Blues',
    'conformation1':'Greens'
}

MySample = list(numpy.random.choice(Sample,120))

for tag in MyTags:
    mutant, C = tag
    X1 = []; X2 = []
    for i in range(len(MySample)):
        model_dir = MySample[i]
        mutant_sample, C_sample, model_name = model_dir.split('/')
        comparison = [mutant_sample, C_sample] == tag
        if comparison:
            path0 = wd+model_dir+'/complex/mdf/md_100ns/'
            if len(os.listdir(path0)) > 900:
                ################
                param_list = []
                for i in range(900,1000):
                    model_pdb = path0+'Protein_'+str(i)+'.pdb'
                    param_list.append(model_pdb)
                ################
                for i in range(len(param_list)):
                    model_pdb = param_list[i]
                    L, Rmin = get_HOLE_dimensions(model_pdb)
                    X1.append(L)
                    X2.append(Rmin)
            else:
                pass
    try: 
        seaborn.kdeplot(X1,X2,n_levels=20,cmap=CMAPS[C],ax=axes[mutant])
        axes[mutant].set_title(mutant+": HOLE (Last 10ns)", fontsize=15)
    except:
        pass
    axes[mutant].set_xlim(65,85)

plt.show()

# Comparison of Pore Profiles

**Main profiles to compare**

* Isambard-based VdW profiles (own method)
* HOLE hydrodynamic profiles (suspected to overestimate length)

**Objective**
Determine the difference between profiles for the pore minimum radius and length dimensions

**Datasets**

* All conformation-classified docked structures
* Conformation-classified docked structures from Sample for MD
* Protein frames from First 10ns of MD (Sample)
* Protein frames from Last 10ns of MD (Sample)

## Define Datasets

### All conformation-classified docked structures

In [None]:
MUTANTS = ['cWza','cWza-K375C','cWza-S355C','cWza-Y373C']

CONFORMATIONS = {
    'cWza':['conformation0', 'conformation1'],
    'cWza-K375C':['conformation0', 'conformation1'],
    'cWza-S355C':['conformation0', 'conformation1'],
    'cWza-Y373C':['conformation1'],
}

param_list = []
for i in range(len(MUTANTS)):
    mutant = MUTANTS[i]
    for C in CONFORMATIONS[mutant]:
        mutant_dir = mutant+'/'+C 
        for model_dir in [mutant_dir+'/'+model for model in os.listdir(wd+mutant_dir)]:
            model_name = model_dir.split('/')[-1]
            model_pdb = wd+model_dir+'/'+model_name+'.pdb'
            param_list.append(model_pdb)

### Conformation-classified docked structures from Sample for MD

In [None]:
import json

# Load the model directories selected for MD (paths relative to `wd`).
with open(wd + 'EMMD_Sample.json', 'r') as fp:
    Sample = json.load(fp)

# NOTE(review): this cell appends to whatever `param_list` already holds,
# whereas the MD-frame dataset cells start from an empty list -- confirm that
# accumulating on top of the previous dataset is intended.
for model_dir in Sample:
    model_name = model_dir.rsplit('/', 1)[-1]
    model_pdb = wd + model_dir + '/' + model_name + '.pdb'
    param_list.append(model_pdb)

### Protein frames from First 10ns of MD (Sample)

In [62]:
import json

# Load the model directories selected for MD (paths relative to `wd`).
with open(wd + 'EMMD_Sample.json', 'r') as fp:
    Sample = json.load(fp)

frame_init = 0     # first frame of the window (0 ns)
frame_final = 100  # one past the last frame of the window (10 ns)

param_list = []
for model_dir in Sample:
    model_name = model_dir.rsplit('/', 1)[-1]
    frames_dir = wd + model_dir + '/complex/mdf/md_100ns/'
    # Skip models whose frame directory is missing or holds 900 entries or fewer
    # (presumably an unfinished trajectory extraction -- TODO confirm).
    if not os.path.isdir(frames_dir):
        continue
    if not len(os.listdir(frames_dir)) > 900:
        continue
    for j in range(frame_init, frame_final):
        model_pdb = '{}Protein_{}.pdb'.format(frames_dir, j)
        param_list.append(model_pdb)

### Protein frames from Last 10ns of MD (Sample)

In [None]:
import json

# Load the model directories selected for MD (paths relative to `wd`).
with open(wd + 'EMMD_Sample.json', 'r') as fp:
    Sample = json.load(fp)

# Same frame spacing as the first-10 ns window (100 frames = 10 ns).
frame_init = 900     # first frame of the window (90 ns)
frame_final = 1000   # one past the last frame of the window (100 ns)

param_list = []
for model_dir in Sample:
    model_name = model_dir.rsplit('/', 1)[-1]
    frames_dir = wd + model_dir + '/complex/mdf/md_100ns/'
    # Skip models whose frame directory is missing or holds 900 entries or fewer
    # (presumably an unfinished trajectory extraction -- TODO confirm).
    if not os.path.isdir(frames_dir):
        continue
    if not len(os.listdir(frames_dir)) > 900:
        continue
    for j in range(frame_init, frame_final):
        model_pdb = '{}Protein_{}.pdb'.format(frames_dir, j)
        param_list.append(model_pdb)

## Define functions for pore profile extraction

### HOLE hydrodynamic profile

In [None]:
import MDAnalysis as mda
from MDAnalysis.analysis.hole import HOLE

def get_HOLE_profile(model_pdb, endrad=22.0):
    """Run HOLE on a PDB file and return its pore profile.

    Parameters
    ----------
    model_pdb : str
        Path to the PDB file handed to HOLE.
    endrad : float, optional
        Passed to HOLE as ``endrad``. The default (22.0) is the default that
        MDAnalysis' HOLE wrapper documents for this argument, so callers that
        omit it get the wrapper's stock behaviour.

    Returns
    -------
    (Z_coords, R_coords) : tuple of lists
        Second and third field of every record in the first collected
        profile -- presumably the coordinate along the pore axis and the pore
        radius at that point (TODO confirm against the HOLE wrapper docs).
    """
    # The executable path is specific to the author's machine.
    H = HOLE(model_pdb, executable="~/hole2/exe/hole", endrad=endrad)  # set path to your hole binary
    H.run()
    H.collect()
    # Materialise the profile once and read both columns from it.
    profile = numpy.array(list(H.profiles[0]))
    Z_coords = [record[1] for record in profile]
    R_coords = [record[2] for record in profile]
    return Z_coords, R_coords

**EXAMPLE**

In [None]:
# HOLE profile of the first structure (endrad = 20) as a two-column z/r table.
hole_profile = get_HOLE_profile(param_list[0], 20)
data = numpy.array(hole_profile).T
df_hole = pandas.DataFrame(data, columns=['z', 'r'])

In [None]:
# Quick look at the HOLE profile: radius r against position z.
df_hole.plot(x='z',y='r')

### Isambard-based VdW profile

In [None]:
import sys
sys.path.append('/home/ba13026/mpmodeling/analysis/')
import isambard_dev
import radial_profile

**EXAMPLE**

In [None]:
# Load the first structure of the current dataset as an AMPAL object.
p = isambard_dev.ampal.convert_pdb_to_ampal(param_list[0])
# NOTE(review): RadialProfile.vdw('simple') comes from the project-local
# radial_profile module; its transposed result is used below as two columns
# (z, r) -- confirm the row order against that module.
p_rad = radial_profile.RadialProfile(p)
data = p_rad.vdw('simple').T

# Two-column table matching the layout used for the HOLE profile.
df_isambard = pandas.DataFrame(data, columns=['z','r'])

In [None]:
# Quick look at the Isambard VdW profile: radius r against position z.
df_isambard.plot(x='z',y='r')

### Comparison between profiles

**SIMPLE**

In [None]:
%matplotlib notebook

In [None]:
fig, ax = plt.subplots(1, 1)

# Sweep HOLE's `endrad` argument on the first structure to see how the
# reported profile depends on it.
endrad_values = numpy.linspace(15, 22, 3)
profiles_HOLE = [
    numpy.array(get_HOLE_profile(param_list[0], endrad)).T
    for endrad in endrad_values
]

# One curve per endrad value, labelled so the curves can be told apart.
for endrad, data in zip(endrad_values, profiles_HOLE):
    X, Y = data.T
    ax.plot(X, Y, label="endrad = {:.1f}".format(endrad))

ax.set_title("HOLE profile vs. endrad", fontsize=15)
# Raw strings keep the LaTeX backslash out of Python's escape processing.
ax.set_xlabel(r"Pore axis ($\AA$)", fontsize=15)
ax.set_ylabel(r"Radial distance ($\AA$)", fontsize=15)
ax.set_ylim(0, 35)
ax.legend(loc='best')
plt.show()

In [None]:
# Re-plot only the third profile of the endrad sweep (index 2 of profiles_HOLE).
X,Y = profiles_HOLE[2].T
plt.plot(X,Y)

In [None]:
fig, ax = plt.subplots(1, 1, dpi=200)

# HOLE profile of the first structure (endrad = 40), drawn together with its
# mirror image (r -> -r) so the pore appears as a symmetric cross-section.
data = numpy.array(get_HOLE_profile(param_list[0], 40)).T
df_hole = pandas.DataFrame(data, columns=['z', 'r'])
df_hole_reverse = pandas.DataFrame({'z': df_hole['z'], 'r': -df_hole['r']})
df_hole.plot(x='z', y='r', ax=ax, color='green', label='HOLE')
df_hole_reverse.plot(x='z', y='r', color='green', ax=ax, legend=False)

# Isambard VdW profile (df_isambard is built in the Isambard example cell),
# mirrored the same way.
df_isambard_reverse = pandas.DataFrame({'z': df_isambard['z'], 'r': -df_isambard['r']})
df_isambard.plot(x='z', y='r', ax=ax, color='blue', label='Isambard')
df_isambard_reverse.plot(x='z', y='r', color='blue', ax=ax, legend=False)

ax.set_title("Pore Profiles: Comparison", fontsize=15, y=1.05)
# Raw strings keep the LaTeX backslash out of Python's escape processing.
ax.set_xlabel(r"Pore axis ($\AA$)", fontsize=15)
ax.set_ylabel(r"Radial distance ($\AA$)", fontsize=15)

# Equal aspect so radii and axial distances share one length scale.
ax.set_aspect('equal')
ax.set_ylim(-40, 40)
ax.legend(fontsize=5, loc='lower right')
plt.tight_layout()
plt.show()

**DOCKED MODELS**

Compare $R_{min}$ estimates

In [None]:
# Pair the minimum pore radius from both methods for (up to) the first 200
# structures of the current dataset; slicing tolerates shorter datasets.
compare = []
for model_pdb in param_list[:200]:
    # Isambard VdW profile: row 1 holds the radii.
    p = isambard_dev.ampal.convert_pdb_to_ampal(model_pdb)
    p_rad = radial_profile.RadialProfile(p)
    data = p_rad.vdw('simple')
    Rmin_isambard = min(data[1])
    # HOLE profile: get_HOLE_profile returns (Z_coords, R_coords).
    # endrad is given explicitly; 22.0 is the default documented for
    # MDAnalysis' HOLE wrapper.
    # NOTE(review): confirm this is the cutoff intended for the comparison.
    data = numpy.array(get_HOLE_profile(model_pdb, 22.0))
    Rmin_hole = min(data[1])
    compare.append([Rmin_isambard, Rmin_hole])

In [None]:
fig = plt.figure(dpi=150)

# Columns of `compare`: Isambard estimate (X) and HOLE estimate (Y).
X, Y = numpy.array(compare).T
# Least-squares straight line; p_coeff[0] is the slope.
p_coeff = numpy.polyfit(X, Y, 1)
line_fit = numpy.poly1d(p_coeff)

# The label reports the number of points actually plotted.
plt.scatter(X, Y, label="{} Docked PDBs".format(len(X)))
plt.plot(X, line_fit(X), label="Fitting (m=" + str(round(p_coeff[0], 2)) + ")")

plt.title("Pore Profiles: Comparison", fontsize=20, y=1.05)
# Raw strings keep the LaTeX backslash out of Python's escape processing.
plt.xlabel(r"$R_{min}$ Isambard ($\AA$)", fontsize=15)
plt.ylabel(r"$R_{min}$ HOLE ($\AA$)", fontsize=15)

# gca() returns the axes already holding the data; plt.axes() may create new ones.
plt.gca().set_aspect('equal')
plt.legend(loc='best')
plt.tight_layout()

plt.show()

Compare $L$ estimates

In [None]:
# Pair the pore length (extent of the profile along z) from both methods for
# (up to) the first 200 structures; slicing tolerates shorter datasets.
compare = []
for model_pdb in param_list[:200]:
    # Isambard VdW profile: row 0 holds the positions along the pore axis.
    p = isambard_dev.ampal.convert_pdb_to_ampal(model_pdb)
    p_rad = radial_profile.RadialProfile(p)
    data = p_rad.vdw('simple')
    L_isambard = max(data[0]) - min(data[0])
    # HOLE profile: get_HOLE_profile returns (Z_coords, R_coords).
    # endrad is given explicitly; 22.0 is the default documented for
    # MDAnalysis' HOLE wrapper.
    # NOTE(review): the HOLE length presumably depends on this cutoff --
    # confirm the intended value.
    data = numpy.array(get_HOLE_profile(model_pdb, 22.0))
    L_hole = max(data[0]) - min(data[0])
    compare.append([L_isambard, L_hole])

In [None]:
fig = plt.figure(dpi=150)

# Columns of `compare`: Isambard estimate (X) and HOLE estimate (Y).
X, Y = numpy.array(compare).T
# Least-squares straight line; p_coeff[0] is the slope.
p_coeff = numpy.polyfit(X, Y, 1)
line_fit = numpy.poly1d(p_coeff)

# The label reports the number of points actually plotted.
plt.scatter(X, Y, label="{} Docked PDBs".format(len(X)))
plt.plot(X, line_fit(X), label="Fitting (m=" + str(round(p_coeff[0], 2)) + ")")

plt.title("Pore Profiles: Comparison", fontsize=20, y=1.05)
# Raw strings keep the LaTeX backslash out of Python's escape processing.
plt.xlabel(r"$L$ Isambard ($\AA$)", fontsize=15)
plt.ylabel(r"$L$ HOLE ($\AA$)", fontsize=15)

plt.xticks(range(37, 40))
# gca() returns the axes already holding the data; plt.axes() may create new ones.
plt.gca().set_aspect('equal')
plt.legend(loc='best', fontsize=10)
plt.tight_layout()

plt.show()

NEXT

Use own method to compute pore lengths

## TEST

In [None]:
# Use the first structure of the current dataset as the test case.
model_pdb = param_list[0]

In [None]:
# NOTE(review): in this part of the notebook get_HOLE_Rmin only appears inside
# the insert2db_metrics.py script written by a %%writefile cell -- make sure it
# is defined or imported in the kernel before running this cell.
Rmin_hole = get_HOLE_Rmin(model_pdb)

In [None]:
Rmin_hole

Get length

In [None]:
def get_channel_length(model_pdb):
    """Return the mean z-extent of the chains in a PDB model.

    The file is converted to an AMPAL object; for every chain the length is
    the difference between its largest and smallest atomic z coordinate, and
    the lengths are averaged over all chains.

    Parameters
    ----------
    model_pdb : str
        Path to the PDB file.

    Returns
    -------
    float
        Mean of the per-chain z-extents.
    """
    model_ampal = isambard_dev.ampal.convert_pdb_to_ampal(model_pdb)
    chains_lengths = []
    for chain in model_ampal:
        # Collect the z coordinates once per chain, then take their span.
        z_coords = [atom.z for atom in chain.get_atoms()]
        chains_lengths.append(max(z_coords) - min(z_coords))
    return numpy.mean(chains_lengths)

In [None]:
get_channel_length(model_pdb)

In [None]:
# NOTE(review): presumably a back-of-envelope runtime estimate in minutes,
# assuming ~54 ms per structure -- confirm what the constant 54 stands for.
(len(param_list)*54*1e-3)/60

In [65]:
# Save the current list of PDB paths; this JSON file is the input that
# insert2db_metrics.py takes as its second command-line argument.
param_list_json = wd + "param_list_metrics.json"
with open(param_list_json, "w") as fp:
    json.dump(param_list, fp, indent=4)

In [None]:
%%writefile /home/ba13026/mpmodeling/analysis/setup_db_metrics.py
import os
import sys
import json
from sqlalchemy import Column, ForeignKey, Integer, String, Float
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
from sqlalchemy import create_engine

Base = declarative_base()

class Tags(Base):
    """Identifiers of one analysed PDB structure; the metric tables point at rows of this table."""
    __tablename__ = 'tag'
    id = Column(Integer, primary_key=True) # SQL id (primary key)
    mutant = Column(String(250), nullable=False) # mutant name, e.g. 'cWza'
    group = Column(String(250), nullable=False) # conformation class, e.g. 'conformation0'
    pdb_name = Column(String(250), nullable=False) # name of the docked model
    frame = Column(String(250), nullable=False) # PDB file name without extension, e.g. 'Protein_0'

class Pore_Dimensions(Base):
    """Pore minimum radius and pore length of one structure, linked to its Tags row."""
    # Pore dimensions using HOLE and Isambard
    __tablename__ = 'pore_dimensions'
    id = Column(Integer, primary_key=True)
    #######################################
    pore_Rmin = Column(Float) # From HOLE
    pore_length = Column(Float) # From Isambard
    #######################################
    # Foreign key: the structure these dimensions belong to
    tag_id = Column(Integer, ForeignKey('tag.id'))
    tag = relationship(Tags)
    
class Radii_of_Gyration(Base):
    """Radius-of-gyration components of one structure, linked to its Tags row."""
    # Radius of gyration components (x, y, z)
    __tablename__ = 'radii_of_gyration'
    id = Column(Integer, primary_key=True)
    #######################################
    Rg_x = Column(Float)
    Rg_y = Column(Float)
    Rg_z = Column(Float)
    #######################################
    # Foreign key: the structure these components belong to
    tag_id = Column(Integer, ForeignKey('tag.id'))
    tag = relationship(Tags)

if __name__ == "__main__":
    # Usage: python setup_db_metrics.py <output.db>
    # Creates every table declared on Base in the given SQLite file.
    db_filename = sys.argv[1]
    Base.metadata.create_all(create_engine('sqlite:///' + db_filename))

In [25]:
%%writefile /home/ba13026/mpmodeling/analysis/insert2db_metrics.py
import sys
import os
import numpy
import operator
import subprocess
import json
import isambard_dev
import MDAnalysis as mda
import concurrent.futures
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

# Import modules from folder
modules_path = "/home/ba13026/mpmodeling/analysis/"
if modules_path not in sys.path:
    sys.path.append(modules_path)

import analyse_HOLE
from setup_db_metrics import Base, Tags, Pore_Dimensions, Radii_of_Gyration

def get_tags(model_pdb):
    """Split a PDB path into the identifiers stored in the Tags table.

    The first nine '/'-separated components of the path are discarded (this
    offset is hard-coded, so it only fits paths laid out under the project
    directory used here). Of the remaining components, the first three are
    the mutant, the group and the model name; the last one is the file name,
    whose final four characters (the '.pdb' suffix) are stripped to give the
    frame.

    Returns
    -------
    tuple of str
        (mutant, group, model_name, frame)
    """
    components = model_pdb.split('/')[9:]
    mutant = components[0]
    group = components[1]
    model_name = components[2]
    frame = components[-1][:-4]
    return mutant, group, model_name, frame
     
def get_Rg_components(model_pdb):
    """Compute three radius-of-gyration components of the protein in a PDB file.

    Each component is sqrt(I_ii / M), where I_ii is the i-th diagonal element
    of the inertia tensor returned by ``moment_of_inertia()`` and M is the
    total mass of the "protein" selection.
    NOTE(review): a diagonal inertia element measures the mass spread around
    the corresponding axis -- confirm this is the decomposition intended.

    Parameters
    ----------
    model_pdb : str
        Path to the PDB file.

    Returns
    -------
    tuple or str
        (Rg_x, Rg_y, Rg_z) on success, or the string 'Fail' if anything in
        the calculation raises an ordinary exception.
    """
    try:
        u = mda.Universe(model_pdb)
        protein = u.select_atoms("protein")
        protein_mass = float(numpy.sum(protein.masses))
        protein_inertia = protein.moment_of_inertia() # tensor of inertia
        Rg_x, Rg_y, Rg_z = (
            numpy.sqrt(protein_inertia[i][i] / protein_mass) for i in range(3)
        )
        return Rg_x, Rg_y, Rg_z
    except Exception:
        # Only ordinary errors become the 'Fail' sentinel; KeyboardInterrupt
        # and SystemExit still propagate so a batch run can be stopped.
        return 'Fail'

def get_HOLE_Rmin(model_pdb):
    """Return the first HOLE dimension (used as the pore minimum radius) of a PDB file.

    ``analyse_HOLE.hole`` is called with the bare file name from inside the
    file's directory (presumably because it reads/writes files relative to
    the working directory -- TODO confirm). The previous working directory is
    always restored.

    Parameters
    ----------
    model_pdb : str
        Path to the PDB file; a bare file name is looked up in the current
        directory.

    Returns
    -------
    The first element of the dimensions returned by ``analyse_HOLE.hole``,
    or the string 'Fail' if the HOLE analysis raises an ordinary exception.
    """
    dir_path = os.getcwd()
    # dirname() is '' for a bare file name, and os.chdir('') would raise.
    tmp_wd = os.path.dirname(model_pdb) or os.curdir
    os.chdir(tmp_wd)
    try:
        HOLE_dimensions, _conductance = analyse_HOLE.hole(os.path.basename(model_pdb))
        return HOLE_dimensions[0]
    except Exception:
        # Ordinary failures become the sentinel; KeyboardInterrupt and
        # SystemExit still propagate.
        return 'Fail'
    finally:
        # Restore the caller's working directory on every exit path.
        os.chdir(dir_path)
        
def get_channel_length(model_pdb):
    """Return the mean z-extent of the chains in a PDB model.

    The file is converted to an AMPAL object; for every chain the length is
    the difference between its largest and smallest atomic z coordinate, and
    the lengths are averaged over all chains.

    Parameters
    ----------
    model_pdb : str
        Path to the PDB file.

    Returns
    -------
    float or str
        Mean of the per-chain z-extents, or the string 'Fail' if the
        calculation raises an ordinary exception.
    """
    try:
        model_ampal = isambard_dev.ampal.convert_pdb_to_ampal(model_pdb)
        chains_lengths = []
        for chain in model_ampal:
            # Collect the z coordinates once per chain, then take their span.
            z_coords = [atom.z for atom in chain.get_atoms()]
            chains_lengths.append(max(z_coords) - min(z_coords))
        return numpy.mean(chains_lengths)
    except Exception:
        # Ordinary failures become the sentinel; KeyboardInterrupt and
        # SystemExit still propagate.
        return 'Fail'

def _null_if_failed(value):
    """Map the 'Fail' sentinel returned by the metric helpers to None (SQL NULL)."""
    if isinstance(value, str) and value == 'Fail':
        return None
    return value


def process_model(n):
    """Compute all metrics of the n-th structure in `param_list` and store them.

    Adds one Tags row plus one Radii_of_Gyration row and one Pore_Dimensions
    row that reference it, then commits. A metric whose helper returned the
    'Fail' sentinel is stored as NULL, so the Float columns never receive
    characters of that string.

    NOTE(review): this function uses the module-level `session` and
    `param_list`, which are created in the __main__ block and then used from
    the worker processes started in main() -- confirm that sharing one SQLite
    session across processes is safe in the deployment environment.

    Parameters
    ----------
    n : int
        Index into the module-level `param_list`.
    """
    model_pdb = param_list[n]
    #####################################
    # Model identifiers
    mutant, group, model_name, frame = get_tags(model_pdb)
    model_tags = Tags(
        mutant=mutant,
        group=group,
        pdb_name=model_name,
        frame=frame,
    )
    session.add(model_tags)
    #####################################
    # Radius of gyration decomposition
    rg_components = _null_if_failed(get_Rg_components(model_pdb))
    if rg_components is None:
        rg_components = (None, None, None)
    Rg_x, Rg_y, Rg_z = rg_components
    model_Rgs = Radii_of_Gyration(
        Rg_x=Rg_x,
        Rg_y=Rg_y,
        Rg_z=Rg_z,
        tag=model_tags,
    )
    session.add(model_Rgs)
    #####################################
    # Pore dimensions
    pore_Rmin = _null_if_failed(get_HOLE_Rmin(model_pdb))
    pore_length = _null_if_failed(get_channel_length(model_pdb))
    model_pore_dimensions = Pore_Dimensions(
        pore_Rmin=pore_Rmin,
        pore_length=pore_length,
        tag=model_tags,
    )
    session.add(model_pore_dimensions)
    #####################################
    # COMMIT CHANGES TO DATABASE
    session.commit()

#####################################
# Parallel Process Execution
def main():
    """Run process_model for every entry of `param_list` on `ncores` worker processes.

    A failure in one structure does not stop the run: the exception raised in
    the worker is reported on stderr together with the offending path, and
    the remaining structures are still processed.
    """
    with concurrent.futures.ProcessPoolExecutor(max_workers = ncores) as executor:
        futures = {
            executor.submit(process_model, n): n for n in range(len(param_list))
        }
        for future in concurrent.futures.as_completed(futures):
            error = future.exception()
            if error is not None:
                print(
                    "process_model failed for {}: {!r}".format(
                        param_list[futures[future]], error
                    ),
                    file=sys.stderr,
                )

if __name__ == '__main__':
    # Usage: python insert2db_metrics.py <database.db> <param_list.json> <ncores>
    dbfile  = sys.argv[1] # Database filename
    param_json = sys.argv[2] # JSON file holding the list of PDB paths to process
    ncores = int(sys.argv[3]) # Number of cores

    # Load the list of PDB paths; process_model() indexes this module-level
    # list, so the name `param_list` must not change.
    with open(param_json, 'r') as fp:
        param_list = json.load(fp)

    # Create engine and bind it to current session; `session` is the
    # module-level object that process_model() adds rows to and commits.
    engine = create_engine('sqlite:///'+dbfile)
    Base.metadata.bind = engine
    DBSession = sessionmaker(bind=engine)
    session = DBSession()

    main()

Overwriting /home/ba13026/mpmodeling/analysis/insert2db_metrics.py


```bash
python ~/mpmodeling/analysis/setup_db_metrics.py conf_metrics.db

nohup python ~/mpmodeling/analysis/insert2db_metrics.py conf_metrics.db param_list_metrics.json 20 > conf_metrics.out &
```

In [2]:
import json
wd='/projects/s21/ba13026/Wza_Modeling/L-structures/rosetta/bg_test/md_relax/'

In [3]:
with open(wd+'param_list_metrics.json', 'r') as fp:
    param_list = json.load(fp)

In [4]:
n=0

In [5]:
# Step through process_model() by hand for the n-th structure.
# NOTE(review): in this part of the notebook get_tags only appears inside the
# insert2db_metrics.py script written by a %%writefile cell -- it must be
# imported or defined in the kernel before this cell runs.
model_pdb = param_list[n]
#####################################
# Model identifiers
mutant, group, model_name, frame = get_tags(model_pdb)

In [6]:
mutant, group, model_name, frame

('cWza', 'conformation0', 'refined1_0001_INPUT_0736_ignorechain', 'Protein_0')

In [7]:
data = get_Rg_components(model_pdb)

In [8]:
data

(15.984882804674134, 16.016707699104412, 18.390021687929075)

In [20]:
# pore_Rmin = get_HOLE_Rmin(model_pdb)
# The script version of get_channel_length returns the string 'Fail' when
# anything inside it raises, so a printed 'Fail' hides the underlying error.
pore_length = get_channel_length(model_pdb)
print(pore_length)

Fail


In [23]:
get_channel_length(model_pdb)

34.636250000000004

In [10]:
pwd

'/projects/s21/ba13026/Wza_Modeling/L-structures/rosetta/bg_test/md_relax/cWza/conformation0/refined1_0001_INPUT_0736_ignorechain/complex/mdf/md_100ns'

In [66]:
dbfile = wd+'conf_metrics.db'
ncores = 20

In [67]:
# Create engine and bind it to current session
# NOTE(review): in this part of the notebook create_engine, sessionmaker and
# Base are only imported inside the %%writefile scripts -- they must be
# imported into the kernel before this cell runs.
engine = create_engine('sqlite:///'+dbfile)
Base.metadata.bind = engine
DBSession = sessionmaker(bind=engine)
session = DBSession()

In [77]:
len(param_list)

28600

In [93]:
# Number of radius-of-gyration rows stored so far; counted by the database
# instead of fetching every row into Python.
session.query(Radii_of_Gyration.Rg_z).count()

3775

In [68]:
session.query(Pore_Dimensions.pore_Rmin).all()

[(8.31274),
 (7.98163),
 (7.78876),
 (8.04232),
 (7.60966),
 (6.07101),
 (7.85828),
 (7.9984),
 (8.5949),
 (7.56816),
 (6.00312),
 (7.28793),
 (7.21704),
 (7.58639),
 (7.88076),
 (7.24224),
 (7.28421),
 (7.04518),
 (7.01694),
 (7.89157),
 (7.7533),
 (8.18201),
 (7.64123),
 (7.62407),
 (8.05123),
 (7.88005),
 (7.85325),
 (8.16547),
 (7.74944),
 (7.96447),
 (7.86847),
 (8.01381),
 (8.34889),
 (7.89963),
 (8.26295),
 (7.846),
 (7.52168),
 (7.97542),
 (8.51523),
 (7.90142),
 (8.18963),
 (7.71966),
 (8.26823),
 (8.2167),
 (7.92152),
 (8.27487),
 (8.01268),
 (7.95422),
 (8.03316),
 (7.93224),
 (8.02857),
 (8.15103),
 (7.91488),
 (7.7513),
 (7.5444),
 (8.0686),
 (8.05306),
 (7.54442),
 (8.15533),
 (8.25503),
 (7.87122),
 (8.36938),
 (7.66252),
 (7.93893),
 (7.85157),
 (8.10263),
 (7.97287),
 (7.83011),
 (8.10705),
 (7.98437),
 (8.08117),
 (5.77749),
 (5.38724),
 (7.50712),
 (7.76273),
 (8.2402),
 (6.82284),
 (7.39971),
 (7.0216),
 (7.88834),
 (6.75217),
 (8.26218),
 (8.4736),
 (8.10235),
 (8.

In [105]:
session.query(Tags.frame).filter_by(mutant='cWza',group='conformation0',pdb_name='refined1_0001_INPUT_0736_ignorechain').all()

[('Protein_23'),
 ('Protein_21'),
 ('Protein_28'),
 ('Protein_18'),
 ('Protein_15'),
 ('Protein_0'),
 ('Protein_19'),
 ('Protein_27'),
 ('Protein_29'),
 ('Protein_11'),
 ('Protein_1'),
 ('Protein_7'),
 ('Protein_9'),
 ('Protein_13'),
 ('Protein_12'),
 ('Protein_6'),
 ('Protein_8'),
 ('Protein_4'),
 ('Protein_3'),
 ('Protein_26'),
 ('Protein_16'),
 ('Protein_25'),
 ('Protein_22'),
 ('Protein_38'),
 ('Protein_37'),
 ('Protein_47'),
 ('Protein_40'),
 ('Protein_50'),
 ('Protein_46'),
 ('Protein_59'),
 ('Protein_55'),
 ('Protein_53'),
 ('Protein_42'),
 ('Protein_39'),
 ('Protein_58'),
 ('Protein_48'),
 ('Protein_44'),
 ('Protein_49'),
 ('Protein_56'),
 ('Protein_60'),
 ('Protein_57'),
 ('Protein_43'),
 ('Protein_61'),
 ('Protein_63'),
 ('Protein_51'),
 ('Protein_65'),
 ('Protein_70'),
 ('Protein_69'),
 ('Protein_67'),
 ('Protein_72'),
 ('Protein_80'),
 ('Protein_68'),
 ('Protein_71'),
 ('Protein_76'),
 ('Protein_73'),
 ('Protein_88'),
 ('Protein_81'),
 ('Protein_86'),
 ('Protein_75'),
 ('Pr

In [58]:
session.query(Pore_Dimensions.pore_length).all()

[(37.65875000000001), (38.51375), (38.091249999999995), (38.45375), (37.64875)]

In [61]:
# Close this notebook's session and release its database connection.
# (Session.close_all() is a deprecated class-level call; closing the one
# session opened here is sufficient.)
session.close()