In [1]:
import glob
import importlib
import random
import sys
from itertools import combinations

import matplotlib as mpl
import matplotlib.pyplot as plt
import MDAnalysis as mda
import mdtraj as md
import numpy as np
import prettypyplot as pplt
import pyemma
import seaborn as sns
from matplotlib import ticker
from MDAnalysis.analysis.hydrogenbonds import HydrogenBondAnalysis

import extq
import ivac

  MIN_CHEMFILES_VERSION = LooseVersion("0.9")
  class NCDFPicklable(scipy.io.netcdf.netcdf_file):


In [2]:
# Add the relative "../../python" directory to the module search path before
# importing `plotting` and `util` (presumably project-local helpers that live
# there - confirm).
sys.path.insert(1, "../../python")
import plotting
import util

In [3]:
%config InlineBackend.figure_format = "retina"

In [4]:
# Load prettypyplot's colormaps, then apply the style sheets in order:
# the custom style sheet first, then the muted color theme from SciencePlots.
pplt.load_cmaps()
plt.style.use(["custom", "muted"])

# Eleven color stops running from dark blue through pale yellow to dark red;
# they are interpolated into the "diverging" colormap below.
colors = mpl.colors.to_rgba_array(
    [
        "#364B9A", "#4A7BB7", "#6EA6CD", "#98CAE1", "#C2E4EF",  # blue side
        "#EAECCC",  # pale midpoint
        "#FEDA8B", "#FDB366", "#F67E4B", "#DD3D2D", "#A50026",  # red side
    ]
)
cm_div = mpl.colors.LinearSegmentedColormap.from_list(
    "diverging",
    colors,
)
# force=True lets the registration replace an earlier "diverging" entry,
# e.g. when this cell is executed more than once.
mpl.colormaps.register(cm_div, force=True)

In [5]:
from joblib import Parallel, delayed
from multiprocess import cpu_count

# Worker count passed to joblib's Parallel by the analysis cell further down.
# NOTE(review): cpu_count() presumably reports every core on the machine, not
# the cores allocated to this job - confirm before running on a shared node.
n_jobs = cpu_count()

# Lipid h-bonds

In [6]:
# Project root; hbond_old builds the PSF topology path from it.
HOME_DIR = "/project/dinner/scguo/ci-vsd"
# Passed as `cutoff` to the analysis, where it becomes HydrogenBondAnalysis'
# d_a_cutoff (presumably in angstroms - confirm against the MDAnalysis docs).
CUTOFF = 3.5
# Passed as `angle`, where it becomes d_h_a_angle_cutoff
# (presumably in degrees - confirm against the MDAnalysis docs).
ANGLE = 120

In [7]:
def hbond_old(trajfile, r_i, cutoff=3.4, angle=90):
    """Build (without running) a hydrogen-bond analysis for one residue.

    A Universe is created from the fixed ``civsd.psf`` topology under
    ``HOME_DIR`` together with ``trajfile``. Donors are the non-backbone
    atoms of type ``NC2`` in residue ``r_i``, hydrogens are that residue's
    atoms of type ``HC``, and acceptors are atoms named O11/O12/O13/O14
    within 3.0 of the residue (presumably lipid phosphate oxygens, going by
    the selection's purpose in this notebook - confirm).

    Parameters
    ----------
    trajfile : str
        Path of the trajectory file to load alongside the topology.
    r_i : int
        Residue id substituted into the selection strings.
    cutoff : float, optional
        Forwarded as ``d_a_cutoff``.
    angle : float, optional
        Forwarded as ``d_h_a_angle_cutoff``.

    Returns
    -------
    HydrogenBondAnalysis
        The configured analysis object; the caller must call ``run()``.
    """
    universe = mda.Universe(
        f"{HOME_DIR}/models/MD-clustering-center/civsd.psf", trajfile
    )
    selections = {
        "donors_sel": f"protein and resid {r_i} and not backbone and type NC2",
        "hydrogens_sel": f"protein and resid {r_i} and type HC",
        "acceptors_sel": f"name O11 O12 O13 O14 and around 3.0 (protein and resid {r_i})",
    }
    # update_selections=True is needed because the acceptor selection uses a
    # distance-based `around` clause, so membership can change between frames.
    return HydrogenBondAnalysis(
        universe=universe,
        update_selections=True,
        d_a_cutoff=cutoff,
        d_h_a_angle_cutoff=angle,
        **selections,
    )

In [8]:
def run_analysis(analysis):
    """Execute ``analysis`` and return its per-frame counts.

    Calls ``analysis.run()`` and then returns whatever
    ``analysis.count_by_time()`` produces. Kept as a module-level function
    so it can be dispatched to joblib workers.
    """
    analysis.run()
    counts_per_frame = analysis.count_by_time()
    return counts_per_frame

In [9]:
def compute_hb_cutoff_angle(hb_fn, r_i, files, cutoff, angle, n_jobs=40, verbose=10):
    """Run one hydrogen-bond analysis per trajectory file in parallel.

    Parameters
    ----------
    hb_fn : callable
        Factory called as ``hb_fn(file, r_i, cutoff=cutoff, angle=angle)``
        that returns an un-run analysis object.
    r_i : int
        Residue id handed to ``hb_fn``.
    files : iterable of str
        Trajectory files; one analysis is created for each.
    cutoff, angle : float
        Forwarded unchanged to ``hb_fn``.
    n_jobs : int, optional
        Number of joblib workers.
    verbose : int, optional
        joblib verbosity level.

    Returns
    -------
    list
        The ``run_analysis`` result for every file, in the order of ``files``.
    """
    # All analysis objects are created up front in the calling process;
    # only run_analysis itself is dispatched to the workers.
    tasks = []
    for trajfile in files:
        analysis = hb_fn(trajfile, r_i, cutoff=cutoff, angle=angle)
        tasks.append(delayed(run_analysis)(analysis))
    pool = Parallel(n_jobs=n_jobs, verbose=verbose)
    return pool(tasks)

In [10]:
def files_second(data_dir="/project/dinner/scguo/anton-old"):
    """Return the trajectory paths of the second batch, minus excluded runs.

    File indices 179 through 294 are considered. Index 180 is always skipped
    (reason not recorded here), as is every index whose ID appears in the
    exclusion set below.

    Parameters
    ----------
    data_dir : str, optional
        Directory that holds the ``civsd_<index>.dcd`` files. Defaults to the
        original cluster location, so existing calls without arguments return
        exactly the same paths as before.

    Returns
    -------
    list of str
        Paths ``"<data_dir>/civsd_<index>.dcd"`` in increasing index order.
    """
    # Excluded runs are recorded by ID, which is the file index plus 1000.
    id_offset = 1000
    excluded_ids = {
        *range(1187, 1192),
        *range(1197, 1200),
        1203,
        *range(1205, 1208),
        *range(1211, 1216),
        *range(1225, 1229),
        *range(1231, 1234),
        *range(1236, 1239),
        1242,
        1245,
        1246,
        1252,
        1253,
        *range(1260, 1264),
        *range(1266, 1280),
        *range(1282, 1287),
        *range(1288, 1291),
    }
    return [
        f"{data_dir}/civsd_{i}.dcd"
        for i in range(179, 295)
        if i != 180 and (i + id_offset) not in excluded_ids
    ]

In [11]:
# Trajectory paths of the second batch, with excluded runs already removed.
dcds_second = files_second()

In [12]:
# Show the selected paths as a sanity check on the exclusion list.
dcds_second

['/project/dinner/scguo/anton-old/civsd_179.dcd',
 '/project/dinner/scguo/anton-old/civsd_181.dcd',
 '/project/dinner/scguo/anton-old/civsd_182.dcd',
 '/project/dinner/scguo/anton-old/civsd_183.dcd',
 '/project/dinner/scguo/anton-old/civsd_184.dcd',
 '/project/dinner/scguo/anton-old/civsd_185.dcd',
 '/project/dinner/scguo/anton-old/civsd_186.dcd',
 '/project/dinner/scguo/anton-old/civsd_192.dcd',
 '/project/dinner/scguo/anton-old/civsd_193.dcd',
 '/project/dinner/scguo/anton-old/civsd_194.dcd',
 '/project/dinner/scguo/anton-old/civsd_195.dcd',
 '/project/dinner/scguo/anton-old/civsd_196.dcd',
 '/project/dinner/scguo/anton-old/civsd_200.dcd',
 '/project/dinner/scguo/anton-old/civsd_201.dcd',
 '/project/dinner/scguo/anton-old/civsd_202.dcd',
 '/project/dinner/scguo/anton-old/civsd_204.dcd',
 '/project/dinner/scguo/anton-old/civsd_208.dcd',
 '/project/dinner/scguo/anton-old/civsd_209.dcd',
 '/project/dinner/scguo/anton-old/civsd_210.dcd',
 '/project/dinner/scguo/anton-old/civsd_216.dcd',


In [13]:
# Load a previously saved array from disk and print its shape as a check.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects,
# which can execute code if the file is not trusted. It is presumably not
# needed for a plain numeric array - confirm and drop it if so.
hb_lipid_first = np.load("../../data/raw_feat/hbond_phos_0-179.npy", allow_pickle=True)
print(hb_lipid_first.shape)

(179, 10000, 5)


In [15]:
# For every residue id listed (only 217 here), run the hydrogen-bond analysis
# on all second-batch trajectories and collect one list of series per residue.
results_second = []
for r_i in (217,):
    single_second = compute_hb_cutoff_angle(
        hbond_old, r_i, dcds_second, CUTOFF, ANGLE, n_jobs=n_jobs, verbose=10
    )
    hb_new_short = []
    for hb in single_second:
        length = len(hb)
        # Series with more than 10000 but fewer than 100000 entries are cut
        # to their first 10000; shorter series and those with 100000 or more
        # entries are kept whole.
        # NOTE(review): presumably this trims a few surplus frames while
        # leaving the long trajectories intact - confirm the intent.
        if length > 10000 and length < 100000:
            length = 10000
        hb_new_short.append(hb[:length])
    results_second.append(hb_new_short)

[Parallel(n_jobs=48)]: Using backend LokyBackend with 48 concurrent workers.
  MIN_CHEMFILES_VERSION = LooseVersion("0.9")
  class NCDFPicklable(scipy.io.netcdf.netcdf_file):
  MIN_CHEMFILES_VERSION = LooseVersion("0.9")
  class NCDFPicklable(scipy.io.netcdf.netcdf_file):
  MIN_CHEMFILES_VERSION = LooseVersion("0.9")
  class NCDFPicklable(scipy.io.netcdf.netcdf_file):
  MIN_CHEMFILES_VERSION = LooseVersion("0.9")
  class NCDFPicklable(scipy.io.netcdf.netcdf_file):
  MIN_CHEMFILES_VERSION = LooseVersion("0.9")
  class NCDFPicklable(scipy.io.netcdf.netcdf_file):
  MIN_CHEMFILES_VERSION = LooseVersion("0.9")
  class NCDFPicklable(scipy.io.netcdf.netcdf_file):
  MIN_CHEMFILES_VERSION = LooseVersion("0.9")
  class NCDFPicklable(scipy.io.netcdf.netcdf_file):
  MIN_CHEMFILES_VERSION = LooseVersion("0.9")
  class NCDFPicklable(scipy.io.netcdf.netcdf_file):
  MIN_CHEMFILES_VERSION = LooseVersion("0.9")
  class NCDFPicklable(scipy.io.netcdf.netcdf_file):
  MIN_CHEMFILES_VERSION = LooseVersion("0

In [16]:
# One entry is appended per residue id processed in the loop above.
len(results_second)

1