In [6]:
import glob
import os
import pickle
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from natsort import natsorted, ns

In [56]:
def read_data(receptor, ligand, time_limit=0, Verbose=False, exclusions=None,
              base_dir="/home/khlee/work/desmond/output/"):
    """Load the per-frame interface observables of every run of one system.

    Every directory matching ``<base_dir>/<receptor>/<ligand>/*/`` is treated
    as one simulation run. A run is read from ``ene/time.dat`` (loaded with
    ``np.loadtxt``) and from the pickles ``output/gpcr_interface.p``,
    ``output/gpcr_interface_RA.p`` and ``output/gpcr_interface_RB.p``; all
    four must hold the same number of entries (one per frame).

    Parameters
    ----------
    receptor : str
        Sub-directory of ``base_dir``; also written to the ``System`` column.
    ligand : str
        Sub-directory of ``<base_dir>/<receptor>``.
    time_limit : number, optional
        Only frames whose time value is ``>= time_limit`` are kept. Same
        units as ``time.dat`` (units are not visible here -- TODO confirm).
    Verbose : bool, optional
        When true, print the sorted list of run directories that were found.
    exclusions : iterable of str, optional
        Runs to skip. An entry matches either the run directory name or the
        simulation label (the text after the last ``_`` in that name).
    base_dir : str, optional
        Root of the simulation output tree.

    Returns
    -------
    pandas.DataFrame
        One row per kept frame with columns ``Interface``, ``Interface_A``,
        ``Interface_B``, ``System``, ``Simulation``, ``Time`` and ``Frame``
        (1-based frame number within its run). Empty, but with these
        columns, when nothing could be loaded.

    Notes
    -----
    A run that cannot be read (missing/corrupt file, inconsistent lengths)
    is skipped with a ``RuntimeWarning`` instead of aborting the whole load.
    """
    columns = ["Interface", "Interface_A", "Interface_B",
               "System", "Simulation", "Time", "Frame"]
    pickle_sources = (
        ("Interface", "gpcr_interface.p"),
        ("Interface_A", "gpcr_interface_RA.p"),
        ("Interface_B", "gpcr_interface_RB.p"),
    )
    data = {name: [] for name in columns}
    if exclusions is None:
        exclusions = ()

    # The trailing "" keeps the final separator so that only directories match.
    run_dirs = glob.glob(os.path.join(base_dir, receptor, ligand, "*", ""))
    run_dirs = natsorted(run_dirs, key=lambda y: y.lower())
    if Verbose:
        print(run_dirs)

    for run_dir in run_dirs:
        # Globbed paths end with a separator; normalise before taking the
        # name, otherwise the last path component is always empty and no
        # exclusion can ever match.
        run_name = os.path.basename(os.path.normpath(run_dir))
        sim_label = run_name.split("_")[-1]
        if run_name in exclusions or sim_label in exclusions:
            continue

        try:
            # loadtxt returns a 0-d array for a single-frame file; len() needs 1-d.
            t = np.atleast_1d(np.loadtxt(os.path.join(run_dir, "ene", "time.dat")))

            loaded = {}
            for column, filename in pickle_sources:
                # NOTE(review): pickle.load can execute arbitrary code -- only
                # point base_dir at trusted simulation output.
                with open(os.path.join(run_dir, "output", filename), "rb") as handle:
                    loaded[column] = pickle.load(handle)

            for column, _ in pickle_sources:
                if len(loaded[column]) != len(t):
                    raise ValueError(
                        "{} has {} entries but time.dat has {}".format(
                            column, len(loaded[column]), len(t)))

            frames = pd.DataFrame(
                dict(loaded, Time=t, Frame=list(range(1, len(t) + 1))))
            kept = frames[frames["Time"] >= time_limit]
        except Exception as err:  # best-effort load: report the run, keep going
            warnings.warn(
                "Skipping {}: {}: {}".format(run_dir, type(err).__name__, err),
                RuntimeWarning, stacklevel=2)
            continue

        # Append only after the whole run loaded, so all columns stay aligned.
        for column in ("Interface", "Interface_A", "Interface_B", "Time", "Frame"):
            data[column] += list(kept[column])
        data["System"] += [receptor] * len(kept)
        data["Simulation"] += [sim_label] * len(kept)

    return pd.DataFrame(data, columns=columns)

def check_interface(df, receptor=None, tol=0):
    """List the frames whose total interface differs from the sum of its parts.

    For every row, ``Interface`` is compared with
    ``Interface_A + Interface_B``; rows where the two differ are reported.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Interface``, ``Interface_A``, ``Interface_B`` and
        ``Simulation`` columns (and ``System`` when ``receptor`` is None).
        Any index is accepted; rows are processed in their stored order.
    receptor : str, optional
        Prefix for the returned labels. When omitted, each row's own
        ``System`` value is used instead.
    tol : number, optional
        Largest absolute difference still treated as a match. The default
        of 0 requires exact equality; pass a small positive value if the
        observables are floats and round-off should be ignored.

    Returns
    -------
    list of str
        One ``"<receptor>_<Simulation>"`` label per mismatching row, in row
        order (labels repeat when a simulation has several bad frames).
    """
    if len(df) == 0:
        return []

    residual = df["Interface"] - (df["Interface_A"] + df["Interface_B"])
    # Phrase the test as "not within tolerance" so a NaN residual is still
    # reported as a mismatch, just as `nan != 0` would be.
    within_tol = (residual.abs() <= tol).astype(bool)
    flagged = df[~within_tol]

    if receptor is None:
        prefixes = flagged["System"].tolist()
    else:
        prefixes = [receptor] * len(flagged)
    return [prefix + "_" + sim
            for prefix, sim in zip(prefixes, flagged["Simulation"])]

In [57]:
# Load every run found for receptor d2gi with ligand bro (time_limit is left at its default).
d2gi = read_data(receptor="d2gi", ligand="bro")

In [58]:
# One label per frame where Interface differs from Interface_A + Interface_B.
d2gi_mm = check_interface(df=d2gi, receptor="d2gi")
d2gi_mm

[]

In [59]:
# Load every run found for receptor d2go with ligand bro (time_limit is left at its default).
d2go = read_data(receptor="d2go", ligand="bro")

In [61]:
# Count the d2go frames where Interface differs from Interface_A + Interface_B.
d2go_mm = check_interface(df=d2go, receptor="d2go")
len(d2go_mm)

9459

In [54]:
# Total number of rows (frames) in d2go, for comparison with the mismatch count.
# NOTE(review): this cell's execution count (54) is lower than that of the cells
# defining read_data (56) and assigning d2go (59), so the displayed value may come
# from an earlier load -- re-run the notebook top to bottom to confirm.
len(d2go)

11850

In [62]:
# Distinct labels in d2go_mm, i.e. the simulations with at least one mismatching frame.
set(d2go_mm)

{'d2go_bro', 'd2go_bro.1', 'd2go_bro.2', 'd2go_bro.3', 'd2go_bro.5'}

In [63]:
# d3gi with ligand prm: load all runs, then count frames where
# Interface differs from Interface_A + Interface_B.
d3gi_prm = read_data(receptor="d3gi", ligand="prm")
d3gi_prm_mm = check_interface(df=d3gi_prm, receptor="d3gi")
len(d3gi_prm_mm)

0

In [64]:
# d3go with ligand prm: load all runs, then count frames where
# Interface differs from Interface_A + Interface_B.
d3go_prm = read_data(receptor="d3go", ligand="prm")
d3go_prm_mm = check_interface(df=d3go_prm, receptor="d3go")
len(d3go_prm_mm)

6450

In [65]:
# Total number of rows (frames) in d3go_prm, for comparison with the mismatch count.
len(d3go_prm)

15800

In [66]:
# Distinct labels in d3go_prm_mm, i.e. the simulations with at least one mismatching frame.
set(d3go_prm_mm)

{'d3go_prm.f1',
 'd3go_prm.f2',
 'd3go_prm.f3',
 'd3go_prm.f4',
 'd3go_prm.f5',
 'd3go_prm.f6'}

In [68]:
# d3go with ligand pd: load all runs, then count frames where
# Interface differs from Interface_A + Interface_B.
d3go_pd = read_data(receptor="d3go", ligand="pd")
d3go_pd_mm = check_interface(df=d3go_pd, receptor="d3go")
len(d3go_pd_mm)

2066

In [69]:
# Total number of rows (frames) in d3go_pd, for comparison with the mismatch count.
len(d3go_pd)

8200

In [70]:
# Distinct labels in d3go_pd_mm, i.e. the simulations with at least one mismatching frame.
set(d3go_pd_mm)

{'d3go_pd.f1', 'd3go_pd.f2', 'd3go_pd.f4', 'd3go_pd.f5', 'd3go_pd.f6'}

In [71]:
# d3gi with ligand pd: load all runs, then count frames where
# Interface differs from Interface_A + Interface_B.
d3gi_pd = read_data(receptor="d3gi", ligand="pd")
d3gi_pd_mm = check_interface(df=d3gi_pd, receptor="d3gi")
len(d3gi_pd_mm)

3244

In [72]:
# Total number of rows (frames) in d3gi_pd, for comparison with the mismatch count.
len(d3gi_pd)

6800

In [73]:
# Distinct labels in d3gi_pd_mm, i.e. the simulations with at least one mismatching frame.
set(d3gi_pd_mm)

{'d3gi_pd.f3', 'd3gi_pd.f4', 'd3gi_pd.f6'}