# Reweight REPX RNA tetramers with PyMBAR

In [1]:
import os, sys, math
import numpy as np
import click
import glob
import mdtraj
import logging
import netCDF4 as nc
import warnings
import pandas as pd
from collections import defaultdict
import matplotlib.pyplot as plt
#get_ipython().run_line_magic('matplotlib', 'inline')
from matplotlib.ticker import MultipleLocator, FormatStrFormatter, AutoMinorLocator
import seaborn as sns

import barnaba as bb
from barnaba import definitions
from barnaba.nucleic import Nucleic

import openmmtools as mmtools

from pymbar import timeseries, MBAR

In [35]:
# Silence every Python warning for the remainder of the session.
warnings.simplefilter("ignore")

# Root logging: DEBUG and above, rendered as "LEVEL:message".
# `logger` is the logger named after this module.
logging.basicConfig(level=logging.DEBUG, format='%(levelname)s:%(message)s')
logger = logging.getLogger(__name__)

In [3]:
# Show DataFrames in full (no row/column truncation) with one decimal place.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.precision', 1)
pd.set_option('display.float_format', '{:.1f}'.format)

In [4]:
#plt.rcParamsDefault

In [5]:
# Matplotlib rcParams overrides used by this notebook.
params_mydefault = {
    # text sizes
    'legend.fontsize': 40,
    'font.size': 40,
    'axes.labelsize': 48,
    'axes.titlesize': 48,
    'xtick.labelsize': 40,
    'ytick.labelsize': 40,
    # figure output
    'savefig.dpi': 600,
    'figure.figsize': [64, 8],
    # tick mark lengths
    'xtick.major.size': 10,
    'xtick.minor.size': 7,
    'ytick.major.size': 10,
    'ytick.minor.size': 7,
}

In [6]:
plt.rcParams.update(params_mydefault)

In [7]:
# J-coupling names. Couplings are pre-calculated in the order
# "H1H2", "H2H3", "H3H4", "1H5P", "2H5P", "1H5H4", "2H5H4", "H3P";
# the insertion order of this mapping follows that order.
dict_coupling_mapping_by_atom_name = {
    "H1H2": "nu1",
    "H2H3": "nu2",
    "H3H4": "nu3",
    "1H5P": "beta1",
    "2H5P": "beta2",
    "1H5H4": "gamma1",
    "2H5H4": "gamma2",
    "H3P": "epsilon",
}

# Same coupling names keyed by their position along the coupling axis (0..7).
dict_coupling_mapping_by_index = dict(enumerate(dict_coupling_mapping_by_atom_name.values()))

In [8]:
def radian_to_degree(a):
    """
    Convert angles from radians to degrees, in place.

    Negative angles are first shifted up by 2*pi, then every value is
    multiplied by 180/pi.

    Parameters
    ----------
    a : np.ndarray of float
        Angles in radians, e.g. indexed as
        [trajectory frame : residue : torsion]. The array is modified in place.

    Returns
    -------
    a : np.ndarray
        The same array object that was passed in, now holding degrees.
    """
    is_negative = a < 0.0
    a[is_negative] += 2.0 * np.pi
    a *= 180.0 / np.pi
    return a

### define

In [11]:
def load_data(npzfile):
    """
    Load the pre-computed J-couplings from a numpy ``.npz`` archive.

    Parameters
    ----------
    npzfile : str or file-like
        Path to (or open handle of) the archive; it must contain an array
        stored under the key "couplings".

    Returns
    -------
    couplings : np.ndarray
        The array stored under "couplings".
        Expected layout: [n_states, n_iterations, n_residues, n_jcouplings]
        (not checked here).

    Raises
    ------
    KeyError
        If the archive has no "couplings" entry.
    """
    # NOTE(review): allow_pickle=True can execute code embedded in a malicious
    # archive; only load files you produced yourself.
    # The context manager closes the underlying zip file once the array has
    # been read into memory.
    with np.load(npzfile, allow_pickle=True) as _npzfile:
        couplings = _npzfile["couplings"]

    return couplings

### define base setting

In [12]:
seq = "cccc"
benchmark_path = "/home/takabak/data/exploring-rna/rna-benchmark/data/tetramer"

### load data

In [13]:
md_trial = 1

In [14]:
ncfile = "../{}/enhanced.nc".format(md_trial)
npzfile = "../analysis.test/mydata{}_replica.npz".format(md_trial)

In [15]:
# Open the analysis archive and list the arrays it contains.
# NOTE: this rebinds `npzfile` from the path string to the opened archive
# object, so re-running this cell without re-running the path cell will fail.
# NOTE(review): allow_pickle=True should only be used on trusted files.
npzfile = np.load(npzfile, allow_pickle=True)
for k in npzfile.files:
    print(k)

bb_angles
pucker_angles
rg
rmsd
ermsd
stackings
couplings


In [16]:
# Pull the J-coupling array out of the archive and read the number of
# residues and couplings from its last two axes (the array must be 4-D).
couplings = npzfile['couplings']
_, _, n_residues, n_couplings = couplings.shape  # [n_replicas, n_iterations, n_residues, n_jcouplings]

print("observable shape is {}".format(couplings.shape))
print("found {} observables for {} residues".format(n_couplings, n_residues))

observable shape is (33, 30000, 4, 8)
found 8 observables for 4 residues


In [17]:
# trajectory
# Open the replica-exchange storage read-only and wrap it in an analyzer;
# `analyzer` is used by later cells to read energies and equilibration settings.
reporter = mmtools.multistate.MultiStateReporter(ncfile, open_mode='r')
analyzer = mmtools.multistate.MultiStateSamplerAnalyzer(reporter)
n_iterations, n_replicas, n_states = analyzer.n_iterations, analyzer.n_replicas, analyzer.n_states

print("found {} iterations, {} replicas, and {} states".format(n_iterations, n_replicas, n_states))



found 30000 iterations, 33 replicas, and 33 states


In [18]:
# residue names
# Load the reference structure; if the equilibrated one cannot be loaded, fall
# back to the prepared one. Catch Exception (not a bare except) so that
# KeyboardInterrupt / SystemExit still propagate, and record why the fallback
# was taken.
try:
    ref_pdb = mdtraj.load('../../../../eq/min.pdb')
except Exception as err:
    logger.debug("could not load ../../../../eq/min.pdb ({}); falling back to ../../../../prep/opc/min.pdb".format(err))
    ref_pdb = mdtraj.load('../../../../prep/opc/min.pdb')

# Label every residue that is not water or an ion as <residue name><1-based
# position in the topology>.
resnames = [residue.name + str(i + 1) for i, residue in enumerate(ref_pdb.topology.residues) if residue.name not in ["HOH", "NA", "CL"]]
print(">residue names: {}".format(resnames))

>residue names: ['C1', 'C2', 'C3', 'C4']


### check observable
We want to reshape this into something like `dict['residue_key'] = {'coupling_key': [n_replicas, n_iterations]}`.
Then compute the decorrelated (equilibrated and subsampled) observable for each `dict['residue_key']['coupling_key']` and flatten it for the MBAR analysis.

>decorrelated_o_kn = calculate_decorrelated(dict['residue_key']['coupling_key'])  
>decorrelated_o_n = decorrelated_o_kn.flatten()  
>obs_dict_decorrelated['residue_key'] = {'coupling_key': decorrelated_o_n}  

In [19]:
print(couplings.shape)  # n_replicas, n_iterations, n_residues, n_observables

(33, 30000, 4, 8)


In [236]:
# Regroup the raw coupling array as
#   {residue name: {coupling name: array of shape [n_replicas, n_iterations + 1]}}
dict_per_residue = {}  # key: residue name

for residue_index in range(n_residues):
    residue_name = resnames[residue_index]
    couplings_of_residue = {}  # key: jcoupling name

    for coupling_index in range(n_couplings):
        coupling_name = dict_coupling_mapping_by_index[coupling_index]

        # [n_replicas, n_iterations] slice for this residue and coupling
        raw_kn = couplings[:, :, residue_index, coupling_index]

        # Prepend one zero-filled iteration so the iteration axis matches the
        # energy matrix shape.
        padded_kn = np.zeros([raw_kn.shape[0], raw_kn.shape[1] + 1])
        padded_kn[:, 1:] = raw_kn

        couplings_of_residue[coupling_name] = padded_kn

    dict_per_residue[residue_name] = couplings_of_residue

In [237]:
dict_per_residue

{'C1': {'nu1': array([[ 0.        ,  1.31889141,  0.35617173, ...,  6.56379128,
          10.8358345 , 11.38743401],
         [ 0.        ,  0.04941816,  0.60006303, ..., 11.18356323,
          10.37164974,  9.49944019],
         [ 0.        , -0.01653824,  3.51535606, ...,  1.29619384,
           1.6604656 ,  2.62376571],
         ...,
         [ 0.        ,  1.42042994,  0.81182998, ...,  1.32028151,
           1.32687616,  2.84064031],
         [ 0.        ,  1.26891696,  0.62288868, ..., 10.50609016,
           5.94505501, 10.91505527],
         [ 0.        ,  1.12628865,  0.91856325, ..., 10.64995575,
          10.98170185, 11.32216072]]),
  'nu2': array([[0.        , 5.62223005, 4.19623852, ..., 7.50267506, 6.63122463,
          4.00907755],
         [0.        , 4.98578262, 4.16824913, ..., 5.41230488, 5.46961403,
          7.07699585],
         [0.        , 5.53752327, 5.44184017, ..., 4.97670269, 5.22728586,
          6.44061136],
         ...,
         [0.        , 4.34908438

In [238]:
dict_per_residue.keys()

dict_keys(['C1', 'C2', 'C3', 'C4'])

In [240]:
dict_per_residue['C1'].keys()

dict_keys(['nu1', 'nu2', 'nu3', 'beta1', 'beta2', 'gamma1', 'gamma2', 'epsilon'])

In [242]:
dict_per_residue['C1']['nu1'].shape

(33, 30001)

### compute decorrelated observables

Since `openmmtools.multistate.multistateanalyzer._get_equilibration_data` uses the effective energy to detect equilibration, manually compute the equilibration iteration (n_equilibration), 
statistical inefficiency (g_t), and effective number of uncorrelated samples (n_effective_max) for each observable, following a procedure similar to https://github.com/choderalab/openmmtools/blob/1abb4f8112d231c2bbe1b42723ac692c50da631c/openmmtools/multistate/multistateanalyzer.py#L2009

In [243]:
def get_effective_observable_timeseries(u_kn):
    """
    Collapse a per-replica observable into one timeseries by summing over replicas.

    Parameters
    ----------
    u_kn : array-like, shape [n_replicas, n_iterations]
        Observable value of every replica at every iteration.

    Returns
    -------
    u_n : np.ndarray of float64, shape [n_iterations]
        ``u_n[i]`` is the sum of ``u_kn[:, i]`` over all replicas.

    Raises
    ------
    ValueError
        If ``u_kn`` is not two-dimensional.
    """
    u_kn = np.asarray(u_kn)
    if u_kn.ndim != 2:
        raise ValueError(
            "u_kn must have shape [n_replicas, n_iterations], got shape {}".format(u_kn.shape)
        )

    # One vectorised reduction over the replica axis replaces the Python-level
    # loop over iterations.
    return u_kn.sum(axis=0, dtype=np.float64)

In [244]:
def _get_equilibration_data_custom(u_kn):
    """
    Detect equilibration for one observable instead of the effective energy.

    The observable is summed over replicas, scanned with
    ``mmtools.multistate.utils.get_equilibration_data_per_sample`` and the
    candidate with the largest effective sample count is selected.

    Parameters
    ----------
    u_kn : np.ndarray, shape [n_replicas, n_iterations]
        Observable value of every replica at every iteration.

    Returns
    -------
    n_equilibration : int
        Selected candidate start index plus the number of initially skipped
        iterations.
    g_t : float
        ``analyzer._statistical_inefficiency`` when it is set, otherwise the
        statistical inefficiency of the selected candidate.
    n_effective_max : float
        Largest effective sample count among the candidates (may be NaN).

    Notes
    -----
    Reads the notebook-level ``analyzer`` and writes to ``logger``.
    """
    # Single timeseries for the whole replica set
    observable_n = get_effective_observable_timeseries(u_kn)

    # Use the analyzer's preset number of equilibration iterations when there
    # is one; otherwise skip only the first (minimization) frame.
    t0 = analyzer._n_equilibration_iterations
    if t0 is None:
        t0 = 1

    # Scan candidate equilibration times (100 is the openmmtools default subset size)
    scan = mmtools.multistate.utils.get_equilibration_data_per_sample(observable_n[t0:], max_subset=100)
    start_indices, inefficiencies, effective_counts = scan

    # Keep the candidate that maximises the effective number of samples
    best = effective_counts.argmax()
    n_effective_max = effective_counts.max()
    n_equilibration = start_indices[best] + t0

    g_t = analyzer._statistical_inefficiency
    if g_t is None:
        g_t = inefficiencies[best]

    # Report the equilibration data
    logger.debug(' number of iterations discarded to equilibration : {}'.format(n_equilibration))
    logger.debug(' statistical inefficiency of production region   : {}'.format(g_t))
    logger.debug(' effective number of uncorrelated samples        : {}'.format(n_effective_max))

    return n_equilibration, g_t, n_effective_max

##### detect equilibration

In [284]:
# {residue name: {coupling name: [n_equilibration, g_t, n_effective_max]}}
dict_equilibration_data = {}

for residue_key, couplings_of_residue in dict_per_residue.items():
    equilibration_of_residue = {}

    for coupling_key, u_kn in couplings_of_residue.items():
        logger.debug(">residue: {} ({})".format(residue_key, coupling_key))

        # Equilibration is detected separately for every observable
        equilibration_of_residue[coupling_key] = list(_get_equilibration_data_custom(u_kn))

    dict_equilibration_data[residue_key] = equilibration_of_residue

DEBUG:>residue: C1 (nu1)
DEBUG: number of iterations discarded to equilibration : 27601
DEBUG: statistical inefficiency of production region   : 46.84543228149414
DEBUG: effective number of uncorrelated samples        : 51.253662109375
DEBUG:>residue: C1 (nu2)
DEBUG: number of iterations discarded to equilibration : 24601
DEBUG: statistical inefficiency of production region   : 15.128348350524902
DEBUG: effective number of uncorrelated samples        : 357.0118713378906
DEBUG:>residue: C1 (nu3)
DEBUG: number of iterations discarded to equilibration : 27601
DEBUG: statistical inefficiency of production region   : 45.645267486572266
DEBUG: effective number of uncorrelated samples        : 52.60129165649414
DEBUG:>residue: C1 (beta1)
DEBUG: number of iterations discarded to equilibration : 301
DEBUG: statistical inefficiency of production region   : nan
DEBUG: effective number of uncorrelated samples        : nan
DEBUG:>residue: C1 (beta2)
DEBUG: number of iterations discarded to equilibr

In [285]:
if np.isnan(dict_equilibration_data['C1']['beta2'][2]):
    print("yes")

yes


In [286]:
analyzer.read_energies?

[0;31mSignature:[0m [0manalyzer[0m[0;34m.[0m[0mread_energies[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Extract energies from the ncfile and order them by replica, state, iteration.

Returns
-------
sampled_energy_matrix : np.ndarray of shape [n_replicas, n_states, n_iterations]
    Potential energy matrix of the sampled states.
unsampled_energy_matrix : np.ndarray of shape [n_replicas, n_unsamped_states, n_iterations]
    Potential energy matrix of the unsampled states.
    Energy from each drawn sample n, evaluated at unsampled state l.
    If no unsampled states were drawn, this will be shape (0,N).
neighborhoods : np.ndarray of shape [n_replicas, n_states, n_iterations]
    Neighborhood energies were computed at, uses a boolean mask over the energy_matrix.
replica_state_indices : np.ndarray of shape [n_replicas, n_iterations]
    States sampled by the replicas in the energy_matrix
[0;31mFile:[0m      /lila/home/takabak/mambaforge/envs/openm

##### compute decorrelated samples

In [301]:
# energy_data is [energy_sampled, energy_unsampled, neighborhood, replicas_state_indices]
# Kept as a list so that later processing can iterate over the four arrays.
energy_data = list(analyzer.read_energies())

# Unpack the four arrays under their own names so their shapes can be inspected.
sampled_energy_matrix, unsampled_energy_matrix, neighborhoods, replicas_state_indices = energy_data

DEBUG:Reading energies...
DEBUG:read_replica_thermodynamic_states: iteration = [    0     1     2 ... 29998 29999 30000]
DEBUG:Done.


In [302]:
print(sampled_energy_matrix.shape)
print(unsampled_energy_matrix.shape)
print(neighborhoods.shape)
print(replicas_state_indices.shape)

(33, 33, 30001)
(33, 0, 30001)
(33, 33, 30001)
(33, 30001)


In [303]:
def _decorrelate_for_mbar(energy_data, o_kn, number_equilibrated, g_t):
    """
    Build the MBAR inputs for one observable.

    The energies and the observable are truncated at ``number_equilibrated``
    and subsampled with statistical inefficiency ``g_t`` along the iteration
    (last) axis, so their columns stay aligned.

    Parameters
    ----------
    energy_data : sequence of 4 arrays
        [energy_sampled, energy_unsampled, neighborhood, replicas_state_indices]
        as returned by ``analyzer.read_energies()``. Not modified.
    o_kn : np.ndarray, shape [n_replicas, n_iterations]
        Observable on the same iteration axis as the energies.
    number_equilibrated : int
        Number of leading iterations to discard.
    g_t : float
        Statistical inefficiency used for subsampling.

    Returns
    -------
    dict
        'u_ln' : energies, shape [n_states, n_replicas * n_kept_iterations]
        'N_l'  : number of samples drawn from each state
        'o_n'  : flattened observable, length n_replicas * n_kept_iterations
    """
    utils = mmtools.multistate.utils

    # A new list is built instead of deep-copying `energy_data`.
    # NOTE(review): this relies on both helpers returning new arrays without
    # mutating their input - confirm for the installed openmmtools version.
    decorrelated = [
        utils.subsample_data_along_axis(
            utils.remove_unequilibrated_data(energies, number_equilibrated, -1), g_t, -1
        )
        for energies in energy_data
    ]
    sampled_u_kln, unsampled_u_kln, _, state_indices_kn = decorrelated

    # Initialize the MBAR matrices in ln form.
    n_rep, n_sampled_states, n_kept = sampled_u_kln.shape
    n_unsampled_states = unsampled_u_kln.shape[1]

    # We assume there are no unsampled states
    assert n_unsampled_states == 0

    n_total_states = n_sampled_states + n_unsampled_states
    u_ln = np.zeros([n_total_states, n_kept * n_rep])
    N_l = np.zeros([n_total_states], dtype=int)

    # Compute shift index for how many unsampled states there were.
    # This assumes an equal number of unsampled states at both end points.
    first_sampled_state = int(n_unsampled_states / 2.0)
    last_sampled_state = n_total_states - first_sampled_state

    # Cast the sampled energy matrix from kln to ln form.
    u_ln[first_sampled_state:last_sampled_state, :] = analyzer.reformat_energies_for_mbar(sampled_u_kln)
    # Determine how many samples were drawn from which state.
    unique_sampled_states, counts = np.unique(state_indices_kn, return_counts=True)
    N_l[first_sampled_state:last_sampled_state][unique_sampled_states] = counts

    # Apply the same truncation and subsampling to the observable, then flatten.
    equilibrated_o_kn = utils.remove_unequilibrated_data(o_kn, number_equilibrated, -1)
    decorrelated_o_kn = utils.subsample_data_along_axis(equilibrated_o_kn, g_t, -1)

    return {'u_ln': u_ln, 'N_l': N_l, 'o_n': decorrelated_o_kn.flatten()}


# {residue name: {coupling name: {'u_ln': ..., 'N_l': ..., 'o_n': ...}}}
# Observables whose equilibration analysis produced NaN are left out.
dict_decoupled_data = {}  # key: residue name

for residue_key in dict_equilibration_data.keys():

    tmp_dict = {}

    for coupling_key in dict_equilibration_data[residue_key]:
        number_equilibrated, g_t, Neff_max = dict_equilibration_data[residue_key][coupling_key]
        logger.debug(">residue: {} ({}) number_equilibrated: {}, g_t: {}, Neff_max: {}".format(residue_key, coupling_key, number_equilibrated, g_t, Neff_max))

        if np.isnan(g_t) or np.isnan(Neff_max):
            # No usable statistical inefficiency: nothing to subsample with.
            logger.debug(" skipped: equilibration data contains NaN")
            continue

        # All intermediate arrays stay local to the helper, so the notebook-level
        # energy matrices and n_replicas / n_iterations are not overwritten.
        tmp_dict[coupling_key] = _decorrelate_for_mbar(
            energy_data,
            dict_per_residue[residue_key][coupling_key],
            number_equilibrated,
            g_t,
        )

    # Store
    dict_decoupled_data[residue_key] = tmp_dict

DEBUG:>residue: C1 (nu1) number_equilibrated: 27601, g_t: 46.84543228149414, Neff_max: 51.253662109375
DEBUG:>residue: C1 (nu2) number_equilibrated: 24601, g_t: 15.128348350524902, Neff_max: 357.0118713378906
DEBUG:>residue: C1 (nu3) number_equilibrated: 27601, g_t: 45.645267486572266, Neff_max: 52.60129165649414
DEBUG:>residue: C1 (beta1) number_equilibrated: 301, g_t: nan, Neff_max: nan
DEBUG:>residue: C1 (beta2) number_equilibrated: 301, g_t: nan, Neff_max: nan
DEBUG:>residue: C1 (gamma1) number_equilibrated: 301, g_t: 146.2100067138672, Neff_max: 203.13931274414062
DEBUG:>residue: C1 (gamma2) number_equilibrated: 301, g_t: 115.8349380493164, Neff_max: 256.407958984375
DEBUG:>residue: C1 (epsilon) number_equilibrated: 27301, g_t: 25.832744598388672, Neff_max: 104.55722045898438
DEBUG:>residue: C2 (nu1) number_equilibrated: 15301, g_t: 69.37382507324219, Neff_max: 211.90989685058594
DEBUG:>residue: C2 (nu2) number_equilibrated: 3301, g_t: 71.31639862060547, Neff_max: 374.401977539062

In [304]:
dict_decoupled_data

{'C1': {'nu1': {'u_ln': array([[-49059.06957166, -49948.48731673, -49979.08500271, ...,
           -50631.35341319, -50681.47393045, -50244.12259948],
          [-49089.71237187, -49975.0855273 , -50010.45519126, ...,
           -50656.3846921 , -50703.6162689 , -50269.06335448],
          [-49120.3551598 , -50001.6837259 , -50041.8253701 , ...,
           -50681.41596084, -50725.75859761, -50294.00409843],
          ...,
          [-35435.77251226, -36055.0485894 , -36101.07619149, ...,
           -36538.25308186, -36559.75459406, -36259.31494558],
          [-35008.31175204, -35620.1175187 , -35665.58989108, ...,
           -36097.49312851, -36118.73526855, -35821.91981538],
          [-34591.0409655 , -35195.55450181, -35240.48488026, ...,
           -35667.24018011, -35688.22913101, -35394.95147819]]),
   'N_l': array([52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52,
          52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52]),
   'o_n': array([ 8.2

In [305]:
dict_decoupled_data.keys()

dict_keys(['C1', 'C2', 'C3', 'C4'])

In [306]:
dict_decoupled_data['C1'].keys()

dict_keys(['nu1', 'nu2', 'nu3', 'gamma1', 'gamma2', 'epsilon'])

In [307]:
dict_decoupled_data['C1']['nu1']['u_ln'].shape

(33, 1716)

In [308]:
dict_decoupled_data['C1']['nu1']['N_l'].shape

(33,)

In [309]:
dict_decoupled_data['C1']['nu1']['o_n'].shape

(1716,)

### MBAR

In [314]:
# test
# Reweight a single observable (residue C1, coupling nu1) as a sanity check
# before looping over everything.
u_ln = dict_decoupled_data['C1']['nu1']['u_ln']
N_l = dict_decoupled_data['C1']['nu1']['N_l']
o_n = dict_decoupled_data['C1']['nu1']['o_n']

# One energy row per state and one energy column per observable sample.
assert u_ln.shape[0] == N_l.size and u_ln.shape[1] == o_n.size

# mbar
# NOTE(review): computeExpectations is the camelCase pymbar API; newer pymbar
# releases may name it differently - confirm against the installed version.
mbar = MBAR(u_ln, N_l)      
u0 = u_ln[0,:]   # energy from state 0
results = mbar.computeExpectations(o_n, u0, return_dict=True)

results

{'mu': array([0.63351046]), 'sigma': array([0.08308758])}

##### reweight for all residues and observables

In [317]:
dict_decoupled_data['C1'].keys()

dict_keys(['nu1', 'nu2', 'nu3', 'gamma1', 'gamma2', 'epsilon'])

In [324]:
# Reweight every available observable of every residue to state 0.
# {residue name: {coupling name: dict returned by computeExpectations}}
dict_reweighted_coupling = {}

for residue_key, decorrelated_by_coupling in dict_decoupled_data.items():

    reweighted_of_residue = {}

    for coupling_key, mbar_inputs in decorrelated_by_coupling.items():
        print(residue_key, coupling_key)

        u_ln = mbar_inputs['u_ln']
        N_l = mbar_inputs['N_l']
        o_n = mbar_inputs['o_n']
        # One energy row per state and one energy column per observable sample.
        assert u_ln.shape[0] == N_l.size and u_ln.shape[1] == o_n.size

        # Expectation of the observable under the energies of state 0 (first row)
        mbar = MBAR(u_ln, N_l)
        reweighted_of_residue[coupling_key] = mbar.computeExpectations(o_n, u_ln[0, :], return_dict=True)

    dict_reweighted_coupling[residue_key] = reweighted_of_residue

C1 nu1
C1 nu2
C1 nu3
C1 gamma1
C1 gamma2
C1 epsilon
C2 nu1
C2 nu2
C2 nu3
C2 beta1
C2 beta2
C2 gamma1
C2 gamma2
C2 epsilon
C3 nu1
C3 nu2
C3 nu3
C3 beta1
C3 beta2
C3 gamma1
C3 gamma2
C3 epsilon
C4 nu1
C4 nu2
C4 nu3
C4 beta1
C4 beta2
C4 gamma1
C4 gamma2


##### plot

In [350]:
def load_benchmark_data(benchmark_path, seq, keyname):
    """
    Load one data set of the benchmark ``experiment.yml`` as a table.

    Parameters
    ----------
    benchmark_path : str
        Root directory of the benchmark data.
    seq : str
        Sequence sub-directory; the file read is
        ``<benchmark_path>/<seq>/00_data/experiment.yml``.
    keyname : str
        Top-level key of the data set to extract (e.g. "experiment_1").

    Returns
    -------
    df : pandas.DataFrame
        One row per residue key and one column per parameter. The column
        labels are the parameter names of the first residue of
        ``experiment_1`` with underscores removed. Null values are stored
        as 0. If the requested data set is missing or does not match that
        layout, an all-zero table with the ``experiment_1`` residues and
        columns is returned instead.

    Raises
    ------
    OSError
        If the YAML file cannot be opened.
    KeyError
        If the file has no ``experiment_1`` / ``measurement`` section.
    """
    # load
    import yaml
    yfile = os.path.join(benchmark_path, seq, "00_data", "experiment.yml")
    with open(yfile, "r") as file:
        d = yaml.safe_load(file)

    # parameter names: taken from the first residue of the experimental data
    reference = d['experiment_1']['measurement']
    first_residue = next(iter(reference.values()), {})
    param_names = [_name.replace('_', '') for _name in first_residue]

    try:
        measurement = d[keyname]['measurement']
        # {residue: [value of each parameter, with null replaced by 0]}
        mydict = {
            res: [0 if entry['value'] is None else entry['value'] for entry in params.values()]
            for res, params in measurement.items()
        }

        # convert to pandas (residues as columns, parameters as rows)
        df = pd.DataFrame.from_dict(mydict)
        df.index = param_names
    except (KeyError, TypeError, AttributeError, ValueError) as err:
        # Data set missing (e.g. computational_2) or malformed: force all data
        # to zero. The previous fallback re-ran the failing code and therefore
        # always raised again; build the zero table from the reference layout.
        logging.getLogger(__name__).warning(
            "benchmark data '%s' unavailable (%r); using zeros", keyname, err
        )
        return pd.DataFrame(0, index=list(reference), columns=param_names)

    # residues as rows, parameters as columns
    return df.T

In [355]:
def plot(result_dict_per_residue, repx_index, output_prefix, benchmark_path, seq):
    """
    Plot the reweighted couplings of every residue next to the benchmark
    values and save both the figure and the underlying table.

    One panel is drawn per residue. Each panel shows, for each of the eight
    couplings, the three benchmark data sets ("experiment_1",
    "computational_1", "computational_2") and the reweighted value with its
    uncertainty as an error bar. Benchmark values equal to 0 are hidden; the
    reweighted marker is hidden when the "computational_1" value is 0.

    Parameters
    ----------
    result_dict_per_residue : dict
        {residue name: {coupling name: {'mu': ..., 'sigma': ...}}}. Couplings
        missing for a residue are skipped. Residues are matched to benchmark
        rows by position.
    repx_index : int or str
        Index inserted into the output file names.
    output_prefix : str
        Directory that receives ``coupling_repx<repx_index>.png`` and
        ``coupling_repx<repx_index>.pkl``.
    benchmark_path : str
        Passed on to ``load_benchmark_data``.
    seq : str
        Passed on to ``load_benchmark_data``.

    Raises
    ------
    ValueError
        If ``result_dict_per_residue`` is empty.
    """
    import matplotlib.colors as mcolors

    if not result_dict_per_residue:
        raise ValueError("result_dict_per_residue is empty: nothing to plot")

    df_exp = load_benchmark_data(benchmark_path, seq, keyname="experiment_1")
    df_rev = load_benchmark_data(benchmark_path, seq, keyname="computational_1")
    df_a14 = load_benchmark_data(benchmark_path, seq, keyname="computational_2")

    # NOTE: this changes the session-wide pandas float display format.
    pd.options.display.float_format = '{:.2f}'.format
    df = pd.DataFrame(result_dict_per_residue).T

    # plot
    mycolors = mcolors.TABLEAU_COLORS
    names = ["beta1", "beta2", "gamma1", "gamma2", "epsilon", "nu1", "nu2", "nu3"]
    tick_labels = [r"$\beta$1", r"$\beta$2", r"$\gamma$1", r"$\gamma$2", r"$\epsilon$", r"$\nu$1", r"$\nu$2", r"$\nu$3"]

    # One row per residue; squeeze=False keeps `axes` indexable for a single row.
    n_rows = len(result_dict_per_residue)
    fig, axes = plt.subplots(n_rows, 1, figsize=(18, 12), sharex=True, squeeze=False)
    axes = axes[:, 0]

    for i, (resname, per_coupling) in enumerate(result_dict_per_residue.items()):
        ax = axes[i]

        # axes
        ax.set_title(resname, x=0.03, y=0.75, fontsize=24)
        ax.yaxis.set_minor_locator(AutoMinorLocator())
        ax.yaxis.set_ticks_position("left")
        ax.set_ylim(-1.5, 13.5)
        ax.set_xlim(-0.5, 7.5)

        for xpos, (name, mycolor) in enumerate(zip(names, mycolors)):
            color = mycolors[mycolor]

            # Benchmark rows are matched to residues by position; .iloc makes
            # that explicit (positional `Series[i]` on a labelled index is
            # deprecated in pandas).
            exp_value = df_exp[name].iloc[i]
            rev_value = df_rev[name].iloc[i]
            a14_value = df_a14[name].iloc[i]

            # hide values with zero (marker size 0)
            exp_scale = 0 if exp_value == 0 else 1
            rev_scale = 0 if rev_value == 0 else 1
            a14_scale = 0 if a14_value == 0 else 1
            # The reweighted marker follows the computational_1 visibility.
            scale = rev_scale

            ax.scatter(xpos - 0.1, exp_value, marker='x', s=60 * exp_scale, c=color)
            ax.scatter(xpos, rev_value, marker='^', s=60 * rev_scale, c=color)
            ax.scatter(xpos + 0.1, a14_value, marker='_', s=60 * a14_scale, c=color)

            # Couplings that could not be reweighted are absent: skip them.
            entry = per_coupling.get(name)
            if entry is None:
                continue
            try:
                ax.errorbar(xpos + 0.2, entry['mu'], yerr=entry['sigma'], fmt='o', capsize=6 * scale, markersize=10 * scale, c=color)
            except (KeyError, TypeError, ValueError) as err:
                # Best effort: an unusable entry must not abort the whole figure.
                logging.getLogger(__name__).debug("could not draw %s %s: %r", resname, name, err)

    # Shared decorations live on the bottom panel; coordinates are in units of
    # that panel's height, so they scale with the number of rows.
    bottom = axes[-1]
    hidden = len(names)  # outside the x-limits: these points only feed the legend
    bottom.set_xticks(list(range(len(names))), tick_labels)
    bottom.set_ylabel("$^3$J (Hz)")
    bottom.yaxis.set_label_coords(-0.1, n_rows / 2)
    bottom.scatter(hidden, hidden, marker='x', s=60, c="k", label="Experimental")
    bottom.scatter(hidden, hidden, marker='^', s=60, c="k", label="DEShaw.Revised")
    bottom.scatter(hidden, hidden, marker='_', s=60, c="k", label="DEShaw.Amber.OL3")
    bottom.scatter(hidden, hidden, marker='o', s=60, c="k", label="HREMD.Amber.OL3")
    bottom.legend(bbox_to_anchor=(0.23, n_rows + 0.05), fontsize=16)

    plt.subplots_adjust(hspace=0.1, wspace=0.1)
    plt.tight_layout()
    #plt.show()
    fig.savefig("{}/coupling_repx{}.png".format(output_prefix, repx_index))
    plt.close(fig)

    # dataframe to pickles
    df.to_pickle("{}/coupling_repx{}.pkl".format(output_prefix, repx_index))

In [356]:
repx_index = 1
output_prefix = "."

# logging.basicConfig() does nothing once the root logger has handlers, and
# logging was already configured at the top of this notebook. Lower the
# verbosity on the root logger directly so DEBUG messages are suppressed
# while plotting.
logging.getLogger().setLevel(logging.WARNING)
plot(dict_reweighted_coupling, repx_index, output_prefix, benchmark_path, seq)

DEBUG:title position was updated manually, not adjusting
DEBUG:title position was updated manually, not adjusting
DEBUG:title position was updated manually, not adjusting
DEBUG:title position was updated manually, not adjusting
DEBUG:title position was updated manually, not adjusting
DEBUG:title position was updated manually, not adjusting
DEBUG:title position was updated manually, not adjusting
DEBUG:title position was updated manually, not adjusting
DEBUG:title position was updated manually, not adjusting
DEBUG:title position was updated manually, not adjusting
DEBUG:title position was updated manually, not adjusting
DEBUG:title position was updated manually, not adjusting
