# Analyzer

In [1]:
from pymbar import timeseries
from pymbar import MBAR
from perses.analysis import utils
import os
from openmmtools.multistate import MultiStateReporter, MultiStateSamplerAnalyzer
import networkx as nx
from itertools import combinations 
import time

In [2]:
# Thermodynamic phases and the residues whose pairwise transformations are analysed.
phases = ['vac', 'sol']
residues = ['ALA', 'CYS', 'SER', 'THR']

# One directed graph per phase; every residue is a node, edges are added later.
graphs = {}
for phase in phases:
    graph = nx.DiGraph(title = f'{phase} thermocycle')
    graph.add_nodes_from(residues)
    graphs[phase] = graph

In [None]:
# Fill each phase graph with one directed edge per residue pair and direction.
# Every edge carries the free energy between the first and last thermodynamic
# state (`dg`) and the SQUARE of the reported uncertainty (`ddg`).
# NOTE(review): because of the `**2`, `ddg` is a variance-like quantity, not a
# standard deviation -- confirm that downstream consumers expect this.
#
# Loading is best-effort: a file that cannot be read or analysed is reported
# together with its name and recorded in `failed_edges`, so that a missing edge
# can be traced back here instead of surfacing later as an unexplained KeyError.
failed_edges = []
for phase in phases:
    graph = graphs[phase]
    for peptide_transform in combinations(residues, 2):
        # Analyse both the forward and the reverse transformation.
        for direct in [peptide_transform, peptide_transform[::-1]]:
            filename = f"{direct[0]}_{direct[1]}.{phase}.default_map.nc"
            try:
                reporter = MultiStateReporter(filename)
                analyzer = MultiStateSamplerAnalyzer(reporter)
                f_ij, df_ij = analyzer.get_free_energy()
                dg = f_ij[0,-1]
                ddg = df_ij[0,-1]**2
                graph.add_edge(*direct, dg = dg, ddg = ddg)

            except Exception as e:
                failed_edges.append((phase, direct))
                print(f"{filename}: {e}")
            # NOTE(review): the reason for this pause is not evident from the
            # notebook; kept as in the original.
            time.sleep(3)

In [None]:
# Replace every edge attribute with its two-decimal string representation.
# Values are coerced through float() first so the cell can be re-run safely:
# on a second run the attributes are already strings, and formatting a str
# with the 'f' format code would raise ValueError.
# NOTE(review): this rounds the stored data in place, so later cells that read
# the attributes back with float() work with two-decimal values.
for graph in graphs.values():
    for edge in graph.edges(data = True):
        attributes = edge[2]
        attributes.update({key: "{0:.2f}".format(float(val)) for key, val in attributes.items()})

In [None]:
# Show the solvent-phase edges together with their attribute dictionaries.
graphs['sol'].edges(data = True)

In [6]:
%matplotlib inline

import matplotlib
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Draw the solvent-phase graph with its nodes on a circle and save it to disk.
G = graphs['sol']
pos = nx.circular_layout(G)

draw_options = {
    'pos': pos,
    'arrowsize': 20,
    'node_size': 1000,
    'node_color': 'lightsteelblue',
}
nx.draw_networkx(G, **draw_options)
plt.savefig("thermocycle.png")

In [None]:
from IPython.display import display, HTML
import pandas as pd

In [None]:
# Build two tables from the per-phase graphs:
#   df_sol -- one row per directed edge: solvent and vacuum dG/ddG plus their
#             combination (solvent minus vacuum dG, summed ddG);
#   fb     -- one row per residue pair: forward + reverse sums, which should
#             vanish for perfectly consistent estimates.
# NOTE(review): the 'ddg' edge attribute is stored squared when the graphs are
# populated, so the ddG columns here are variance-like and adding them sums
# variances -- confirm whether a square root is wanted before reporting.
free_energies = []
indices = []
for a, b, sol_data in graphs['sol'].edges(data = True):
    # Loading is best-effort, so a solvent edge may have no vacuum counterpart.
    if not graphs['vac'].has_edge(a, b):
        print(f"skipping {a} -> {b}: no vacuum edge available")
        continue
    vac_data = graphs['vac'].edges[a, b]
    sol_dg, sol_ddg = float(sol_data['dg']), float(sol_data['ddg'])
    vac_dg, vac_ddg = float(vac_data['dg']), float(vac_data['ddg'])
    dg = sol_dg - vac_dg
    ddg = sol_ddg + vac_ddg
    indices.append((a, b))
    free_energies.append((sol_dg, sol_ddg, vac_dg, vac_ddg, dg, ddg))

# (residue_a, residue_b) -> (sol_dg, sol_ddg, vac_dg, vac_ddg, dg, ddg)
dg_dict = dict(zip(indices, free_energies))

fb_indices = []
fb_free_energies = []
for peptide_transform in combinations(residues, 2):
    reverse_transform = peptide_transform[::-1]
    # Both directions are required to form the forward + backward sum.
    if peptide_transform not in dg_dict or reverse_transform not in dg_dict:
        print(f"skipping {peptide_transform}: forward or reverse edge is missing")
        continue
    forward, backward = dg_dict[peptide_transform], dg_dict[reverse_transform]
    # Entries 0, 2 and 4 are the solvent, vacuum and combined dG respectively.
    tup = (forward[0] + backward[0], forward[2] + backward[2], forward[4] + backward[4])
    fb_indices.append(peptide_transform)
    fb_free_energies.append(tup)


df_sol = pd.DataFrame(free_energies, columns = ['solvent dG', 'solvent ddG', 'vacuum dG', 'vacuum ddG', 'solvation dG', 'solvation ddG'], index = indices)
fb = pd.DataFrame(fb_free_energies, columns = ['solvent discrepancy', 'vacuum discrepancy', 'solvation energy discrepancy'], index = fb_indices)

In [None]:
# Render the per-edge free energy table.
display(df_sol)

In [None]:
# Render the forward + backward discrepancy table.
display(fb)

In [None]:
# Show the raw edge -> free energy tuple mapping.
dg_dict

In [None]:
# For every residue triplet, build the closed three-edge loop in both
# directions and sum entry 4 of dg_dict around it. Each loop becomes a small
# DiGraph whose 'title' graph attribute holds that sum.
tricycle_list = []
tricycles = combinations(residues, 3)
for triplet in tricycles:
    first, second, third = triplet
    loop = [(first, second), (second, third), (third, first)]
    rev_loop = [(end, start) for start, end in loop]

    dg = sum(dg_dict[edge][4] for edge in loop)
    rev_dg = sum(dg_dict[edge][4] for edge in rev_loop)

    forward_graph = nx.DiGraph(title = dg)
    forward_graph.add_edges_from(loop)

    reverse_graph = nx.DiGraph(title = rev_dg)
    reverse_graph.add_edges_from(rev_loop)

    tricycle_list.append((forward_graph, reverse_graph))

In [None]:
def _draw_tricycle(cycle_graph, filename):
    """Draw one three-node cycle on its own figure and save it to `filename`.

    The graph's 'title' attribute (the free energy summed around the cycle) is
    shown as the figure title. The figure is left open so that the inline
    backend still renders it below the cell.
    """
    fig, ax = plt.subplots()
    nx.draw_networkx(cycle_graph,
                     pos = nx.circular_layout(cycle_graph),
                     ax = ax,
                     arrowsize = 20,
                     node_size = 1000,
                     node_color = 'lightsteelblue',
                     label = cycle_graph.graph['title'])
    ax.set_title(f"cycle sum = {cycle_graph.graph['title']:.2f}")
    fig.savefig(filename)


# Previously the file name was built by formatting the DiGraph object itself,
# which is not unique per cycle, so figures overwrote one another; everything
# was also drawn onto the same implicit axes. Each cycle now gets its own
# figure and a file name derived from its residues and direction.
for tup in tricycle_list:
    fwd_graph, rev_graph = tup
    cycle_name = "_".join(fwd_graph.nodes)
    _draw_tricycle(fwd_graph, f"{cycle_name}.forward.png")
    _draw_tricycle(rev_graph, f"{cycle_name}.reverse.png")
    

In [5]:
from perses.analysis import utils

## Next: track how the free energy estimate evolves as more iterations are included (a timeseries).

In [None]:
import pickle

# For every residue pair (forward direction, solvent phase) re-run the analysis
# on progressively longer prefixes of the simulation and record the free energy
# between the first and last state together with its reported uncertainty.
# Result: {(residue_a, residue_b): (dg_list, ddg_list)}, pickled to disk.
iteration_stride = 30  # analyse every 30th iteration count

_returnable = {}
for peptide_transform in combinations(residues, 2):
    filename = f"{peptide_transform[0]}_{peptide_transform[1]}.sol.default_map.nc"
    try:
        reporter = MultiStateReporter(filename)
        # The raw netCDF handle is only needed to read the iteration count;
        # close it right away instead of leaking one handle per file.
        # NOTE(review): assumes the object returned by utils.open_netcdf
        # exposes close(), as netCDF dataset objects do -- confirm.
        ncfile = utils.open_netcdf(filename)
        try:
            n_iterations = int(ncfile.variables['last_iteration'][0])
        finally:
            ncfile.close()
        dg, ddg = list(), list()
        for step in range(1, n_iterations, iteration_stride):
            analyzer = MultiStateSamplerAnalyzer(reporter,max_n_iterations = step)
            f_ij, df_ij = analyzer.get_free_energy()
            dg.append(f_ij[0,-1])
            ddg.append(df_ij[0,-1])

        _returnable[peptide_transform] = (dg, ddg)

    except Exception as e:
        # Best-effort: report which file failed and continue with the next pair.
        print(f"{filename}: {e}")

with open("returnable_plotter.pkl", 'wb') as f:
    pickle.dump(_returnable, f)

Could not locate checkpoint subfile. This is okay for analysis if the solvent trajectory is not needed, but not for production simulation!
DEBUG:openmmtools.multistate.multistateanalyzer:Isolating restraint force...
DEBUG:openmmtools.multistate.multistateanalyzer:No force of type <class 'openmmtools.forces.RadiallySymmetricRestraintForce'> could be found. The restraint will not be unbiased.
DEBUG:openmmtools.multistate.multistateanalyzer:Reading energies...
DEBUG:openmmtools.multistate.multistatereporter:read_replica_thermodynamic_states: iteration = [    0     1     2 ...  9998  9999 10000]
DEBUG:openmmtools.multistate.multistateanalyzer:Done.
DEBUG:openmmtools.multistate.multistateanalyzer:Assembling effective timeseries...
DEBUG:openmmtools.multistate.multistateanalyzer:Done.
DEBUG:openmmtools.multistate.multistateanalyzer:Could not find t0: None of the requested keys could be found on disk!
DEBUG:openmmtools.multistate.multistateanalyzer:Equilibration data:
DEBUG:openmmtools.multis

In [None]:
# Plot the convergence of one transformation with a +/- uncertainty band.
# The series is looked up explicitly in `_returnable` instead of relying on the
# `dg`/`ddg` loop variables left over from the previous cell, which hold
# whichever transformation was processed last and need not match the title.
plotted_transform = ('CYS', 'SER')
dg_series, ddg_series = (np.asarray(series) for series in _returnable[plotted_transform])

fig, ax = plt.subplots()
ax.plot(dg_series)
ax.fill_between(range(len(dg_series)), dg_series - ddg_series, dg_series + ddg_series, alpha=0.2)
ax.set_title(f"{plotted_transform[0]} -> {plotted_transform[1]} (solvent)")
# Each point corresponds to one analysis run; runs are spaced 30 iterations apart.
ax.set_xlabel("analysis checkpoint (every 30 iterations)")
ax.set_ylabel("free energy estimate")