# Figure for open walk

[Jump to analysis](#Main-Analysis)

In [4]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import networkx as nx
import itertools as it

from latticeproteins.thermodynamics import LatticeThermodynamics
from latticeproteins.interactions import miyazawa_jernigan
from latticeproteins.conformations import ConformationList, Conformations
from latticeproteins.sequences import find_differences, _residues
from latticeproteins.evolve import monte_carlo_fixation_walk, fixation
from latticeproteins.sequences import random_sequence

In [5]:
# -------------------------------------------------
# Evolving functions
# -------------------------------------------------

def fixation(fitness1, fitness2, N=10e8, *args, **kwargs):
    """Fixation probability of a mutant with fitness2 arising at fitness1.

    The selection coefficient is the relative fitness change,
    ``s = (fitness2 - fitness1) / |fitness1|``, and ``N`` is the effective
    population size.

    .. math::
        p_{\\text{fixation}} = \\frac{1 - e^{-s}}{1 - e^{-N s}}

    Parameters
    ----------
    fitness1 : float or array
        Fitness of the current (resident) organism.
    fitness2 : float or array
        Fitness of the candidate mutant(s).
    N : float
        Effective population size.
    *args, **kwargs
        Accepted and ignored.

    Returns
    -------
    float or array
        Fixation probability. For array results, undefined entries (NaN, e.g.
        the neutral 0/0 case) are replaced by ``np.nan_to_num`` and every
        deleterious move (``s < 0``) is set to exactly 0. Scalar results are
        returned as computed.
    """
    sij = (fitness2 - fitness1) / abs(fitness1)
    # exp() overflow for strongly deleterious moves and 0/0 for neutral ones
    # are expected; they are cleaned up below instead of being warned about.
    with np.errstate(over="ignore", divide="ignore", invalid="ignore"):
        numerator = 1 - np.exp(-sij)
        denominator = 1 - np.exp(-N * sij)
        prob = numerator / denominator
    # isinstance (rather than an exact type match) so ndarray subclasses get
    # the same clean-up.
    if isinstance(prob, np.ndarray):
        prob = np.nan_to_num(prob)
        prob[sij < 0] = 0
    return prob

class PredictedLattice(object):
    """Lattice model predictor using epistasis.

    The stability of a sequence is approximated as the wildtype stability
    plus the first-order (single substitution) and second-order (pair of
    substitutions) coefficients of every difference from the wildtype.
    All coefficients are computed up front in ``__init__``.

    Parameters
    ----------
    wildtype : str
        Reference sequence the coefficients are measured against.
    temp : float
        Temperature handed to ``LatticeThermodynamics`` and used by
        ``fracfolded``.
    confs : object
        Conformation database handed to ``LatticeThermodynamics``; must
        provide ``length()``.
    target : optional
        Target conformation forwarded to every stability calculation.

    Attributes
    ----------
    dG0 : stability of the wildtype.
    dGs : dict mapping ``(site, aa)`` to a first-order coefficient and
        ``(site_i, aa_i, site_j, aa_j)`` to a second-order coefficient.
    """
    def __init__(self, wildtype, temp, confs, target=None):
        self.wildtype = wildtype
        self.temp = temp
        self.conformations = confs
        self.target = target
        self._lattice = LatticeThermodynamics(self.temp, self.conformations)

        sites = list(range(self.conformations.length()))
        self.dG0 = self._lattice.stability(self.wildtype, target=self.target)

        # First-order coefficients: effect of each single substitution
        # relative to the wildtype.
        self.dGs = {}
        for site, aa in it.product(sites, _residues):
            mutant = list(self.wildtype)
            mutant[site] = aa
            self.dGs[(site, aa)] = self._lattice.stability(mutant, target=self.target) - self.dG0

        # Second-order coefficients: what a double mutant adds beyond the sum
        # of its two single-substitution effects. Both orderings of a pair of
        # sites are stored, so lookups do not depend on site order.
        for i, aa, j, aa2 in it.product(sites, _residues, sites, _residues):
            if i == j:
                continue
            mutant = list(self.wildtype)
            mutant[i] = aa
            mutant[j] = aa2
            additive = self.dG0 + self.dGs[(i, aa)] + self.dGs[(j, aa2)]
            self.dGs[(i, aa, j, aa2)] = self._lattice.stability(mutant, target=self.target) - additive

    def stability(self, seq, target=None):
        """Predict the stability of ``seq`` from the stored coefficients.

        Parameters
        ----------
        seq : str
            Sequence to evaluate.
        target : optional
            Must equal the target the predictor was built with.

        Raises
        ------
        ValueError
            If ``target`` differs from the predictor's own target.
        """
        if target != self.target:
            raise ValueError("Target does not match wildtype target.")
        loci = find_differences(self.wildtype, seq)
        stability = float(self.dG0)
        # Sum the single-site terms first, then the pairwise corrections.
        for site in loci:
            stability += self.dGs[(site, seq[site])]
        for i, j in it.combinations(loci, 2):
            stability += self.dGs[(i, seq[i], j, seq[j])]
        return stability

    def fracfolded(self, seq, target=None):
        """Return ``1 / (1 + exp(stability / temp))`` for ``seq``."""
        return 1.0 / (1.0 + np.exp(self.stability(seq, target=target) / self.temp))
    

def enumerate_walks(seq, lattice, selected_trait="fracfolded", max_mutations=5, target=None, self_transition=True):
    """Enumerate the likely mutational steps leading away from ``seq``.

    Starting from ``seq``, every single amino acid substitution of every
    sequence in the current frontier is scored with ``fixation``. Moves whose
    probability exceeds 1% of the best move from that sequence become edges,
    and their (de-duplicated) destinations form the next frontier.

    Parameters
    ----------
    seq : str
        Starting sequence.
    lattice : LatticeThermodynamics object
        Lattice protein calculator; must provide the ``selected_trait`` method.
    selected_trait : str
        Name of the ``lattice`` method used as fitness.
    max_mutations : int (default = 5)
        Maximum number of steps away from ``seq``.
    target : str
        Target conformation forwarded to the fitness method.
    self_transition : bool
        Currently unused; moves that leave a sequence unchanged are always
        skipped.

    Returns
    -------
    edges : list
        Items of the form ``((source, destination), {"weight": probability})``.

    Notes
    -----
    Progress (step, number of moves, number of unique moves) is printed after
    every step, and the enumeration stops early once a frontier holds more
    than 2000 unique sequences.
    """
    length = len(seq)
    fitness_method = getattr(lattice, selected_trait)

    # Every amino acid at every site. Identical for all sequences and never
    # modified, so it is built once.
    AA_grid = np.array([_residues]*length)

    moves = [seq]
    fitnesses = [fitness_method(seq, target=target)]
    edges = []
    z = 0
    # len() rather than truthiness: ``moves`` is a numpy array after step one.
    while len(moves) != 0 and z < max_mutations:

        new_moves = []
        new_fitnesses = []

        for parent, parent_fitness in zip(moves, fitnesses):
            sequence = list(parent[:])
            # Fitness of every single-site substitution of this sequence.
            fits = np.zeros(AA_grid.shape, dtype=float)
            for (site_idx, aa_idx), residue in np.ndenumerate(AA_grid):
                mutant = sequence[:]
                mutant[site_idx] = residue
                fits[site_idx, aa_idx] = fitness_method(mutant, target=target)

            # Fixation probability of each neighbor, multiplied by a flat
            # prior over all mutations.
            fix = fixation(parent_fitness, fits)*(1./fits.size)
            site, aa_index = np.where(fix > 0.01 * fix.max())

            AA = AA_grid[site, aa_index]
            FF = fits[site, aa_index]
            prob = fix[site, aa_index]

            for k in range(len(site)):
                move = sequence[:]
                move[site[k]] = AA[k]
                # Substituting the residue already present is not a move.
                if move != sequence:
                    new_moves.append("".join(move))
                    new_fitnesses.append(FF[k])
                    edges.append((("".join(sequence[:]),"".join(move)), {"weight" : prob[k]}))

        # Collapse sequences reached through several routes, keeping the
        # fitness recorded at the first occurrence.
        moves, indices = np.unique(new_moves, return_index=True)
        fitnesses = np.array(new_fitnesses)[indices]
        z += 1
        print(z, len(new_moves), len(moves))
        if len(moves) > 2000:
            break

    return edges
    
# -------------------------------------------------
# Graphing/plotting code
# -------------------------------------------------  
    
from latticeproteins.sequences import hamming_distance

def flux_out_of_node(G, node_i):
    """Split the flux sitting on ``node_i`` among its neighbors.

    Each neighbor's share is proportional to the ``"weight"`` of the edge
    leading to it. Shares above 0.01 are stored as the edge's
    ``"delta_flux"`` and added to the neighbor's ``"flux"``; smaller shares
    are recorded as a ``"delta_flux"`` of 0 and dropped.

    Returns an empty dict (nothing is ever added to it).
    """
    available = G.node[node_i]["flux"]
    # Total outgoing weight, used to turn weights into fractions.
    total_weight = sum(G.edge[node_i][dest]["weight"] for dest in G.neighbors(node_i))
    for dest in G.neighbors(node_i):
        share = (G.edge[node_i][dest]["weight"]/total_weight) * available
        carries_flux = share > 0.01
        G.edge[node_i][dest]["delta_flux"] = share if carries_flux else 0
        if carries_flux:
            G.node[dest]["flux"] += share
    return {}

def flux_from_source(G, source):
    """Push one unit of flux from ``source`` through ``G``.

    Every node's ``"flux"`` is reset to 0, the source is set to 1, and the
    nodes are then visited level by level (as grouped by ``ring_levels``),
    each handing its flux on via ``flux_out_of_node``. ``G`` is modified in
    place and also returned.
    """
    # Start from a clean slate: no flux anywhere except at the source.
    nx.set_node_attributes(G, "flux", {node: 0 for node in G.nodes()})
    G.node[source]["flux"] = 1
    for ring in ring_levels(G, source).values():
        for node in ring:
            leftovers = flux_out_of_node(G, node)
            # Apply any extra per-edge flux reported by flux_out_of_node.
            for (_, dest), extra in leftovers.items():
                G.node[dest]["flux"] += extra
    return G

from latticeproteins.sequences import hamming_distance

def ring_levels(G, root):
    """Group nodes of ``G`` by their ``hamming_distance`` from ``root``.

    Returns a dict with keys 0..19, each mapping to a set of nodes. Level 0
    always contains ``root``; any other node is placed only if it appears as
    a neighbor of some node. A distance of 20 or more raises ``KeyError``.
    """
    rings = {distance: [] for distance in range(20)}
    rings[0].append(root)
    for node in G.nodes():
        for reached in G.neighbors(node):
            rings[hamming_distance(root, reached)].append(reached)
    # Sets drop the duplicates from nodes reached along several edges.
    return {distance: set(members) for distance, members in rings.items()}

def radial(r, theta):
    """Convert polar coordinates ``(r, theta)`` to a Cartesian ``(x, y)`` tuple."""
    x = r*np.cos(theta)
    y = r*np.sin(theta)
    return (x, y)

def ring_position(G, root):
    """Lay the nodes of ``G`` out on concentric rings around ``root``.

    Nodes in level ``i`` of ``ring_levels`` are spread evenly around a circle
    of radius ``i``. Returns a dict mapping node to an ``(x, y)`` tuple.
    """
    rings = ring_levels(G, root)
    positions = {}
    for radius in range(len(rings)):
        members = rings[radius]
        if not members:
            continue
        # Angular spacing between neighboring nodes on this ring.
        step = 2*np.pi / len(members)
        for slot, member in enumerate(members):
            positions[member] = radial(radius, slot*step)
    return positions

# Main Analysis

Saved example sequences:
'YFDWKLMMAKPH'
'PFFMKMVDHKMF'

Best option:
`DHKWFHFMMDLH`

In [24]:
# Set up the starting sequence, the conformation databases and the lattice
# calculator used by the cells below.
length = 12
#seq = "".join(random_sequence(length))
seq = 'DHKWFHFMMDLH'
c = Conformations(length, "database")
temp = 1
# Presumably the two lowest-energy conformations of seq at this temperature
# -- TODO confirm against the latticeproteins documentation.
cs = c.k_lowest_confs(seq, temp, 2)
# The first of the two is used as the folding target.
target = cs[0]
db1 = cs[0]
db2 = cs[1]

length = len(seq)
# Reduced conformation list holding only the two conformations above.
c2 = [db1, db2]#"U"*length]
confs = ConformationList(length, c2)

# Create a lattice protein calculator with given temperature and conf database.
lattice = LatticeThermodynamics(temp, c)

# Equilibrate
n_muts = 30
paths, fitness, probs = monte_carlo_fixation_walk(seq, lattice,
    selected_trait="fracfolded",
    max_mutations=n_muts,
    target=target, 
    self_transition=False)

# Continue the analysis from the sixth-from-last sequence of the walk.
seq = paths[-6]
print(seq)

DHKWFHFMMDLH


  denominator = 1 - np.exp(-N * sij)
  fixation = numerator / denominator
  p = fix / fix.sum()
  sample_indices = _np.random.choice(index_arr, size=size, replace=replace, p=parr)


In [25]:
# Enumerate walks from seq on the lattice built from the full conformation
# database `c`.
edges0 = enumerate_walks(seq, lattice, target=target, max_mutations=7, self_transition=False)

  del sys.path[0]
  app.launch_new_instance()


1 7 7
2 45 31
3 194 107
4 799 363


KeyboardInterrupt: 

In [26]:
# Repeat the enumeration on a lattice restricted to the conformations in
# `confs`; this overwrites edges0.
plattice = LatticeThermodynamics(temp, confs)
edges0 = enumerate_walks(seq, plattice, target=target, max_mutations=7, self_transition=False)

  del sys.path[0]
  app.launch_new_instance()


1 22 22
2 537 327
3 7639 2658


In [19]:
# Enumerate walks on the epistasis-based predictor built around seq; the
# resulting edges2 is compared against edges0 when the graphs are built.
plattice = PredictedLattice(seq, temp, confs, target=target)
edges2 = enumerate_walks(seq, plattice, target=target, max_mutations=7, self_transition=False)

  del sys.path[0]
  app.launch_new_instance()


1 20 20
2 383 213
3 3617 1095
4 15742 2637


# Probability flux through node *j*

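$$
F_j = \sum_{i} \frac{\pi_{i \rightarrow j}}{Z_i} \cdot F_i
$$

$$
Z_i = \sum_{k \in \mathrm{neighbors}(i)} \pi_{i \rightarrow k}
$$

Here the first sum runs over every node $i$ with an edge into $j$, and $Z_i$ normalizes the transition probabilities over all neighbors $k$ reachable from $i$.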

# Plotting

In [116]:
# -------------------------------------------------
# Graphing/plotting code
# -------------------------------------------------  
    
from latticeproteins.sequences import hamming_distance

def flux_out_of_node(G, node_i):
    """Split the flux sitting on ``node_i`` among its neighbors.

    Each neighbor's share is proportional to the ``"weight"`` of the edge
    leading to it. Shares above 0.01 are stored as the edge's
    ``"delta_flux"`` and added to the neighbor's ``"flux"``; smaller shares
    are recorded as a ``"delta_flux"`` of 0 and dropped.

    Returns an empty dict (nothing is ever added to it).
    """
    available = G.node[node_i]["flux"]
    # Total outgoing weight, used to turn weights into fractions.
    total_weight = sum(G.edge[node_i][dest]["weight"] for dest in G.neighbors(node_i))
    for dest in G.neighbors(node_i):
        share = (G.edge[node_i][dest]["weight"]/total_weight) * available
        carries_flux = share > 0.01
        G.edge[node_i][dest]["delta_flux"] = share if carries_flux else 0
        if carries_flux:
            G.node[dest]["flux"] += share
    return {}

def flux_from_source(G, source):
    """Push one unit of flux from ``source`` through ``G``.

    Every node's ``"flux"`` is reset to 0, the source is set to 1, and the
    nodes are then visited level by level (as grouped by ``ring_levels``),
    each handing its flux on via ``flux_out_of_node``. ``G`` is modified in
    place and also returned.
    """
    # Start from a clean slate: no flux anywhere except at the source.
    nx.set_node_attributes(G, "flux", {node: 0 for node in G.nodes()})
    G.node[source]["flux"] = 1
    for ring in ring_levels(G, source).values():
        for node in ring:
            leftovers = flux_out_of_node(G, node)
            # Apply any extra per-edge flux reported by flux_out_of_node.
            for (_, dest), extra in leftovers.items():
                G.node[dest]["flux"] += extra
    return G

from latticeproteins.sequences import hamming_distance

def ring_levels(G, root):
    """Group nodes of ``G`` by their ``hamming_distance`` from ``root``.

    Returns a dict with keys 0..19, each mapping to a set of nodes. Level 0
    always contains ``root``; any other node is placed only if it appears as
    a neighbor of some node. A distance of 20 or more raises ``KeyError``.
    """
    rings = {distance: [] for distance in range(20)}
    rings[0].append(root)
    for node in G.nodes():
        for reached in G.neighbors(node):
            rings[hamming_distance(root, reached)].append(reached)
    # Sets drop the duplicates from nodes reached along several edges.
    return {distance: set(members) for distance, members in rings.items()}

def radial(r, theta):
    """Convert polar coordinates ``(r, theta)`` to a Cartesian ``(x, y)`` tuple."""
    x = r*np.cos(theta)
    y = r*np.sin(theta)
    return (x, y)

def ring_position(G, root):
    """Lay the nodes of ``G`` out on concentric rings around ``root``.

    Nodes in level ``i`` of ``ring_levels`` are spread evenly around a circle
    of radius ``i``. Returns a dict mapping node to an ``(x, y)`` tuple.
    """
    rings = ring_levels(G, root)
    positions = {}
    for radius in range(len(rings)):
        members = rings[radius]
        if not members:
            continue
        # Angular spacing between neighboring nodes on this ring.
        step = 2*np.pi / len(members)
        for slot, member in enumerate(members):
            positions[member] = radial(radius, slot*step)
    return positions

def _flux_graph(edges, source, min_flux=0.001):
    """Build a flux-annotated DiGraph from an edge list and drop weak nodes."""
    G = nx.DiGraph()
    for key, attrs in edges:
        G.add_edge(key[0], key[1], weight=attrs["weight"])
    G = flux_from_source(G, source)
    # Remove nodes that carry (almost) no flux.
    weak = [node for node in G.nodes() if G.node[node]["flux"] < min_flux]
    G.remove_nodes_from(weak)
    return G


def build_graphs(edges1, edges2, source=None):
    """Build two flux networks and the network of their differences.

    Parameters
    ----------
    edges1, edges2 : iterable
        Edge lists whose items look like
        ``((node_i, node_j), {"weight": w})``. ``edges1`` is treated as the
        reference network and ``edges2`` as the prediction.
    source : node, optional
        Node from which flux is propagated. Defaults to the module-level
        ``seq``, which was the only option before this parameter existed.

    Returns
    -------
    G0, G2, Gdiff : networkx.DiGraph
        ``G0`` and ``G2`` carry ``"flux"`` on nodes and ``"delta_flux"`` on
        edges, with nodes below a flux of 0.001 removed. ``Gdiff`` holds the
        union of their edges and nodes. Its edges carry ``"weight"`` (absolute
        change in delta_flux) and ``"color"``; its nodes carry ``"color"``,
        ``"outer"`` and ``"inner"``. Red ("r") marks flux that is lower or
        missing in the prediction, blue ("b") flux that is equal, higher or
        new.
    """
    if source is None:
        source = seq

    # -----------------------------------------------
    # Build both graphs and calculate flux at each node and edge
    # -----------------------------------------------
    G0 = _flux_graph(edges1, source)
    G2 = _flux_graph(edges2, source)

    # Flux carried along each surviving edge.
    edges_0 = {(i, j): G0.edge[i][j]["delta_flux"] for i, j in G0.edges()}
    edges_2 = {(i, j): G2.edge[i][j]["delta_flux"] for i, j in G2.edges()}

    # -----------------------------------------------
    # Calculate the change in delta_flux on each edge
    # -----------------------------------------------
    edges_diff = {}
    for key, val in edges_0.items():
        if key in edges_2:
            change = edges_2[key] - val
            # Negative change: the prediction carries less flux on this edge.
            color = "r" if change < 0 else "b"
            edges_diff[key] = dict(color=color, weight=abs(change))
        else:
            # This edge was lost in our predictions
            edges_diff[key] = dict(weight=val, color="r")

    for key, val in edges_2.items():
        if key not in edges_0:
            # This edge was gained in our predictions
            edges_diff[key] = dict(weight=val, color="b")

    # -----------------------------------------------
    # Calculate the change in flux at each node
    # -----------------------------------------------
    nodes_0 = {node: G0.node[node]["flux"] for node in G0.nodes()}
    nodes_2 = {node: G2.node[node]["flux"] for node in G2.nodes()}

    node_diff = {}
    for key, val in nodes_0.items():
        if key in nodes_2:
            color = "b" if nodes_2[key] - val > 0 else "r"
            node_diff[key] = dict(color=color, outer=nodes_2[key], inner=val)
        else:
            # Node only present in the reference network.
            node_diff[key] = dict(color="r", outer=val, inner=0)

    for key, val in nodes_2.items():
        if key not in nodes_0:
            # Node only present in the prediction.
            node_diff[key] = dict(color="b", outer=val, inner=0)

    # -----------------------------------------------
    # Construct a network of differences
    # -----------------------------------------------
    Gdiff = nx.DiGraph()
    for key, attrs in edges_diff.items():
        Gdiff.add_edge(key[0], key[1], **attrs)

    for node, attrs in node_diff.items():
        # add_node updates the attributes of an existing node and also covers
        # a node left without any surviving edge, which would otherwise be
        # missing from Gdiff and raise KeyError.
        Gdiff.add_node(node, **attrs)

    return G0, G2, Gdiff

def _draw_distance_rings(ax, n_rings=7):
    """Draw dashed circles of radius 0..n_rings-1 centred on the origin."""
    from matplotlib.patches import Circle

    for radius in range(n_rings):
        circle = Circle((0, 0), radius, facecolor='none',
                edgecolor="k", linewidth=.5, alpha=0.5, linestyle="--")
        ax.add_patch(circle)


def _draw_flux_panel(ax, G, Gdiff, pos, node_scale, edge_scale, node_color):
    """Draw one flux network, marking nodes of Gdiff that G lacks with an x."""
    # Edge widths follow the delta_flux attribute of each edge.
    edge_widths = np.array([G.edge[i][j]["delta_flux"] for i, j in G.edges()])
    edge_widths = edge_widths * edge_scale
    nx.draw_networkx_edges(G, pos=pos, ax=ax,
        width=edge_widths,
        arrows=False,
        edge_color="gray",
        alpha=0.5
    )

    # Node sizes follow the amount of flux passing through each node.
    node_size = [G.node[i]["flux"] * node_scale for i in G.nodes()]
    nx.draw_networkx_nodes(G, pos=pos, ax=ax,
        node_size=node_size,
        linewidths=None,
        node_color=node_color
    )

    # Nodes that exist in the difference network but not in this one.
    missing = [node for node in Gdiff.nodes() if node not in G]
    nx.draw_networkx_nodes(Gdiff, pos=pos, ax=ax,
        nodelist=missing,
        node_shape="x",
        node_size=node_scale*.25,
        linewidths=None,
        node_color="m"
    )

    _draw_distance_rings(ax)
    ax.axis("equal")
    ax.axis("off")


def plot_networks(G1, G2, Gdiff, root=None):
    """Plot two flux networks and their difference as three stacked panels.

    Parameters
    ----------
    G1, G2 : networkx.DiGraph
        Networks with ``"flux"`` on nodes and ``"delta_flux"`` on edges.
    Gdiff : networkx.DiGraph
        Difference network with ``"weight"`` and ``"color"`` on edges and
        ``"color"``, ``"outer"`` and ``"inner"`` on nodes. Its nodes define
        the layout shared by all three panels.
    root : node, optional
        Node placed at the centre of the ring layout. Defaults to the
        module-level ``seq``, which was the only option before this
        parameter existed.

    Returns
    -------
    fig : matplotlib.figure.Figure
    """
    from matplotlib.gridspec import GridSpec

    if root is None:
        root = seq

    # options
    node_scale = 600
    edge_scale = 75
    node_color = "k"

    fig = plt.figure(figsize=(10,30))
    gs = GridSpec(3, 1)

    # Calculate the positions for all nodes on rings
    pos = ring_position(Gdiff, root)

    # -------------------------------------------------
    # Draw the first and second networks
    # -------------------------------------------------
    ax1 = plt.subplot(gs[0, 0])
    _draw_flux_panel(ax1, G1, Gdiff, pos, node_scale, edge_scale, node_color)

    ax2 = plt.subplot(gs[1, 0])
    _draw_flux_panel(ax2, G2, Gdiff, pos, node_scale, edge_scale, node_color)

    # -------------------------------------------------
    # Draw difference network
    # -------------------------------------------------
    ax3 = plt.subplot(gs[2, 0])

    # Edge widths follow the change in flux; colors mark its direction.
    edge_widths = np.array([Gdiff.edge[i][j]["weight"] for i, j in Gdiff.edges()])
    edge_widths = edge_widths * edge_scale
    edge_color = [Gdiff.edge[i][j]["color"] for i, j in Gdiff.edges()]

    nx.draw_networkx_edges(Gdiff, pos=pos, ax=ax3,
        width=edge_widths,
        arrows=False,
        edge_color=edge_color,
        alpha=0.5
    )

    # Colored disc sized by the "outer" flux of each node ...
    outer_size = [Gdiff.node[i]["outer"] * node_scale for i in Gdiff.nodes()]
    diff_color = [Gdiff.node[i]["color"] for i in Gdiff.nodes()]
    nx.draw_networkx_nodes(Gdiff, pos=pos, ax=ax3,
        node_size=outer_size,
        linewidths=None,
        node_color=diff_color
    )

    # ... with a white disc sized by the "inner" flux drawn on top of it.
    inner_size = [Gdiff.node[i]["inner"] * node_scale for i in Gdiff.nodes()]
    nx.draw_networkx_nodes(Gdiff, pos=pos, ax=ax3,
        node_size=inner_size,
        linewidths=None,
        node_color="w"
    )

    _draw_distance_rings(ax3)
    ax3.axis("equal")
    ax3.axis("off")
    return fig

In [8]:
from open_walk_analysis import *

In [9]:
# Needs edges0 and edges2 from the enumerate_walks cells to exist in the
# current session; run those cells first.
G0, G2, Gdiff = build_graphs(edges0, edges2)
fig = plot_networks(G0, G2, Gdiff)

NameError: name 'edges2' is not defined

[Jump back up](#Main-Analysis)

In [115]:
seq

'DHKWFHFMMDLH'