In [1]:
%matplotlib inline
import matplotlib.pyplot as plt

import numpy as np
import itertools as it

from latticeproteins.thermodynamics import LatticeThermodynamics
from latticeproteins.interactions import miyazawa_jernigan
from latticeproteins.conformations import Conformations, ConformationList
from latticeproteins.sequences import find_differences, _residues

In [2]:
class PredictedLattice(object):
    """Lattice model predictor using epistasis."""
    def __init__(self, wildtype, temp, confs, target=None):
        self.wildtype = wildtype
        self.temp = temp
        self.conformations = confs
        self.target = target
        self._lattice = LatticeThermodynamics(self.temp, self.conformations)

        combos = []
        sites = list(range(self.conformations.length()))
        self.dG0 = round(self._lattice.stability(self.wildtype, target=self.target), 9)

        # Calculate first order coefs
        self.dGs = {}
        for i in sites:
            other_sites = sites[:]
            other_sites.remove(i)
            for aa in _residues:
                combos.append((i, aa))

        for c in combos:
            seq = list(self.wildtype[:])
            seq[c[0]] = c[1]
            # Calculate dG as dG_wt -
            self.dGs[c] = round(self._lattice.stability(seq, target=self.target), 9) - self.dG0

        # Calculate second order coefs
        combos = []
        sites = list(range(self.conformations.length()))
        for i in sites:
            other_sites = sites[:]
            other_sites.remove(i)
            for aa in _residues:
                for j in other_sites:
                    for aa2 in _residues:
                        combos.append((i,aa,j,aa2))

        for c in combos:
            seq = list(self.wildtype[:])
            seq[c[0]] = c[1]
            seq[c[2]] = c[3]
            # Calculate dG2
            self.dGs[c] = round(self._lattice.stability(seq, target=self.target), 9) - (self.dG0 + self.dGs[(c[0],c[1])]+ self.dGs[(c[2],c[3])])

    def stability(self, seq, target=None):
        # Get additive coefs to build predictions
        if target != self.target:
            raise Exception("Target does not match wildtype target.")
        loci = find_differences(self.wildtype, seq)
        add = [(pair[0], seq[pair[0]]) for pair in it.combinations(loci, 1)]
        pairs = [(pair[0], seq[pair[0]], pair[1], seq[pair[1]]) for pair in it.combinations(loci, 2)]
        dgs = add + pairs
        stability = float(self.dG0)
        for coef in dgs:
            stability += self.dGs[coef]
        return stability


def kmin2d(arr, k):
    """Return the indices of the n largest arguments in the 2d array."""
    n, m = arr.shape
    vec = arr.flatten()
    vec_ = vec.argsort()
    top_vec = vec_[:k]
    top_n = top_vec // n
    top_m = top_vec % n
    return top_n, top_m

In [3]:
def fracfolded(dG,temp):
    return 1.0 / (1.0 + np.exp(dG / temp))

def walk(seq, lattice, n_top_mutations=3, target=None):
    """"""
    # We create a matrix of possible mutations. Each row is site in the protein
    # and each column is one of the 20 possible amino acids. We calculate the
    # stability of each mutation in this matrix and choose the n_top_mutations.
    # Each of these new mutations become the new starting genotypes for the next
    # move. We repeat this process until all paths reach a dG max.
    
    dG0 = round(lattice.stability(seq, target=target), 9)
    ff = fracfolded(dG0, 1)
    unfinished_paths = [[seq]]
    unfinished_stabilities = [[dG0]]
    finished_paths = []
    finished_stabilities = []
    m = 0
    while len(unfinished_paths) != 0:
        finished = True
        updated_unfinished_paths = []
        updated_unfinished_stabilities = []
        updated_finished_paths = []
        updated_finished_stabilities = []
        # Iterate through paths
        for l, path in enumerate(unfinished_paths):
            # construct new trajectories
            path_stabilities = unfinished_stabilities[l]
            # We're grow our set of paths by n_top_mutations.
            new_paths = path * n_top_mutations
            mutant = list(path[-1])
            current_dG = path_stabilities[-1]

            # Construct grid of all stabilities of all amino acids at all sites
            AA_grid = np.array([_residues]*length)
            dG = np.zeros(AA_grid.shape, dtype=float)
            for (i,j), AA in np.ndenumerate(AA_grid):
                seq1 = mutant[:]
                seq1[i] = AA_grid[i,j]
                dG[i,j] = lattice.stability(seq1, target=target)

            dG = dG.round(decimals=9)
            # Find the top moves in the stability grid
            x, y = kmin2d(dG, n_top_mutations)

            # construct moves from top moves
            #new_paths = [path[:] for q in range(n_top_mutations)]
            #new_dGs = [path_stabilities[:] for q in range(n_top_mutations)]
            for k in range(n_top_mutations):
                # Check that the new moves are better than the current position
                newdG = dG[y[k], x[k]]
                if newdG < current_dG:
                    finished = False
                    new_move = mutant[:]
                    new_move[y[k]] = AA_grid[y[k], x[k]]
                    new_move = "".join(new_move)
                    # Add paths
                    updated_unfinished_paths.append(path + [new_move])
                    updated_unfinished_stabilities.append(path_stabilities + [newdG])
                # If there is no stability change, stay at the current position
                else:
                    finished_paths.append(path)
                    finished_stabilities.append(path_stabilities)
        m += 1
        unfinished_paths = updated_unfinished_paths
        unfinished_stabilities = updated_unfinished_stabilities
        print(m, len(unfinished_paths))

        
    return finished_paths, finished_stabilities

In [4]:
from latticeproteins.sequences import random_sequence

In [5]:
seq1 = "DKCQCNWCRKFTDQR"
temp = 1 
length = len(seq1)
target = "UUURDRURDDLLDR"
# Build the conformation database
confs = Conformations(length, "database")

In [6]:
# Create a lattice protein calculator with given temperature and conf database.
lattice = LatticeThermodynamics(temp, confs)
lattice_p = PredictedLattice(seq1, temp, confs, target=target)

In [19]:
n_top_mutations = 9
#seq1 = "TGKGIHSICNQLMEL" #"".join(random_sequence(15))
a, stabsa = walk(seq1, lattice, n_top_mutations=n_top_mutations, target=target)
#p, stabsp = walk(seq1, lattice_p, n_top_mutations=n_top_mutations, target=target)

1 3
2 10
3 24
4 50
5 91
6 132
7 162
8 177
9 120
10 40
11 26
12 0


In [18]:
n_top_mutations = 9
p, stabsp = walk(seq1, lattice_p, n_top_mutations=n_top_mutations, target=target)

1 3
2 10
3 30
4 78
5 147
6 175
7 146
8 101
9 62
10 33
11 0


In [20]:
import networkx as nx

# Construct networks


In [65]:
Ga = nx.DiGraph()
paths = np.unique(np.array(a))
for path in paths:
    for i, source in enumerate(path[:-1]):
        target_s = path[i+1]
        Ga.add_edge(source,target_s)
        
Gp = nx.DiGraph()
paths = np.unique(np.array(p))
for path in paths:
    for i, source in enumerate(path[:-1]):
        target_s = path[i+1]
        Gp.add_edge(source,target_s)
        

In [69]:
# Find all shared nodes.
anodes = set(Ga.nodes())
pnodes = set(Gp.nodes())
shared_nodes = anodes.intersection(pnodes)

# Finde shared edges.
aedges = set(Ga.edges())
pedges = set(Gp.edges())
shared_edges = aedges.intersection(pedges)

        
# ------------------------------------------
# Set shared edges
# ------------------------------------------
for s, t in Ga.edges():
    if (s,t) in shared_edges:
        Ga.edge[s][t]["shared"] = 1
    else:
        Ga.edge[s][t]["shared"] = 0

for s, t in Gp.edges():
    if (s,t) in shared_edges:
        Gp.edge[s][t]["shared"] = 1
    else:
        Gp.edge[s][t]["shared"] = 0
        
        
# ------------------------------------------
# Set shared nodes and their phenotype
# ------------------------------------------
for n in Ga.nodes():
    if n in shared_nodes:
        Ga.node[n]["shared"] = 1
    else:
        Ga.node[n]["shared"] = 0
    # Calculate dG
    Ga.node[n]["dG"] = lattice.stability(n, target=target) 
        
for n in Gp.nodes():
    if n in shared_nodes:
        Gp.node[n]["shared"] = 1
    else:
        Gp.node[n]["shared"] = 0
    Gp.node[n]["dG"] = lattice_p.stability(n, target=target) 


In [70]:
nx.write_gml(Gp, "/Users/Zsailer/Desktop/predicted.gml")
nx.write_gml(Ga, "/Users/Zsailer/Desktop/actual.gml")