In [1]:
from generationRNN.generate_molecules import GenerateMolecules
from affinityCNN.predict_affinity import AffinityPrediction
from rdkit import Chem
import numpy as np
from molCombiner.mol_comb import MolComb
import os
import sys

In [2]:
def progressbar(it, prefix="", size=60, file=sys.stdout):
    count = len(it)
    def show(j):
        x = int(size*j/count)
        file.write("%s[%s%s] %i/%i\r" % (prefix, "#"*x, "."*(size-x), j, count))
        file.flush()        
    show(0)
    for i, item in enumerate(it):
        yield item
        show(i+1)
    file.write("\n")
    file.flush()

In [3]:
fasta = "MAQTQGTKRKVCYYYDGDVGNYYYGQGHPMKPHRIRMTHNLLLNYGLYRKMEIYRPHKANAEEMTKYHSDDYIKFLRSIRPDNMSEYSKQMQRFNVGEDCPVFDGLFEFCQLSTGGSVASAVKLNKQQTDIAVNWAGGLHHAKKSEASGFCYVNDIVLAILELLKYHQRVLYIDIDIHHGDGVEEAFYTTDRVMTVSFHKYGEYFPGTGDLRDIGAGKGKYYAVNYPLRDGIDDESYEAIFKPVMSKVMEMFQPSAVVLQCGSDSLSGDRLGCFNLTIKGHAKCVEFVKSFNLPMLMLGGGGYTIRNVARCWTYETAVALDTEIPNELPYNDYFEYFGPDFKLHISPSNMTNQNTNEYLEKIKQRLFENLRMLPHAPGVQMQAIPEDAIPEESGDEDEEDPDKRISICSSDKRIACEEEFSDSDEEGEGGRKNSSNFKKAKRVKTEDEKEKDPEEKKEVTEEEKTKEEKPEAKGVKEEVKLA"
comb = MolComb(fasta)
fitness = np.vectorize(lambda mol: comb.get_affinity(mol))

In [4]:
raw_data = np.load("molecules.npy")
smiles = []
for mol in raw_data:
    smiles.append(mol.replace('/', "").replace('@', "").replace('\\', "").replace('.', ""))
raw_data = smiles
k = 500
pop = np.array(raw_data[:k], dtype="object")

In [5]:
def remove_zeros(pop, f):
    f_pop = np.column_stack((pop, f))
    new_pop = (f_pop[f_pop[:, 1]!=0])[:, 0]
    new_f = (f_pop[f_pop[:, 1]!=0])[:, 1]
    return(new_pop, new_f)

In [None]:
history = np.empty((1,2))
gen = 1
while True:
    print("Starting generation " + str(gen))
    f = fitness(pop)
    print("Removing garbage values...")
    pop, f = remove_zeros(pop, f)
    
    bestf = np.array([f.min(), pop[f.argmin()]])
    history = np.append(history, bestf)
    
    print(history)
    print(f.mean())
    p = np.array((f.max() - f)**2 / ((f.max() - f)**2).sum(), dtype="float64")
    parents = np.random.choice(pop, size=(len(pop),2), p=p)
#     print(parents.shape)
#     print(parents)
    
    nextpop = []
    for row in progressbar(parents, "Computing children", 40):
        max_parent = max(comb.get_affinity(row[0]), comb.get_affinity(row[1]))
        combined = comb.combine(row[0], row[1])
        if (comb.get_affinity(combined) > max_parent):
            min_parent = min(row[0], row[1], key=lambda mol: comb.get_affinity(mol))
            nextpop.append(min_parent)
        else:
            nextpop.append(combined)

    
    pop = np.array(nextpop, dtype="object")
#     print(pop)
    gen = gen+1

Starting generation 1
Removing garbage values...
['0.0' '1.0' '20.668718338012695'
 'CC(C)[CH]1CCCCN1C(=O)[CH](C)OC(=O)C(=O)Nc1ccc(Br)cc1']
8622.64667660141
Computing children[########################################] 500/500
Starting generation 2
Removing garbage values...
['0.0' '1.0' '20.668718338012695'
 'CC(C)[CH]1CCCCN1C(=O)[CH](C)OC(=O)C(=O)Nc1ccc(Br)cc1'
 '20.668718338012695'
 'CC(C)[CH]1CCCCN1C(=O)[CH](C)OC(=O)C(=O)Nc1ccc(Br)cc1']
1252.8075384765502
Computing children[########################################] 494/494
Starting generation 3
Removing garbage values...
['0.0' '1.0' '20.668718338012695'
 'CC(C)[CH]1CCCCN1C(=O)[CH](C)OC(=O)C(=O)Nc1ccc(Br)cc1'
 '20.668718338012695'
 'CC(C)[CH]1CCCCN1C(=O)[CH](C)OC(=O)C(=O)Nc1ccc(Br)cc1'
 '20.668718338012695'
 'CCC(=O)Nc1cc(C)nc(C2CCN(c3ncc(C(N)=O)c(NC4CCNCC4)c3C(N)=O)CC2)c1[N+](=O)[O-]']
326.7602217843236
Computing children[########################################] 491/491
Starting generation 4
Removing garbage values...
['0.0' '1.0'