In [1]:
from generationRNN.generate_molecules import GenerateMolecules
from affinityCNN.predict_affinity import AffinityPrediction
from rdkit import Chem
import numpy as np
from molCombiner.mol_comb import MolComb
import os
import sys

In [2]:
def progressbar(it, prefix="", size=60, file=sys.stdout):
    """Yield the items of ``it`` while drawing a text progress bar on ``file``.

    Parameters
    ----------
    it : sized iterable
        Must support ``len()``; its items are yielded unchanged and in order.
    prefix : str
        Text written in front of the bar.
    size : int
        Width of the bar in characters.
    file : file-like
        Object with ``write`` and ``flush``; defaults to ``sys.stdout``.

    Yields
    ------
    Each item of ``it``. The bar is redrawn after the consumer has finished
    with an item, and a newline is written once the iterable is exhausted.
    """
    count = len(it)

    def show(j):
        # An empty iterable has nothing left to do: draw the bar as full
        # instead of dividing by zero.
        x = int(size * j / count) if count else size
        # "\r" returns to the start of the line so the next call overwrites it.
        file.write("%s[%s%s] %i/%i\r" % (prefix, "#" * x, "." * (size - x), j, count))
        file.flush()

    show(0)
    for i, item in enumerate(it):
        yield item
        show(i + 1)
    file.write("\n")
    file.flush()

In [3]:
# Sequence string passed to MolComb (presumably the target protein -- confirm).
fasta = "PEEIRPKEVYLDRKLLTLEDKELGSGNFGTVKKGYYQMKKVVKTVAVKILKNEANDPALKDELLAEANVMQQLDNPYIVRMIGICEAESWMLVMEMAELGPLNKYLQQNRHVKDKNIIELVHQVSMGMKYLEESNFVHRDLAARNVLLVTQHYAKISDFGLSKALRADENYYKAQTHGKWPVKWYAPECINYYKFSSKSDVWSFGVLMWEAFSYGQKPYRGMKGSEVTAMLEKGERMGCPAGCPREMYDLMNLCWTYDVENRPGFAAVELRLRNYYYDVVN"
comb = MolComb(fasta)


def _affinity_of(mol):
    """Return ``comb.get_affinity(mol)``; ``comb`` is looked up at call time."""
    return comb.get_affinity(mol)


# Element-wise wrapper so a whole population array can be scored in one call.
fitness = np.vectorize(_affinity_of)

In [4]:
# Load the molecule strings and delete every '/', '@', '\' and '.' from each.
raw_data = np.load("molecules.npy")
_strip_table = str.maketrans("", "", "/@\\.")
smiles = [mol.translate(_strip_table) for mol in raw_data]
raw_data = smiles
# The starting population is the first k cleaned strings.
k = 500
pop = np.array(raw_data[:k], dtype="object")

In [5]:
def remove_zeros(pop, f):
    """Drop every individual whose fitness value equals zero.

    ``pop`` and ``f`` are stacked as two columns, rows whose second column
    is 0 are discarded, and the two surviving columns are returned as the
    tuple ``(new_pop, new_f)``.
    """
    stacked = np.column_stack((pop, f))
    # Build the row mask once and reuse it for both columns.
    nonzero_rows = stacked[stacked[:, 1] != 0]
    return (nonzero_rows[:, 0], nonzero_rows[:, 1])

In [6]:
# Evolutionary loop. Each generation: score the population, drop zero scores,
# record the lowest score, draw parent pairs with probability weighted toward
# low scores, and build the next population from their combinations.
# There is no stopping criterion other than an empty population; interrupt
# the kernel to end the run.

# One row per generation: [lowest score, molecule with that score].
# Starts with zero rows (np.empty((1, 2)) would seed it with uninitialised
# values) and uses object dtype so the score stays a number next to the string.
history = np.empty((0, 2), dtype="object")
gen = 1
while True:
    print("Starting generation " + str(gen))
    f = fitness(pop)
    print("Removing garbage values...")
    pop, f = remove_zeros(pop, f)
    print(f)

    if len(pop) == 0:
        # f.min() and np.random.choice below both fail on an empty population.
        print("Population is empty after removing zero scores; stopping.")
        break

    best_idx = f.argmin()
    bestf = np.array([f[best_idx], pop[best_idx]], dtype="object")
    # vstack keeps the (n, 2) layout; np.append without an axis would flatten.
    history = np.vstack((history, bestf))

    print(history)
    print(f.mean())

    # Selection weight grows quadratically with the distance below the worst
    # (highest) score, so the highest-scoring individual gets weight 0.
    weights = (f.max() - f) ** 2
    total = weights.sum()
    if total == 0:
        # Every score is identical: fall back to uniform sampling instead of
        # dividing by zero.
        p = None
    else:
        p = np.array(weights / total, dtype="float64")
    parents = np.random.choice(pop, size=(len(pop), 2), p=p)

    nextpop = []
    for row in progressbar(parents, "Computing children", 40):
        # Score each parent once and reuse the value.
        # NOTE(review): assumes comb.get_affinity returns the same value for
        # the same molecule on repeated calls -- confirm.
        affinity_a = comb.get_affinity(row[0])
        affinity_b = comb.get_affinity(row[1])
        combined = comb.combine(row[0], row[1])
        if comb.get_affinity(combined) > max(affinity_a, affinity_b):
            # Child scores higher than both parents: keep the lower-scoring
            # parent instead (first parent wins ties, as min() would).
            nextpop.append(row[1] if affinity_b < affinity_a else row[0])
        else:
            nextpop.append(combined)

    pop = np.array(nextpop, dtype="object")
    gen += 1

Starting generation 1
Removing garbage values...
[116.10865783691406 225.95223999023438 546.6284790039062 1347.19384765625
 7259.9833984375 109.95478820800781 309.94140625 570.989501953125
 59.35866165161133 204.94285583496094 311.0536193847656 2755.837158203125
 352.2674865722656 883.0302734375 213.10987854003906 453.4189453125
 546.6284790039062 21.016788482666016 546.6284790039062 586.4612426757812
 408.240966796875 65.93086242675781 792.5347290039062 771.5624389648438
 65.769287109375 393.7694396972656 92.22515869140625 3415.30078125
 256.9771423339844 330.5718994140625 247.68687438964844 33.92982482910156
 335.9749755859375 287.8919982910156 78.53588104248047 81.42106628417969
 607.0009155273438 193.14730834960938 143.44276428222656 68.61591339111328
 546.6284790039062 1047.6258544921875 86.72251892089844 186.58645629882812
 93.49270629882812 148.16001892089844 576.2651977539062 3227.52880859375
 249.23025512695312 249.23025512695312 335.9749755859375 175.556396484375
 771.8504028

Computing children[######################################..] 471/489

KeyboardInterrupt: 

In [9]:
# Count the distinct values in f; compare with f.shape to see how many repeat.
np.unique(f).shape

(298,)

In [11]:
# Size of the current fitness array f.
f.shape

(489,)

In [12]:
    # Two-column table: column 0 holds pop, column 1 holds the matching f value.
    f_pop = np.column_stack((pop, f))

In [20]:
# Sorted distinct values found in column 1 of f_pop.
np.unique(f_pop[:, 1])

array([21.016788482666016, 25.486745834350586, 31.752124786376953,
       32.61952209472656, 33.92982482910156, 46.151153564453125,
       48.84087371826172, 50.76108932495117, 50.92770004272461,
       51.25412368774414, 56.52559280395508, 56.746334075927734,
       59.35866165161133, 61.25581359863281, 63.92950439453125,
       65.5389404296875, 65.769287109375, 65.93086242675781,
       68.15579223632812, 68.61591339111328, 68.70596313476562,
       71.76485443115234, 77.29441833496094, 77.96174621582031,
       78.07171630859375, 78.53588104248047, 79.02577209472656,
       81.42106628417969, 81.92536926269531, 86.68870544433594,
       86.7017822265625, 86.72251892089844, 87.22848510742188,
       88.3538818359375, 89.4210205078125, 91.96420288085938,
       91.98056030273438, 92.22515869140625, 93.49270629882812,
       93.91595458984375, 97.24806213378906, 97.70692443847656,
       98.06179809570312, 102.80058288574219, 104.19427490234375,
       104.38618469238281, 109.95478820

In [None]:
def remove_dups(pop, f):
    """Drop repeated molecules, keeping the first occurrence of each.

    Parameters
    ----------
    pop : array-like
        One-dimensional sequence of molecules, one entry per individual.
    f : array-like
        Fitness values aligned with ``pop`` (same length).

    Returns
    -------
    (new_pop, new_f) : tuple of numpy arrays
        The entries of ``pop`` with later repeats removed, and the matching
        entries of ``f``. The original order is preserved.

    Raises
    ------
    ValueError
        If ``pop`` and ``f`` do not have the same length.
    """
    pop = np.asarray(pop)
    f = np.asarray(f)
    if pop.shape[0] != f.shape[0]:
        raise ValueError("pop and f must have the same length")
    # return_index gives the position of the first occurrence of each distinct
    # molecule; sorting those positions restores the input order.
    _, first_seen = np.unique(pop, return_index=True)
    keep = np.sort(first_seen)
    return (pop[keep], f[keep])