/
string_crossover.py
134 lines (108 loc) · 3.08 KB
/
string_crossover.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
'''
Written by Emilie S. Henault and Jan H. Jensen 2019
'''
from rdkit import Chem
from rdkit.Chem import AllChem
import random
import numpy as np
from rdkit import rdBase
rdBase.DisableLog('rdApp.error')
import deepsmiles
converter = deepsmiles.Converter(rings=True, branches=True)
from selfies import encoder, decoder
def string_OK(string):
    """Decide whether a candidate string encodes an acceptable molecule.

    The string is decoded with ``string2mol``, the molecule is sanitized
    with RDKit and round-tripped through SMILES, and its atom count is
    compared against a randomly drawn upper limit.

    The limit is sampled on every accepted-so-far call as
    ``size_stdev * np.random.randn() + average_size``. ``size_stdev`` and
    ``average_size`` are read as module-level globals (the original comment
    says they are set in GA_mol) and must exist before this is called; if
    they are missing the resulting NameError now propagates instead of
    being swallowed and reported as "invalid molecule".

    Args:
        string: Candidate molecule string, passed unchanged to
            ``string2mol``.

    Returns:
        bool: True when the molecule survives sanitization and the SMILES
        round trip and has more than 5 atoms but fewer than the sampled
        limit; False in every other case.
    """
    mol = string2mol(string)
    if not mol:
        return False

    # Only the RDKit calls are guarded: a failure here means "bad molecule".
    try:
        Chem.SanitizeMol(mol)
        # Round-trip through SMILES to reject molecules RDKit cannot re-parse.
        reparsed = Chem.MolFromSmiles(Chem.MolToSmiles(mol))
        num_atoms = mol.GetNumAtoms()
    except Exception:
        return False

    if reparsed is None:
        # Previously returned None here; False keeps the return type uniform.
        return False

    # Drawn only for molecules that passed the checks above, as before.
    target_size = size_stdev * np.random.randn() + average_size
    return bool(5 < num_atoms < target_size)
def cut_point(parent):
    """Pick a random cut index into ``parent``.

    Args:
        parent: Any sized sequence (string or list of pieces).

    Returns:
        int: An index drawn with ``random.randint`` from ``0`` to
        ``len(parent) - 1``, both ends included.

    Raises:
        ValueError: From ``random.randint`` when ``parent`` is empty,
            because the requested range is then empty.
    """
    last_index = len(parent) - 1
    return random.randint(0, last_index)
def string2list(string):
    """Split an encoded molecule string into a list of pieces.

    The module-level global ``string_type`` selects the splitting rule:

    * ``'SELFIES'``: split on the ``']['`` separator. The separator itself
      is consumed, so the leading ``[`` stays on the first piece and the
      trailing ``]`` stays on the last piece.
    * anything else: one list element per character.

    Args:
        string: The encoded string to split.

    Returns:
        list: The pieces, in order.
    """
    if string_type != 'SELFIES':
        return list(string)
    return string.split('][')
def mol2string(mol):
    """Encode an RDKit molecule as a list of string pieces.

    The molecule is first written as SMILES with ``Chem.MolToSmiles``; the
    module-level global ``string_type`` then selects the output:

    * ``'SELFIES'``: the SMILES is passed to the selfies ``encoder`` and the
      result is split on ``']['``.
    * ``'DeepSMILES'``: the SMILES is passed to ``converter.encode`` and the
      result is returned as a list of characters.
    * anything else: the SMILES itself as a list of characters.

    Args:
        mol: Molecule accepted by ``Chem.MolToSmiles``.

    Returns:
        list: The pieces of the encoded molecule.
    """
    smiles = Chem.MolToSmiles(mol)

    if string_type == 'SELFIES':
        selfies_text = encoder(smiles)
        return selfies_text.split('][')

    # Both remaining encodings are split character by character.
    text = converter.encode(smiles) if string_type == 'DeepSMILES' else smiles
    return list(text)
def list2string(list):
    """Join a list of pieces back into one string.

    The pieces are joined with ``']['`` when the module-level global
    ``string_type`` is ``'SELFIES'`` and with the empty string otherwise.

    Args:
        list: Iterable of string pieces. The name shadows the builtin but
            is kept because it is part of the existing signature.

    Returns:
        str: The joined string.
    """
    separator = '][' if string_type == 'SELFIES' else ''
    return separator.join(list)
def smiles2string(smiles):
    """Convert a SMILES string into the encoding named by ``string_type``.

    ``string_type`` is read as a module-level global.

    Args:
        smiles: The SMILES string to convert.

    Returns:
        * ``'SMILES'``: ``smiles`` unchanged.
        * ``'SELFIES'``: the result of the selfies ``encoder``, or None if
          the encoder raises.
        * ``'DeepSMILES'``: the result of ``converter.encode``, or None if
          it raises ``deepsmiles.DecodeError``.

    Raises:
        ValueError: If ``string_type`` is none of the three values above.
            Previously this case fell through to an unbound local variable.
    """
    if string_type == 'SMILES':
        return smiles

    if string_type == 'SELFIES':
        try:
            # NOTE(review): PrintErrorMessage is passed exactly as before;
            # confirm the installed selfies version accepts this keyword.
            return encoder(smiles, PrintErrorMessage=False)
        except Exception:
            return None

    if string_type == 'DeepSMILES':
        try:
            return converter.encode(smiles)
        except deepsmiles.DecodeError:
            # NOTE(review): only DecodeError is treated as a soft failure,
            # as in the original; confirm that is what encode() raises.
            return None

    raise ValueError('unsupported string_type: {!r}'.format(string_type))
def string2smiles(string):
    """Convert an encoded string back to SMILES.

    The encoding is named by the module-level global ``string_type``.

    Args:
        string: The encoded molecule string.

    Returns:
        * ``'SMILES'``: ``string`` unchanged.
        * ``'SELFIES'``: the result of the selfies ``decoder``, or None if
          the decoder raises.
        * ``'DeepSMILES'``: the result of ``converter.decode``, or None if
          it raises.

    Raises:
        ValueError: If ``string_type`` is none of the three values above.
            Previously this case fell through to an unbound local variable.
    """
    if string_type == 'SMILES':
        return string

    if string_type == 'SELFIES':
        try:
            # NOTE(review): PrintErrorMessage is passed exactly as before;
            # confirm the installed selfies version accepts this keyword.
            return decoder(string, PrintErrorMessage=False)
        except Exception:
            return None

    if string_type == 'DeepSMILES':
        try:
            return converter.decode(string)
        except Exception:
            return None

    raise ValueError('unsupported string_type: {!r}'.format(string_type))
def string2mol(string):
    """Build an RDKit molecule from an encoded string.

    The string is first converted to SMILES with ``string2smiles`` and the
    SMILES is then parsed with ``Chem.MolFromSmiles``.

    Args:
        string: The encoded molecule string.

    Returns:
        Whatever ``Chem.MolFromSmiles`` returns for the decoded SMILES, or
        None when ``string2smiles`` returned None or the parse raised.
    """
    smiles = string2smiles(string)
    if smiles is None:
        return None

    try:
        return Chem.MolFromSmiles(smiles)
    except Exception:
        # Parse errors mean "no molecule"; KeyboardInterrupt and SystemExit
        # are no longer trapped here.
        return None
def crossover(parent_a,parent_b):
    """Combine the head of one parent with the tail of the other.

    Up to 50 attempts are made. Each attempt draws a cut index for each
    parent with ``cut_point``, joins ``parent_a`` before its cut to
    ``parent_b`` from its cut onward, and accepts the result if
    ``string_OK`` approves it.

    Args:
        parent_a: Sliceable sequence supplying the head of the child.
        parent_b: Sliceable sequence supplying the tail of the child.

    Returns:
        The first accepted child, or None if all 50 attempts are rejected.
    """
    # NOTE(review): the parents are sliced exactly as passed in (the original
    # had a commented-out string2list conversion here), so cuts are made per
    # element of whatever sequence the caller supplies - confirm that is the
    # intended granularity for multi-character tokens.
    attempts_left = 50
    while attempts_left > 0:
        attempts_left -= 1
        head = parent_a[:cut_point(parent_a)]
        tail = parent_b[cut_point(parent_b):]
        candidate = head + tail
        if string_OK(candidate):
            return candidate
    return None
if __name__ == "__main__":
    # Demo: run a single crossover and print the child (or None).
    #
    # These three names must stay module-level globals: string_OK reads
    # average_size and size_stdev, and the conversion helpers read
    # string_type.
    string_type = 'SELFIES'
    average_size, size_stdev = 39.15, 3.50

    # NOTE(review): both parents look like plain SMILES while string_type is
    # 'SELFIES' - confirm this combination is what the demo intends.
    parent_a, parent_b = 'CCCCCCCC', 'OCCCCCCO'
    #parent_b = 'OCCCCCCc1ccccc1'  # alternative second parent

    child = crossover(parent_a,parent_b)
    print(child)