ESOL data

1. No molecules were lost through standardization - 1128 molecules
2. 5 fully duplicated rows have been removed -> 1123 rows
3. After removing strange molecules, 1064 molecules are left
4. For each of the 6 duplicated SMILES, only the most common logS value is kept -> final size = 1058 entries


FreeSolv data

1. No molecules were lost through standardization - 642 molecules
2. No duplicated rows/values have been found
3. After removing strange molecules, 565 molecules are left


In [68]:
from pathlib import Path
import pandas as pd
import os
import numpy as np

In [69]:
import sys

# Make the project's preprocessing scripts importable from this notebook.
# The membership check keeps sys.path from accumulating duplicate entries
# when the cell is executed more than once.
if '../scripts/preprocess' not in sys.path:
    sys.path.append('../scripts/preprocess')
from standardize_smiles import StandardizeTautomers

In [70]:
# Directory with the filtered datasets that are fed into standardization.
INPUT_PATH = Path("../data/1_filtering/esol_freesolv")

# Directory from which the standardized datasets are read back below.
OUTPUT_PATH = Path("../data/2_standardize/esol_freesolv")

In [71]:
# Run the project's tautomer standardizer over the datasets in INPUT_PATH.
# NOTE(review): OUTPUT_PATH is presumably where the standardized CSVs are written
# (the following cells read them back from there) - confirm against standardize_smiles.py.
StandardizeTautomers().standardize(INPUT_PATH, OUTPUT_PATH)

2020-09-21 11:23:32.923 | INFO     | standardize_smiles:standardize:38 - Processing freesolv.csv
100%|██████████| 642/642 [00:01<00:00, 598.09it/s]
2020-09-21 11:23:34.105 | INFO     | standardize_smiles:standardize:38 - Processing esol.csv
100%|██████████| 1128/1128 [00:07<00:00, 150.71it/s]


In [72]:
# Load every CSV file in INPUT_PATH into a {file name: DataFrame} mapping.
# Only regular files whose name ends in ".csv" are read, so stray non-CSV
# files in the directory are never handed to pd.read_csv.
# index_col=0 uses the first CSV column as the row index.
with os.scandir(INPUT_PATH) as entries:
    initial_datasets = {
        entry.name: pd.read_csv(entry.path, index_col=0)
        for entry in entries
        if entry.is_file() and entry.name.endswith('.csv')
    }

In [73]:
# Load every CSV file in OUTPUT_PATH into a {file name: DataFrame} mapping.
# The extension is checked as a suffix (not a substring), so names such as
# "esol.csv.bak" are skipped instead of being parsed as datasets.
# index_col=0 uses the first CSV column as the row index.
with os.scandir(OUTPUT_PATH) as entries:
    standardized_datasets = {
        entry.name: pd.read_csv(entry.path, index_col=0)
        for entry in entries
        if entry.is_file() and entry.name.endswith('.csv')
    }

In [74]:
def _loss_percent(initial_size, standardized_size):
    """Return the percentage of rows lost, relative to the initial size.

    Args:
        initial_size: number of rows before standardization.
        standardized_size: number of rows after standardization.

    Returns:
        The loss in percent, rounded to one decimal place; 0.0 when
        ``initial_size`` is 0, so an empty dataset does not divide by zero.
    """
    if initial_size == 0:
        return 0.0
    return round((initial_size - standardized_size) / initial_size * 100, 1)


def _summarize_sizes(initial, standardized):
    """Build the before/after size table for the standardization step.

    Args:
        initial: mapping of dataset name to the DataFrame read before standardization.
        standardized: mapping of dataset name to the DataFrame read after it.

    Returns:
        A DataFrame with one row per dataset in ``initial`` followed by a
        'Total amount' row; columns are 'Dataset name', 'Initial size',
        'Standardized size' and '% of loss'.

    Raises:
        KeyError: if a dataset in ``initial`` has no counterpart in ``standardized``.
    """
    def make_row(name, initial_size, standardized_size):
        return {'Dataset name': name,
                'Initial size': initial_size,
                'Standardized size': standardized_size,
                '% of loss': _loss_percent(initial_size, standardized_size)}

    rows = [make_row(name, frame.shape[0], standardized[name].shape[0])
            for name, frame in initial.items()]
    # Totals are computed once over each mapping instead of re-summing per column.
    rows.append(make_row('Total amount',
                         sum(frame.shape[0] for frame in initial.values()),
                         sum(frame.shape[0] for frame in standardized.values())))
    return pd.DataFrame(rows)


datasets_stats = _summarize_sizes(initial_datasets, standardized_datasets)

In [75]:
# Show the per-dataset and total row counts before and after standardization.
datasets_stats

Unnamed: 0,Dataset name,Initial size,Standardized size,% of loss
0,freesolv.csv,642,642,0.0
1,esol.csv,1128,1128,0.0
2,Total amount,1770,1770,0.0


# Check duplicates

## Esol

In [76]:
# Directory with the standardized datasets.
DATA_PATH = Path("../data/2_standardize/esol_freesolv")

# File analysed in the ESOL section.
FILENAME = 'esol.csv'

In [77]:
# Column names of the ESOL table.
SMILES_COLUMN = 'smiles'  # column holding the SMILES strings
VALUE_COLUMN = 'logS'  # column holding the target value

In [78]:
# Read the standardized ESOL table; without index_col the rows get a fresh RangeIndex.
esol_data = pd.read_csv(Path(DATA_PATH, FILENAME))

In [79]:
# Preview the first rows of the ESOL table.
esol_data.head()

Unnamed: 0,smiles,logS
0,N#CC(OC1OC(COC2OC(CO)C(O)C(O)C2O)C(O)C(O)C1O)c...,-0.77
1,Cc1occc1C(=O)Nc1ccccc1,-3.3
2,CC(C)=CCCC(C)=CC=O,-2.06
3,c1ccc2c(c1)ccc1c2ccc2c3ccccc3ccc21,-7.87
4,c1ccsc1,-1.33


In [80]:
# Row count, dtypes and non-null counts before de-duplication.
esol_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1128 entries, 0 to 1127
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   smiles  1128 non-null   object 
 1   logS    1128 non-null   float64
dtypes: float64(1), object(1)
memory usage: 17.8+ KB


In [81]:
# Drop rows that are identical in every column, keeping the first occurrence;
# the original row labels are retained (the index is not reset).
esol_data_wo_dup = esol_data.drop_duplicates()

In [82]:
# Same summary after dropping fully duplicated rows.
esol_data_wo_dup.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1123 entries, 0 to 1127
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   smiles  1123 non-null   object 
 1   logS    1123 non-null   float64
dtypes: float64(1), object(1)
memory usage: 26.3+ KB


In [83]:
# Count rows whose SMILES string already occurred in an earlier row.
# Fully identical rows are already gone, so each SMILES counted here repeats
# with a different value in the other column.
esol_data_wo_dup[SMILES_COLUMN].duplicated().sum()

6

In [84]:
# Directory that receives the de-duplicated datasets.
PROCESSED_PATH = "../data/3_final_data"

# Create the output directory if it is missing; to_csv does not create
# parent directories and would otherwise fail on a fresh checkout.
os.makedirs(PROCESSED_PATH, exist_ok=True)

# index=False keeps the pandas row labels out of the written file.
esol_data_wo_dup.to_csv(os.path.join(PROCESSED_PATH, 'esol.csv'), index = False)

## FreeSolv

In [99]:
# Directory with the standardized datasets.
DATA_PATH = Path("../data/2_standardize/esol_freesolv")

# File analysed in the FreeSolv section.
FILENAME = 'freesolv.csv'

In [100]:
# Column names of the FreeSolv table.
SMILES_COLUMN = 'smiles'  # column holding the SMILES strings
VALUE_COLUMN = 'Energy'  # column holding the target value

In [101]:
# Read the standardized FreeSolv table; without index_col the rows get a fresh RangeIndex.
freesolv_data = pd.read_csv(Path(DATA_PATH, FILENAME))

In [102]:
# Preview the first rows of the FreeSolv table.
freesolv_data.head()

Unnamed: 0,smiles,Energy
0,COc1ccc(C(=O)N(C)C)cc1,-11.01
1,CS(=O)(=O)Cl,-4.87
2,C=CC(C)C,1.83
3,CCc1cnccn1,-5.45
4,CCCCCCCO,-4.21


In [103]:
# Row count, dtypes and non-null counts before de-duplication.
freesolv_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 642 entries, 0 to 641
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   smiles  642 non-null    object 
 1   Energy  642 non-null    float64
dtypes: float64(1), object(1)
memory usage: 10.2+ KB


In [104]:
# Drop rows that are identical in every column, keeping the first occurrence;
# the original row labels are retained (the index is not reset).
freesolv_data_wo_dup = freesolv_data.drop_duplicates()

In [105]:
# Same summary after dropping fully duplicated rows.
freesolv_data_wo_dup.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 642 entries, 0 to 641
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   smiles  642 non-null    object 
 1   Energy  642 non-null    float64
dtypes: float64(1), object(1)
memory usage: 15.0+ KB


In [106]:
# Count rows whose SMILES string already occurred in an earlier row.
freesolv_data_wo_dup[SMILES_COLUMN].duplicated().sum()

0

In [107]:
# Directory that receives the de-duplicated datasets.
PROCESSED_PATH = "../data/3_final_data"

# Create the output directory if it is missing; to_csv does not create
# parent directories and would otherwise fail on a fresh checkout.
os.makedirs(PROCESSED_PATH, exist_ok=True)

# index=False keeps the pandas row labels out of the written file.
freesolv_data_wo_dup.to_csv(os.path.join(PROCESSED_PATH, 'freesolv.csv'), index = False)

# Remove strange mols 

In [85]:
from remove_strange_mols import remove_strange_mols

## ESOL

In [86]:
# Directory and file name of the de-duplicated ESOL table written earlier in this notebook.
DATA_PATH = "../data/3_final_data"
FILENAME = 'esol.csv'

In [87]:
# The same path is passed for both arguments.
# NOTE(review): presumably the arguments are (input file, output file), i.e. esol.csv is
# filtered and overwritten in place - confirm against remove_strange_mols.py. If so,
# re-running this cell operates on the already-filtered file.
remove_strange_mols(os.path.join(DATA_PATH, FILENAME), os.path.join(DATA_PATH, FILENAME))

2020-09-21 11:23:46.831 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule N#CC(OC1OC(COC2OC(CO)C(O)C(O)C2O)C(O)C(O)C1O)c1ccccc1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:46.832 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Cc1occc1C(=O)Nc1ccccc1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:46.833 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CC(C)=CCCC(C)=CC=O isn't in the list of very soluble in water molecules
2020-09-21 11:23:46.834 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule c1ccc2c(c1)ccc1c2ccc2c3ccccc3ccc21 isn't in the list of very soluble in water molecules
2020-09-21 11:23:46.835 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule c1ccsc1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:46.836 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule c1ccc2sc

2020-09-21 11:23:46.884 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule COC(=O)C1=C(C)N=C(C)C(C(=O)OC)C1c1ccccc1[N+](=O)[O-] isn't in the list of very soluble in water molecules
2020-09-21 11:23:46.885 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Cc1ccc2ccc(C)nc2c1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:46.885 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule C#CCCCCCC isn't in the list of very soluble in water molecules
2020-09-21 11:23:46.886 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCC1(C2=CCCCC2)C(=O)NC(=O)NC1=O isn't in the list of very soluble in water molecules
2020-09-21 11:23:46.887 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule c1ccc2c(c1)ccc1c3ccccc3ccc21 isn't in the list of very soluble in water molecules
2020-09-21 11:23:46.888 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCC(

2020-09-21 11:23:46.937 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CC=C(c1ccc(O)cc1)C(CC)c1ccc(O)cc1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:46.938 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule N#Cc1c(Cl)c(Cl)c(Cl)c(C#N)c1Cl isn't in the list of very soluble in water molecules
2020-09-21 11:23:46.939 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Clc1ccc(Cl)c(-c2ccc(Cl)c(Cl)c2)c1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:46.940 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule c1ccc(C2CO2)cc1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:46.941 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CC(C)c1ccccc1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:46.942 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CC12CCC(=O)C=C1CCC

2020-09-21 11:23:46.989 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Nc1cccc2ccccc12 isn't in the list of very soluble in water molecules
2020-09-21 11:23:46.990 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Cc1cccc(C)c1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:46.990 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule O=c1nc2nccnc2c[nH]1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:46.991 | DEBUG    | remove_strange_mols:check_not_strange_mols:40 - The molecule CO is in the list of very soluble in water molecules
2020-09-21 11:23:46.992 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCC1(CCC(C)C)C(=O)NC(=O)NC1=O isn't in the list of very soluble in water molecules
2020-09-21 11:23:46.993 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCC(C)=O isn't in the list of very soluble in water molecules
2020-0

2020-09-21 11:23:47.042 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule COCC(=O)N(c1c(C)cccc1C)C(C)C(=O)OC isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.043 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CNC(=O)Oc1ccccc1OC(C)C isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.044 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCC(C)Cl isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.045 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Oc1ccc2ccccc2c1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.046 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CC(C)Oc1cc(-n2nc(C(C)(C)C)oc2=O)c(Cl)cc1Cl isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.048 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule C#CCCCC isn't in the 

2020-09-21 11:23:47.096 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Oc1ccc(Cl)cc1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.096 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule O=C1CCCCC1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.097 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Cc1cccc(N)c1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.098 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule N#CC(Cl)(Cl)Cl isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.099 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CNc1cnn(-c2cccc(C(F)(F)F)c2)c(=O)c1Cl isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.102 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCCCCCCC(C)=O isn't in the list of very soluble in wate

2020-09-21 11:23:47.146 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule c1ccc(CCc2ccccc2)cc1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.147 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule ClC(Cl)C(Cl)Cl isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.148 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCN1c2cc(OC)cc(C)c2NC(=O)c2cccnc21 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.149 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Cc1ccc2c(ccc3ccccc32)c1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.154 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCCOC(=O)c1ccccc1C(=O)OCCCC isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.155 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule COc1c(O)c(Cl)c(Cl)c(Cl)

2020-09-21 11:23:47.206 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Cc1ccccc1C(C)C isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.208 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Nc1cccc(Cl)c1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.209 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CC(C)CC(C)C isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.213 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule c1ccc2c(c1)oc1ccccc12 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.214 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCOC1Oc2ccc(OS(C)(=O)=O)cc2C1(C)C isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.215 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CN(C)C(=O)Nc1cccc(C(F)(F)F)c1 isn't in the list of

2020-09-21 11:23:47.275 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCCCC(=O)OCC isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.277 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule OCC(O)C1OC2OC(C(Cl)(Cl)Cl)OC2C1O isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.278 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Cc1ccc(N=CN(C)C=Nc2ccc(C)cc2C)c(C)c1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.283 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule COc1nc(=NC(C)C)[nH]c(=NC(C)C)[nH]1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.284 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule C=CCCCCCC isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.285 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Cc1ccc(N)cc1 isn't

2020-09-21 11:23:47.347 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Oc1csc(=S)[nH]1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.348 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule O=c1c(O)c(-c2ccc(O)cc2O)oc2cc(O)cc(O)c12 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.350 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule O=C1C2(Cl)C3(Cl)C4(Cl)C(Cl)(Cl)C5(Cl)C3(Cl)C1(Cl)C5(Cl)C24Cl isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.351 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCN(CC)C(=S)SSC(=S)N(CC)CC isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.352 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule C1CCCCC1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.354 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The mo

2020-09-21 11:23:47.414 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Cc1nc(N(C)C)nc(OC(=O)N(C)C)c1C isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.415 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCCCCBr isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.416 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCC(C)C isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.418 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Cc1c(F)c(F)c(COC(=O)C2C(C=C(Cl)C(F)(F)F)C2(C)C)c(F)c1F isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.419 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCc1cccc(C)c1N(C(=O)CCl)C(C)COC isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.420 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule O=[N+]([O-]

2020-09-21 11:23:47.482 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CC(C)CCO isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.483 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule O=Cc1ccc2c(c1)OCO2 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.484 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule C=C(C)C isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.485 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule O=Cc1ccccc1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.486 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule C=C(C)C(=C)C isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.488 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCOC(=O)CCN(SN(C)C(=O)Oc1cccc2c1OC(C)(C)C2)C(C)C isn't in the list of very soluble in 

2020-09-21 11:23:47.550 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Nc1ccccc1Cl isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.551 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule COc1cccc(Cl)c1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.552 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCCN(CC)C(=O)SCCC isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.555 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCCOC=O isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.556 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CC12C=CC(=O)C=C1CCC1C2C(O)CC2(C)C1CCC2(O)C(=O)CO isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.557 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule ClC(Cl)Br isn't in the list of very soluble

2020-09-21 11:23:47.617 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Nc1cccc([N+](=O)[O-])c1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.618 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCCCl isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.622 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule O=CNC(N1C=CN(C(NC=O)C(Cl)(Cl)Cl)C=C1)C(Cl)(Cl)Cl isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.623 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Cn1cc(-c2ccccc2)c(=O)c(-c2cccc(C(F)(F)F)c2)c1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.625 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Nc1cc2c3ccccc3ccc2c2ccccc12 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.627 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The mo

2020-09-21 11:23:47.686 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CC(C)C(C)C isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.687 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule C=CCC isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.688 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Clc1ccc(Cl)c(-c2cc(Cl)c(Cl)c(Cl)c2Cl)c1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.689 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule N=c1cc[nH]c(=O)[nH]1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.691 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule FC(F)(Cl)C(F)(Cl)Cl isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.691 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCC#N isn't in the list of very soluble in water 

2020-09-21 11:23:47.753 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CC(=O)OC(C)C isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.754 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Brc1ccccc1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.755 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCOC(=O)c1ccc(O)cc1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.756 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCC(=O)OCN1C(=O)NC(c2ccccc2)(c2ccccc2)C1=O isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.758 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCC(=O)OC1CCC2C3CC=C4CC(=O)CCC4(C)C3CCC12C isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.759 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule c1cc2ccc3ccc4ccc

2020-09-21 11:23:47.823 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Oc1[nH]c(O)c2ccccc12 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.825 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule NS(=O)(=O)c1cc2c(cc1Cl)NC(C(Cl)Cl)NS2(=O)=O isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.826 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CC=C(C)C isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.828 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Cc1ccc(C)c(C)c1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.829 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Oc1cc(Cl)c(Cl)cc1Cl isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.830 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule c1ccc2c(c1)cnc1ccccc12 isn't in the l

2020-09-21 11:23:47.890 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCN1c2ncccc2C(=S)N(C)c2cccnc21 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.891 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCN1C(=O)c2ccccc2Oc2ccccc21 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.892 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule C1CCOCC1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.893 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule C#CCCCCC isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.895 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule COc1ccc2ccc(=O)oc2c1CC=C(C)C isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.896 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Cc1ccc2c(ccc3c4c5c(cccc5cc32)CC4)c1 isn't

2020-09-21 11:23:47.951 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CC(C)COC=O isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.952 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CSc1ccccc1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.954 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCN1c2ncccc2C(=O)Nc2c(C(F)(F)F)cc(C)nc21 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.955 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCCCC isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.956 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule COC(=O)c1cccnc1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:47.957 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule NS(=O)(=O)c1cc2c(cc1C(F)(F)F)NC(Cc1ccccc1)NS2(=O)=O isn't in 

2020-09-21 11:23:48.021 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCOC(=O)CC isn't in the list of very soluble in water molecules
2020-09-21 11:23:48.022 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CSc1nc(=NC(C)C)[nH]c(=NC(C)C)[nH]1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:48.023 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule C#CC(C)N(C)C(=O)Nc1ccc(Cl)cc1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:48.025 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Cc1cc2ccccc2cc1C isn't in the list of very soluble in water molecules
2020-09-21 11:23:48.026 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Clc1ccc(-c2cc(Cl)ccc2Cl)cc1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:48.028 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Clc1ccc(-c2cc(Cl)c(Cl)c(Cl

2020-09-21 11:23:48.086 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CC(C)OC(=O)Nc1cccc(Cl)c1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:48.087 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCN1c2ncccc2C(=O)N(C)c2ccc(Cl)nc21 isn't in the list of very soluble in water molecules
2020-09-21 11:23:48.088 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CNC(=O)Oc1cccc2ccccc12 isn't in the list of very soluble in water molecules
2020-09-21 11:23:48.089 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule C#C isn't in the list of very soluble in water molecules
2020-09-21 11:23:48.090 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Cc1cncc(C)c1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:48.091 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule C1=CCC=CC1 isn't in the list of very soluble in

2020-09-21 11:23:48.160 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Nc1cnn(-c2ccccc2)c(=O)c1Cl isn't in the list of very soluble in water molecules
2020-09-21 11:23:48.161 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCC(C)(C)O isn't in the list of very soluble in water molecules
2020-09-21 11:23:48.162 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Cc1ccc(O)cc1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:48.163 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCOC=O isn't in the list of very soluble in water molecules
2020-09-21 11:23:48.164 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CN(C)c1ccccc1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:48.165 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule C1CCC2CCCCC2C1 isn't in the list of very soluble in water molecules
2020-09

2020-09-21 11:23:48.227 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CNC(=O)Oc1cc(C)cc(C)c1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:48.228 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule ClC1=C(Cl)C2(Cl)C3C(Cl)C(Cl)CC3C1(Cl)C2(Cl)Cl isn't in the list of very soluble in water molecules
2020-09-21 11:23:48.229 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CSSC isn't in the list of very soluble in water molecules
2020-09-21 11:23:48.233 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule NC(=O)c1ccccc1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:48.234 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Clc1ccccc1Br isn't in the list of very soluble in water molecules
2020-09-21 11:23:48.235 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule COC(=O)c1ccccc1OC1OC(COC2OCC(O)C(O)C2O)C(O)C(

2020-09-21 11:23:48.294 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CNC(=O)Oc1cc(C(C)(C)C)cc(C(C)(C)C)c1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:48.295 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule O=C1CC(c2ccc(O)c(O)c2)Oc2cc(O)cc(O)c21 isn't in the list of very soluble in water molecules
2020-09-21 11:23:48.296 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule O=C(c1ccccc1)c1ccccc1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:48.298 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCCCCCCCCCCCCCCCCCC isn't in the list of very soluble in water molecules
2020-09-21 11:23:48.299 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule c1ccc(NNc2ccccc2)cc1 isn't in the list of very soluble in water molecules
2020-09-21 11:23:48.300 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCC(CC)

2020-09-21 11:23:48.376 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCC(C)CO are allowed
2020-09-21 11:23:48.378 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule N#Cc1ccccc1 are allowed
2020-09-21 11:23:48.380 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCOP(=S)(OCC)Oc1cc(C)nc(C(C)C)n1 are allowed
2020-09-21 11:23:48.381 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCCCCCCCC(C)O are allowed
2020-09-21 11:23:48.384 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Clc1ccc(-c2c(Cl)ccc(Cl)c2Cl)c(Cl)c1 are allowed
2020-09-21 11:23:48.386 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule O=c1[nH]c2c(c(=O)n1C1CCCCC1)CCC2 are allowed
2020-09-21 11:23:48.388 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCOP(=S)(OCC)SCSCC are allowed
202

2020-09-21 11:23:48.486 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule C=CCC1(c2ccccc2)C(=O)NC(=O)NC1=O are allowed
2020-09-21 11:23:48.487 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCCC(=O)OCC are allowed
2020-09-21 11:23:48.489 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC(=O)OCCC(C)C are allowed
2020-09-21 11:23:48.491 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCCCC(=O)OCN1C(=O)NC(c2ccccc2)(c2ccccc2)C1=O are allowed
2020-09-21 11:23:48.493 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Clc1cccc(-c2cc(Cl)ccc2Cl)c1 are allowed
2020-09-21 11:23:48.495 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCBr are allowed
2020-09-21 11:23:48.497 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCC1COC(Cn2cncn2)(c2ccc(Cl)cc2

2020-09-21 11:23:48.594 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC12CCC(=O)C=C1CCC1C3CCC(O)(C(=O)CO)C3(C)CC(O)C12F are allowed
2020-09-21 11:23:48.596 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCSCc1ccccc1OC(=O)NC are allowed
2020-09-21 11:23:48.598 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCOC(=O)CC(=O)OCC are allowed
2020-09-21 11:23:48.599 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule C=C(C)C1CC=C(C)CC1 are allowed
2020-09-21 11:23:48.601 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule c1ccc2c(c1)CCC2 are allowed
2020-09-21 11:23:48.602 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC(C)(C)c1ccc(O)cc1 are allowed
2020-09-21 11:23:48.604 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule O=C1NC(=O)C2(CC2)C(=O)N1 ar

2020-09-21 11:23:48.702 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Nc1ccccc1O are allowed
2020-09-21 11:23:48.704 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCCCCCCC(=O)OCC are allowed
2020-09-21 11:23:48.705 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule COCC(=O)N(c1c(C)cccc1C)C(C)C(=O)OC are allowed
2020-09-21 11:23:48.707 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CNC(=O)Oc1ccccc1OC(C)C are allowed
2020-09-21 11:23:48.709 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCC(C)Cl are allowed
2020-09-21 11:23:48.710 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Oc1ccc2ccccc2c1 are allowed
2020-09-21 11:23:48.712 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC(C)Oc1cc(-n2nc(C(C)(C)C)oc2=O)c(Cl)cc1Cl are allowed
2020-0

2020-09-21 11:23:48.814 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Cc1nnc2n1-c1ccc(Cl)cc1C(c1ccccc1Cl)=NC2 are allowed
2020-09-21 11:23:48.816 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Oc1ccccc1O are allowed
2020-09-21 11:23:48.818 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCN1c2ncccc2C(=O)N(C)c2cccnc21 are allowed
2020-09-21 11:23:48.819 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CSC are allowed
2020-09-21 11:23:48.821 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Cc1ccccc1Br are allowed
2020-09-21 11:23:48.823 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCOC(N)=O are allowed
2020-09-21 11:23:48.824 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC(=O)OC1(C(C)=O)CCC2C3=CC(C)=C4CC(=O)CCC4(C)C3CCC21C are allowe

2020-09-21 11:23:48.916 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule ClC1C(Cl)C(Cl)C(Cl)C(Cl)C1Cl are allowed
2020-09-21 11:23:48.918 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCOP(=S)(NC(C)C)Oc1ccccc1C(=O)OC(C)C are allowed
2020-09-21 11:23:48.920 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Clc1cccc(Cl)c1Cl are allowed
2020-09-21 11:23:48.921 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule ClC(Cl)(Cl)Cl are allowed
2020-09-21 11:23:48.923 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule O=[N+]([O-])c1ccc(Cl)c(Cl)c1 are allowed
2020-09-21 11:23:48.924 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule OC1CCCCCCC1 are allowed
2020-09-21 11:23:48.926 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC12CCC(=O)C=C1CCC1C2CCC2(C)C1CCC2

2020-09-21 11:23:49.021 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC(=O)Nc1ccc(F)cc1 are allowed
2020-09-21 11:23:49.023 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCc1cccc(CC)c1N(COC)C(=O)CCl are allowed
2020-09-21 11:23:49.024 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule C1=CCCCC1 are allowed
2020-09-21 11:23:49.026 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC12CCC(=O)C=C1CCC1C2C(O)CC2(C)C1CCC2(O)C(=O)CO are allowed
2020-09-21 11:23:49.027 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule c1cncnc1 are allowed
2020-09-21 11:23:49.029 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule O=[N+]([O-])c1ccc(Cl)cc1 are allowed
2020-09-21 11:23:49.030 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCC(=O)OC are allowed
2020-09-21

2020-09-21 11:23:49.129 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCCc1c(O)n(-c2ccccc2)n(-c2ccccc2)c1=O are allowed
2020-09-21 11:23:49.130 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Cc1c([N+](=O)[O-])cccc1[N+](=O)[O-] are allowed
2020-09-21 11:23:49.132 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC(=O)C1CCC2C3CC=C4CC(=O)CCC4(C)C3CCC12C are allowed
2020-09-21 11:23:49.134 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCN(CC)c1nc(Cl)nc(N(CC)CC)n1 are allowed
2020-09-21 11:23:49.135 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Oc1c2c(c(O)n1SC(Cl)(Cl)C(Cl)Cl)CC=CC2 are allowed
2020-09-21 11:23:49.137 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Brc1ccc(Br)c(Br)c1 are allowed
2020-09-21 11:23:49.138 | DEBUG    | remove_strange_mols:check_allowed_atoms:6

2020-09-21 11:23:49.229 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCCCCBr are allowed
2020-09-21 11:23:49.231 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCC(C)C are allowed
2020-09-21 11:23:49.233 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Cc1c(F)c(F)c(COC(=O)C2C(C=C(Cl)C(F)(F)F)C2(C)C)c(F)c1F are allowed
2020-09-21 11:23:49.234 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCc1cccc(C)c1N(C(=O)CCl)C(C)COC are allowed
2020-09-21 11:23:49.236 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule O=[N+]([O-])c1ccc(C=NO)o1 are allowed
2020-09-21 11:23:49.238 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC(C)C(Nc1ccc(C(F)(F)F)cc1Cl)C(=O)OC(C#N)c1cccc(Oc2ccccc2)c1 are allowed
2020-09-21 11:23:49.239 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All at

2020-09-21 11:23:49.338 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC1=C2C3OC(=O)C(C)C3CCC2(C)C=CC1=O are allowed
2020-09-21 11:23:49.339 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule OCc1ccccc1OC1OC(CO)C(O)C(O)C1O are allowed
2020-09-21 11:23:49.341 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCI are allowed
2020-09-21 11:23:49.342 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCN=c1[nH]c(SC)nc(=NC(C)C)[nH]1 are allowed
2020-09-21 11:23:49.347 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCO are allowed
2020-09-21 11:23:49.348 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC(=O)C1(O)CCC2C3CC=C4CC(=O)CCC4(C)C3CCC21C are allowed
2020-09-21 11:23:49.349 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCC(C)O are allowed

2020-09-21 11:23:49.442 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule C1CCCC1 are allowed
2020-09-21 11:23:49.444 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Cc1ccccc1N are allowed
2020-09-21 11:23:49.446 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule C=CCc1ccc(OC)cc1 are allowed
2020-09-21 11:23:49.447 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CN(C)C(=O)Nc1cccc(OC(=O)NC(C)(C)C)c1 are allowed
2020-09-21 11:23:49.449 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule C=CC(C)C are allowed
2020-09-21 11:23:49.450 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule O=c1cccc[nH]1 are allowed
2020-09-21 11:23:49.452 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC are allowed
2020-09-21 11:23:49.453 | DEBUG    | remove_strange_mols:chec

2020-09-21 11:23:49.537 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC1(C)CON(Cc2ccccc2Cl)C1=O are allowed
2020-09-21 11:23:49.538 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCCOCCO are allowed
2020-09-21 11:23:49.539 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule O=[N+]([O-])c1c(Cl)c(Cl)c(Cl)c(Cl)c1Cl are allowed
2020-09-21 11:23:49.540 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC12CCC3C(CCC4CC(O)CCC43C)C1CCC2=O are allowed
2020-09-21 11:23:49.541 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule O=C1C(Cl)C(CCl)CN1c1cccc(C(F)(F)F)c1 are allowed
2020-09-21 11:23:49.542 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule c1ccc2ncccc2c1 are allowed
2020-09-21 11:23:49.543 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule COC(=O)c1c

2020-09-21 11:23:49.611 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC(C)(C)c1ccccc1 are allowed
2020-09-21 11:23:49.613 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC(=O)CCc1c(O)n(-c2ccccc2)n(-c2ccccc2)c1=O are allowed
2020-09-21 11:23:49.614 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC(=O)OCC(=O)C1(O)CCC2C3CC=C4CC(=O)C=CC4(C)C3C(O)CC21C are allowed
2020-09-21 11:23:49.615 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCOC are allowed
2020-09-21 11:23:49.616 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC(=O)OC(C)C are allowed
2020-09-21 11:23:49.617 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Brc1ccccc1 are allowed
2020-09-21 11:23:49.618 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCOC(=O)c1ccc(O)cc1 are all

2020-09-21 11:23:49.687 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule c1ccc2c(c1)cnc1ccccc12 are allowed
2020-09-21 11:23:49.688 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCC(C)(O)CC are allowed
2020-09-21 11:23:49.689 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCCCCCC are allowed
2020-09-21 11:23:49.690 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule c1ccc2cc3ccccc3cc2c1 are allowed
2020-09-21 11:23:49.691 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule NNc1ccccc1 are allowed
2020-09-21 11:23:49.692 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCC=O are allowed
2020-09-21 11:23:49.694 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule C1CCCCCCC1 are allowed
2020-09-21 11:23:49.699 | DEBUG    | remove_strange_mols:check_all

2020-09-21 11:23:49.762 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Clc1ccc(Cl)c(-c2c(Cl)c(Cl)cc(Cl)c2Cl)c1 are allowed
2020-09-21 11:23:49.763 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCC1(C(C)C)C(=O)NC(=O)NC1=O are allowed
2020-09-21 11:23:49.764 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC(Cl)(Cl)Cl are allowed
2020-09-21 11:23:49.766 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CON(C)C(=O)Nc1ccc(Cl)cc1 are allowed
2020-09-21 11:23:49.767 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule O=C1NC(=O)C2(CCCCC2)C(=O)N1 are allowed
2020-09-21 11:23:49.768 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CN(C)C(=O)OC1=CC(=O)CC(C)(C)C1 are allowed
2020-09-21 11:23:49.769 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Cc1ccc(

2020-09-21 11:23:49.842 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CNc1ccccc1 are allowed
2020-09-21 11:23:49.843 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule C=CCC=C are allowed
2020-09-21 11:23:49.845 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC(=O)OCC(=O)C1(O)CCC2C3CC=C4CC(=O)CCC4(C)C3C(O)CC21C are allowed
2020-09-21 11:23:49.846 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Cc1cc([N+](=O)[O-])cc([N+](=O)[O-])c1O are allowed
2020-09-21 11:23:49.847 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule O=C1Nc2ccc(Cl)cc2C(c2ccccc2Cl)NC1=O are allowed
2020-09-21 11:23:49.848 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Oc1cccc(Cl)c1 are allowed
2020-09-21 11:23:49.849 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Clc1cccc(B

2020-09-21 11:23:49.917 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule N=c1[nH]c(=O)[nH]c2[nH]cnc12 are allowed
2020-09-21 11:23:49.918 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule O=c1c(C(CC(O)c2ccc(-c3ccc(Br)cc3)cc2)c2ccccc2)c(O)oc2ccccc12 are allowed
2020-09-21 11:23:49.919 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule C[N+](=O)[O-] are allowed
2020-09-21 11:23:49.920 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC(C)N(C(=O)SCC(Cl)=C(Cl)Cl)C(C)C are allowed
2020-09-21 11:23:49.921 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule C=CCCC=C are allowed
2020-09-21 11:23:49.922 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule c1ccc2[nH]ccc2c1 are allowed
2020-09-21 11:23:49.923 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC12CCC3

2020-09-21 11:23:49.988 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule C#CC1(O)CCC2C3CC=C4CC(=O)CCC4(O)C3CCC21C are allowed
2020-09-21 11:23:49.989 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC(C)OC(=O)C(O)(c1ccc(Br)cc1)c1ccc(Br)cc1 are allowed
2020-09-21 11:23:49.990 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Nc1cnn(-c2ccccc2)c(=O)c1Cl are allowed
2020-09-21 11:23:49.991 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCC(C)(C)O are allowed
2020-09-21 11:23:49.992 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Cc1ccc(O)cc1 are allowed
2020-09-21 11:23:49.993 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCOC=O are allowed
2020-09-21 11:23:49.994 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CN(C)c1ccccc1 are allowed
2020-

2020-09-21 11:23:50.052 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Oc1cc(Cl)cc(Cl)c1 are allowed
2020-09-21 11:23:50.054 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Cc1cccc2c1ccc1ccccc12 are allowed
2020-09-21 11:23:50.055 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCCC(CC)CO are allowed
2020-09-21 11:23:50.056 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC(C)N(C(=O)SCC(Cl)=CCl)C(C)C are allowed
2020-09-21 11:23:50.057 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Cc1ccccc1 are allowed
2020-09-21 11:23:50.058 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Clc1cccc(C(Cl)(Cl)Cl)n1 are allowed
2020-09-21 11:23:50.059 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule C1=CCCCCC1 are allowed
2020-09-21 11:23:50.060 | DEBUG  

2020-09-21 11:23:50.139 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCCCCC(C)O are allowed
2020-09-21 11:23:50.141 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC1=C(C(=O)Nc2ccccc2)S(=O)(=O)CCO1 are allowed
2020-09-21 11:23:50.146 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCCc1ccccc1 are allowed
2020-09-21 11:23:50.148 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule O=C1NC(=O)C(=O)N1 are allowed
2020-09-21 11:23:50.149 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule COP(=S)(OC)Oc1ccc(Sc2ccc(OP(=S)(OC)OC)cc2)cc1 are allowed
2020-09-21 11:23:50.150 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule NS(=O)(=O)c1cc(C2(O)NC(=O)c3ccccc32)ccc1Cl are allowed
2020-09-21 11:23:50.151 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC(=O

2020-09-21 11:23:50.215 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCCC(C)O has more than 5 atoms
2020-09-21 11:23:50.216 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC(=O)Nc1ccc(Br)cc1 has more than 5 atoms
2020-09-21 11:23:50.217 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Nc1cnn(-c2ccccc2)c(=O)c1Br has more than 5 atoms
2020-09-21 11:23:50.218 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule COC(=O)C1=C(C)N=C(C)C(C(=O)OC)C1c1ccccc1[N+](=O)[O-] has more than 5 atoms
2020-09-21 11:23:50.220 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Cc1ccc2ccc(C)nc2c1 has more than 5 atoms
2020-09-21 11:23:50.221 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule C#CCCCCCC has more than 5 atoms
2020-09-21 11:23:50.222 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCC1(C2=CCCCC2)C(=O)NC(=O)NC1=O has more than 5 atoms
2020-09-21 11:23:50.223 | DEBUG    | remove_strange_mols:check_at

2020-09-21 11:23:50.279 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Cc1c[nH]c(=O)[nH]c1=O has more than 5 atoms
2020-09-21 11:23:50.280 | DEBUG    | remove_strange_mols:check_atoms:71 - The molecule CC(C)C has less than 5 atoms
2020-09-21 11:23:50.281 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule O=c1[nH]cnc2c1ncn2C1OC(CO)C(O)C1O has more than 5 atoms
2020-09-21 11:23:50.281 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule N#Cc1cc(I)c(O)c(I)c1 has more than 5 atoms
2020-09-21 11:23:50.282 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule O=C(Nc1ccc([N+](=O)[O-])cc1Cl)c1cc(Cl)ccc1O has more than 5 atoms
2020-09-21 11:23:50.283 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCC has more than 5 atoms
2020-09-21 11:23:50.284 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Oc1ccccc1 has more than 5 atoms
2020-09-21 11:23:50.285 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Nc1ccc

2020-09-21 11:23:50.354 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCC(C)(COC(N)=O)COC(N)=O has more than 5 atoms
2020-09-21 11:23:50.354 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC(=O)C1CCC2C3CC=C4CC(O)CCC4(C)C3CCC12C has more than 5 atoms
2020-09-21 11:23:50.355 | DEBUG    | remove_strange_mols:check_atoms:71 - The molecule CI has less than 5 atoms
2020-09-21 11:23:50.356 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC1CC(C)C(=O)C(C(O)CC2CC(=O)NC(=O)C2)C1 has more than 5 atoms
2020-09-21 11:23:50.357 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCCCC(=O)OCN1C(=O)NC(c2ccccc2)(c2ccccc2)C1=O has more than 5 atoms
2020-09-21 11:23:50.358 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC1=CC(=O)CC(C)(C)C1 has more than 5 atoms
2020-09-21 11:23:50.359 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCC(C)C1(CC)C(=O)NC(=O)NC1=O has more than 5 atoms
2020-09-21 11:23:50.360 | DEBUG

2020-09-21 11:23:50.420 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Cc1ccc(-c2ccccc2)cc1 has more than 5 atoms
2020-09-21 11:23:50.420 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCN=c1[nH]c(Cl)nc(=NC(C)C)[nH]1 has more than 5 atoms
2020-09-21 11:23:50.421 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule NC(=S)Nc1ccccc1 has more than 5 atoms
2020-09-21 11:23:50.426 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCC(=O)CCC has more than 5 atoms
2020-09-21 11:23:50.427 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC(=O)C(C)(C)C has more than 5 atoms
2020-09-21 11:23:50.428 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Oc1ccc(Cl)cc1 has more than 5 atoms
2020-09-21 11:23:50.429 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule O=C1CCCCC1 has more than 5 atoms
2020-09-21 11:23:50.430 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Cc1cccc(N)c1 has more than 5 

2020-09-21 11:23:50.491 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCCCCl has more than 5 atoms
2020-09-21 11:23:50.492 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Clc1cc(-c2cc(Cl)c(Cl)c(Cl)c2Cl)c(Cl)c(Cl)c1Cl has more than 5 atoms
2020-09-21 11:23:50.493 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule OCC1OC(OCC2OC(OC3(CO)OC(CO)C(O)C3O)C(O)C(O)C2O)C(O)C(O)C1O has more than 5 atoms
2020-09-21 11:23:50.494 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCCCCCCCCCCCCCCCCCCCCCCC has more than 5 atoms
2020-09-21 11:23:50.495 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCN1c2ccccc2N(C)C(=O)c2cccnc21 has more than 5 atoms
2020-09-21 11:23:50.497 | DEBUG    | remove_strange_mols:check_atoms:71 - The molecule CC(Cl)Cl has less than 5 atoms
2020-09-21 11:23:50.498 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Nc1ccc(S(N)(=O)=O)cc1 has more than 5 atoms
2020-09-21 11:23:50.499 | DEBUG   

2020-09-21 11:23:50.559 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Cc1c2ccccc2c(C)c2ccccc12 has more than 5 atoms
2020-09-21 11:23:50.560 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCCC(=O)OC has more than 5 atoms
2020-09-21 11:23:50.561 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CN(C)C(=O)Nc1ccc(-n2nc(C(C)(C)C)oc2=O)c(Cl)c1 has more than 5 atoms
2020-09-21 11:23:50.562 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC(=O)Nc1ccc(F)cc1 has more than 5 atoms
2020-09-21 11:23:50.563 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCc1cccc(CC)c1N(COC)C(=O)CCl has more than 5 atoms
2020-09-21 11:23:50.564 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule C1=CCCCC1 has more than 5 atoms
2020-09-21 11:23:50.565 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC12CCC(=O)C=C1CCC1C2C(O)CC2(C)C1CCC2(O)C(=O)CO has more than 5 atoms
2020-09-21 11:23:50.566 | DEBUG    | remove_str

2020-09-21 11:23:50.629 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC(=O)C1CCC2C3CC=C4CC(=O)CCC4(C)C3CCC12C has more than 5 atoms
2020-09-21 11:23:50.630 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCN(CC)c1nc(Cl)nc(N(CC)CC)n1 has more than 5 atoms
2020-09-21 11:23:50.631 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Oc1c2c(c(O)n1SC(Cl)(Cl)C(Cl)Cl)CC=CC2 has more than 5 atoms
2020-09-21 11:23:50.632 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Brc1ccc(Br)c(Br)c1 has more than 5 atoms
2020-09-21 11:23:50.633 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule O=C1Nc2ccc(Cl)cc2C(c2ccccc2)NC1=O has more than 5 atoms
2020-09-21 11:23:50.634 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule C=CCC1(C(C)CCC)C(=O)NC(=O)NC1=O has more than 5 atoms
2020-09-21 11:23:50.634 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Cc1ccc(C(C)C)cc1O has more than 5 atoms
2020-09-21 11:23:50.635 | 

2020-09-21 11:23:50.696 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule N=c1nc[nH][nH]1 has more than 5 atoms
2020-09-21 11:23:50.697 | DEBUG    | remove_strange_mols:check_atoms:71 - The molecule BrC(Br)Br has less than 5 atoms
2020-09-21 11:23:50.698 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule COP(=O)(OC)C(O)C(Cl)(Cl)Cl has more than 5 atoms
2020-09-21 11:23:50.699 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCOP(=S)(OCC)SCn1c(=O)oc2cc(Cl)ccc21 has more than 5 atoms
2020-09-21 11:23:50.702 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule OCc1ccccc1 has more than 5 atoms
2020-09-21 11:23:50.704 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule O=c1c(C2CCCc3ccccc32)c(O)oc2ccccc12 has more than 5 atoms
2020-09-21 11:23:50.704 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Oc1ccc(Br)cc1 has more than 5 atoms
2020-09-21 11:23:50.705 | DEBUG    | remove_strange_mols:check_atoms:71 - The molecule

2020-09-21 11:23:50.759 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Clc1ccc(-c2cc(Cl)ccc2Cl)c(Cl)c1 has more than 5 atoms
2020-09-21 11:23:50.760 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule ClC(Cl)=C(Cl)C(Cl)=C(Cl)Cl has more than 5 atoms
2020-09-21 11:23:50.760 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCN=c1[nH]c(SC)nc(=NC(C)(C)C)[nH]1 has more than 5 atoms
2020-09-21 11:23:50.769 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCC(C)CCO has more than 5 atoms
2020-09-21 11:23:50.770 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Cc1ncc2nccnc2n1 has more than 5 atoms
2020-09-21 11:23:50.772 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule C#CC1(O)CCC2C3CCC4=Cc5oncc5CC4(C)C3CCC21C has more than 5 atoms
2020-09-21 11:23:50.772 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCI has more than 5 atoms
2020-09-21 11:23:50.774 | DEBUG    | remove_strange_mols:check_atoms:

2020-09-21 11:23:50.835 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC1=CC(=O)c2ccccc2C1=O has more than 5 atoms
2020-09-21 11:23:50.836 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule c1ccc2c(c1)ccc1ccccc12 has more than 5 atoms
2020-09-21 11:23:50.837 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Cc1ccnc(C)c1 has more than 5 atoms
2020-09-21 11:23:50.838 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCCCCCCO has more than 5 atoms
2020-09-21 11:23:50.839 | DEBUG    | remove_strange_mols:check_atoms:71 - The molecule BrCBr has less than 5 atoms
2020-09-21 11:23:50.841 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC1CC2C3CC=C4CC(=O)C=CC4(C)C3(F)C(O)CC2(C)C1(O)C(=O)CO has more than 5 atoms
2020-09-21 11:23:50.841 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Cc1ccc2cc(C)ccc2c1 has more than 5 atoms
2020-09-21 11:23:50.842 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule 

2020-09-21 11:23:50.904 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Clc1ccc(C(c2ccccc2Cl)C(Cl)Cl)cc1 has more than 5 atoms
2020-09-21 11:23:50.905 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule COc1ccccc1[N+](=O)[O-] has more than 5 atoms
2020-09-21 11:23:50.906 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC12CCC3C(CC=C4CC(O)CCC43C)C1CCC2=O has more than 5 atoms
2020-09-21 11:23:50.907 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC12CC1(C)C(=O)N(c1cc(Cl)cc(Cl)c1)C2=O has more than 5 atoms
2020-09-21 11:23:50.909 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule c1cc2ccc3ccc4ccc5cccc6c(c1)c2c3c4c56 has more than 5 atoms
2020-09-21 11:23:50.910 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCC(C)c1cc([N+](=O)[O-])cc([N+](=O)[O-])c1O has more than 5 atoms
2020-09-21 11:23:50.911 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule COC1=C(OC)C2C(=O)OCC2C=C1 has more than 5 atoms

2020-09-21 11:23:50.969 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule O=C1NC(=O)C(O)(C2(O)C(=O)NC(=O)NC2=O)C(=O)N1 has more than 5 atoms
2020-09-21 11:23:50.970 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCCCCCC has more than 5 atoms
2020-09-21 11:23:50.970 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Oc1ccccc1Cl has more than 5 atoms
2020-09-21 11:23:50.971 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Cc1cc2ccccc2c2ccc3ccccc3c12 has more than 5 atoms
2020-09-21 11:23:50.972 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCOc1ccccc1 has more than 5 atoms
2020-09-21 11:23:50.973 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCOC(=O)C=Cc1ccccc1 has more than 5 atoms
2020-09-21 11:23:50.974 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Cc1[nH]c(=O)n(C(C)(C)C)c(=O)c1Cl has more than 5 atoms
2020-09-21 11:23:50.975 | DEBUG    | remove_strange_mols:check_atoms:73 - The

2020-09-21 11:23:51.040 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCOP(=O)(OCC)OCC has more than 5 atoms
2020-09-21 11:23:51.042 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC12CCC(C1)C(C)(C)C2=O has more than 5 atoms
2020-09-21 11:23:51.042 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule COc1cnc2cncnc2n1 has more than 5 atoms
2020-09-21 11:23:51.043 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule ClC1=C(Cl)C2(Cl)C3CC=CC3C1(Cl)C2(Cl)Cl has more than 5 atoms
2020-09-21 11:23:51.045 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC(C)[N+](=O)[O-] has more than 5 atoms
2020-09-21 11:23:51.046 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule c1ccc2c(c1)[nH]c1ccccc12 has more than 5 atoms
2020-09-21 11:23:51.047 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule OCC(O)C(O)CO has more than 5 atoms
2020-09-21 11:23:51.048 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecul

2020-09-21 11:23:51.110 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCCC has more than 5 atoms
2020-09-21 11:23:51.110 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule COC(=O)c1cccnc1 has more than 5 atoms
2020-09-21 11:23:51.111 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule NS(=O)(=O)c1cc2c(cc1C(F)(F)F)NC(Cc1ccccc1)NS2(=O)=O has more than 5 atoms
2020-09-21 11:23:51.112 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Clc1ccc(-c2cc(Cl)c(Cl)c(Cl)c2Cl)cc1Cl has more than 5 atoms
2020-09-21 11:23:51.114 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule C=CC1(C)OC(=O)N(c2cc(Cl)cc(Cl)c2)C1=O has more than 5 atoms
2020-09-21 11:23:51.115 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCN=c1[nH]c(Cl)nc(=NC(C)(C)C#N)[nH]1 has more than 5 atoms
2020-09-21 11:23:51.117 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule c1ccc2c(c1)c1ccccc1c1ccccc21 has more than 5 atoms
2020-09-21 11:23:51.

2020-09-21 11:23:51.176 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC12CCC3C(CCC4CC(=O)CCC43C)C1CCC2O has more than 5 atoms
2020-09-21 11:23:51.177 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule C=CC(O)CCC has more than 5 atoms
2020-09-21 11:23:51.178 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Oc1[nH]c(O)c2c1C1C=C(C(O)(c3ccccc3)c3ccccn3)C2C1=C(c1ccccc1)c1ccccn1 has more than 5 atoms
2020-09-21 11:23:51.179 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCOCCCC has more than 5 atoms
2020-09-21 11:23:51.180 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCCCCCCCCCO has more than 5 atoms
2020-09-21 11:23:51.181 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCN1c2nc(N(C)CCO)ccc2NC(=O)c2cccnc21 has more than 5 atoms
2020-09-21 11:23:51.181 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCC(C)(C)O has more than 5 atoms
2020-09-21 11:23:51.182 | DEBUG    | remove_stran

2020-09-21 11:23:51.246 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCN(CCC)c1c([N+](=O)[O-])cc(C(F)(F)F)cc1[N+](=O)[O-] has more than 5 atoms
2020-09-21 11:23:51.247 | DEBUG    | remove_strange_mols:check_atoms:71 - The molecule CCO has less than 5 atoms
2020-09-21 11:23:51.248 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule O=C1NC(=O)C2(CCCC2)C(=O)N1 has more than 5 atoms
2020-09-21 11:23:51.249 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCNC(=O)C(C)OC(=O)Nc1ccccc1 has more than 5 atoms
2020-09-21 11:23:51.250 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC(C)=CC1C(C(=O)OCc2cccc(Oc3ccccc3)c2)C1(C)C has more than 5 atoms
2020-09-21 11:23:51.251 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CN(C)C(=O)NC1CCCCCCC1 has more than 5 atoms
2020-09-21 11:23:51.252 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule ClC1(Cl)C2(Cl)C3(Cl)C4(Cl)C(Cl)(Cl)C5(Cl)C3(Cl)C1(Cl)C5(Cl)C24Cl has more than 

2020-09-21 11:23:51.313 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Sc1ccccc1 has more than 5 atoms
2020-09-21 11:23:51.314 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CNC(=O)Oc1cc(C)cc(C)c1 has more than 5 atoms
2020-09-21 11:23:51.315 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule ClC1=C(Cl)C2(Cl)C3C(Cl)C(Cl)CC3C1(Cl)C2(Cl)Cl has more than 5 atoms
2020-09-21 11:23:51.316 | DEBUG    | remove_strange_mols:check_atoms:71 - The molecule CSSC has less than 5 atoms
2020-09-21 11:23:51.317 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule NC(=O)c1ccccc1 has more than 5 atoms
2020-09-21 11:23:51.318 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Clc1ccccc1Br has more than 5 atoms
2020-09-21 11:23:51.319 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule COC(=O)c1ccccc1OC1OC(COC2OCC(O)C(O)C2O)C(O)C(O)C1O has more than 5 atoms
2020-09-21 11:23:51.320 | DEBUG    | remove_strange_mols:check_atoms:73 - 

2020-09-21 11:23:51.381 | DEBUG    | remove_strange_mols:check_atoms:71 - The molecule C=CC has less than 5 atoms
2020-09-21 11:23:51.382 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Oc1ccc(Cl)cc1Cc1cc(Cl)ccc1O has more than 5 atoms
2020-09-21 11:23:51.383 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCOC(=O)Nc1cccc(OC(=O)Nc2ccccc2)c1 has more than 5 atoms
2020-09-21 11:23:51.384 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule O=C1c2ccccc2C(=O)c2ccccc21 has more than 5 atoms
2020-09-21 11:23:51.385 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCCCC(C)O has more than 5 atoms
2020-09-21 11:23:51.386 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC1=C(C(=O)Nc2ccccc2)S(=O)(=O)CCO1 has more than 5 atoms
2020-09-21 11:23:51.388 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCc1ccccc1 has more than 5 atoms
2020-09-21 11:23:51.389 | DEBUG    | remove_strange_mols:check_atoms:73 - The mole

In [88]:
# Read the ESOL table from the file that DATA_PATH / FILENAME currently point to.
esol_csv_path = os.path.join(DATA_PATH, FILENAME)
esol_data = pd.read_csv(esol_csv_path)

In [89]:
# Summarise the loaded frame: row count, columns, non-null counts and dtypes.
esol_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1064 entries, 0 to 1063
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   smiles  1064 non-null   object 
 1   logS    1064 non-null   float64
dtypes: float64(1), object(1)
memory usage: 16.8+ KB


In [91]:
# Number of rows that repeat an earlier row in every column (fully duplicated rows).
esol_data.duplicated().sum()

0

In [90]:
# Number of rows whose SMILES_COLUMN value already appeared in an earlier row,
# regardless of what the other columns contain.
esol_data[SMILES_COLUMN].duplicated().sum()

6

In [92]:
# Collapse repeated SMILES to a single row each, keeping the most frequent value of
# every remaining column (no averaging).
#
# The frame contains no fully duplicated rows (`esol_data.duplicated().sum()` is 0), so
# inside a repeated SMILES every value occurs exactly once and "most frequent" is always
# a tie. `value_counts().index[0]` resolves such ties by whatever order value_counts
# happens to emit, which is not a stable contract across pandas versions. Taking the
# smallest of the modes gives the same answer whenever a unique most-frequent value
# exists and a reproducible one when it does not.
esol_data_wo_aver = esol_data.groupby([SMILES_COLUMN]).agg(lambda x: x.mode().min())

In [93]:
# Summarise the de-duplicated frame; the group key (SMILES_COLUMN) is now the index.
esol_data_wo_aver.info()


<class 'pandas.core.frame.DataFrame'>
Index: 1058 entries, BrC(Br)(Br)Br to c1cncnc1
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   logS    1058 non-null   float64
dtypes: float64(1)
memory usage: 16.5+ KB


In [94]:
# Write the de-duplicated ESOL table to the final-data directory. The frame is indexed
# by the group key, so the index is written out as the first CSV column.
PROCESSED_PATH = "../data/3_final_data"
esol_output_file = os.path.join(PROCESSED_PATH, 'esol.csv')
esol_data_wo_aver.to_csv(esol_output_file)

## FreeSolv

In [109]:
# Location of the FreeSolv file. DATA_PATH and FILENAME are the same names the ESOL
# loading cell reads, so after this cell runs they refer to FreeSolv; re-running the
# ESOL cells without resetting them would load this file instead.
DATA_PATH = "../data/3_final_data"
FILENAME = 'freesolv.csv'

In [110]:
# The same path is passed as both arguments; presumably (input, output), i.e. the
# FreeSolv file is filtered in place -- TODO confirm against remove_strange_mols.
freesolv_path = os.path.join(DATA_PATH, FILENAME)
remove_strange_mols(freesolv_path, freesolv_path)

2020-09-21 11:37:39.474 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule COc1ccc(C(=O)N(C)C)cc1 isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.476 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CS(=O)(=O)Cl isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.477 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule C=CC(C)C isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.479 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCc1cnccn1 isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.480 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCCCCCO isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.482 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Cc1cc(C)cc(O)c1 isn't in the list of very soluble in water molecules
2020-09-21 11

2020-09-21 11:37:39.542 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule FC(F)(F)F isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.544 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CC[C@@H](C)CO isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.545 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Oc1ccccc1I isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.546 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule COc1cccc(OC)c1O isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.547 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule C#CCC isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.548 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule FC(F)(F)c1ccccc1 isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.

2020-09-21 11:37:39.607 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCCl isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.608 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCSCCC isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.609 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCC[C@@H](O)CC isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.610 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CC#N isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.612 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CN(CC(F)(F)F)c1ccccc1 isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.613 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule FC(F)O[C@H](Cl)C(F)(F)F isn't in the list of very soluble in water molecules
2020-09-21 11:

2020-09-21 11:37:39.677 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Cc1ccc(C(C)C)cc1 isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.678 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Cn1ccnc1 isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.679 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule OCCO isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.680 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Clc1ccccc1Cl isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.684 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule O=c1cc(Cl)[nH]c(=O)[nH]1 isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.685 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCOC=O isn't in the list of very soluble in water molecules
2020-09-21 11:37:

2020-09-21 11:37:39.740 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCCCCOC(C)=O isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.741 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule O=C1CCCC1 isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.742 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCCC(=O)O isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.743 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCBr isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.744 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Cc1ccc2cc(C)ccc2c1 isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.753 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCCCCO isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.754 | D

2020-09-21 11:37:39.808 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCCCCCC(C)=O isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.809 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCCN isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.810 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCC(=O)OCC isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.811 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Cc1ccc(N)cc1 isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.812 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCCCCI isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.813 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule FC(F)(Cl)C(F)(Cl)Cl isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.8

2020-09-21 11:37:39.883 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCOCCOCC isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.884 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCCCOC(=O)CC isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.886 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCc1ccc(O)cc1 isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.887 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CC=C(C)C isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.888 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule ClCCCl isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.889 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCC(C)(C)CC isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.890 | DE

2020-09-21 11:37:39.954 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCC[N+](=O)[O-] isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.955 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule O=Cc1ccncc1 isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.956 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule ClCCOCCCl isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.958 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CC[N+](=O)[O-] isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.959 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Clc1cccnc1 isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.960 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CBr isn't in the list of very soluble in water molecules
2020-09-21 11:37:39.961 |

2020-09-21 11:37:40.024 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Oc1ccc(F)cc1 isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.025 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Nc1ccccc1Cl isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.026 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Cc1ccc(C)c(C)c1 isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.028 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCc1ccccc1C isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.029 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule C=C(C)[C@@H]1CCC(C)C(=O)C1 isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.031 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule c1ccc(-c2ccccc2)cc1 isn't in the list of very soluble in water mole

2020-09-21 11:37:40.092 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule BrCCBr isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.093 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule ClC(Cl)C(Cl)(Cl)Cl isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.094 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule O=c1[nH]cc(C(F)(F)F)c(=O)[nH]1 isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.095 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Cc1cccc(C)c1N isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.096 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCOC(C)=O isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.097 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule c1ccc2ncccc2c1 isn't in the list of very soluble in water molecul

2020-09-21 11:37:40.162 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule C=C[C@@](C)(CCC=C(C)C)OC(C)=O isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.164 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule C[C@@H](CCO[N+](=O)[O-])O[N+](=O)[O-] isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.165 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CC(C)OC(C)C isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.166 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCCC(C)C isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.168 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule O=[N+]([O-])c1c(Cl)c(Cl)c(Cl)c(Cl)c1Cl isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.170 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule FC(F)(F)[C@H](Cl)B

2020-09-21 11:37:40.222 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule NC(=O)c1ccccc1 isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.223 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CC(C)[N+](=O)[O-] isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.224 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule OCC(O)CO isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.225 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCI isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.226 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule COCCN isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.226 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule ClCC(Cl)(Cl)Cl isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.227 | 

2020-09-21 11:37:40.275 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule C=C(C)[C@@H]1CC=C(C=O)CC1 isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.276 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule Cc1ccccc1[N+](=O)[O-] isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.277 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCCCCCI isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.278 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule c1cc2ccc3cccc4ccc(c1)c2c34 isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.280 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CCCCCCl isn't in the list of very soluble in water molecules
2020-09-21 11:37:40.281 | DEBUG    | remove_strange_mols:check_not_strange_mols:44 - The molecule CC(=O)OCC(C)C isn't in the list of very soluble in wate

2020-09-21 11:37:40.332 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCc1cnccn1 are allowed
2020-09-21 11:37:40.333 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCCCCCO are allowed
2020-09-21 11:37:40.334 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Cc1cc(C)cc(O)c1 are allowed
2020-09-21 11:37:40.335 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC(C)C(C)C are allowed
2020-09-21 11:37:40.336 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCC(C)(C)O are allowed
2020-09-21 11:37:40.337 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule C[C@H]1CCCC[C@H]1C are allowed
2020-09-21 11:37:40.338 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC[C@H](C)O are allowed
2020-09-21 11:37:40.339 | DEBUG    | remove_strange_mols:check_allowed

2020-09-21 11:37:40.413 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC(F)F are allowed
2020-09-21 11:37:40.415 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Nc1cnn(-c2ccccc2)c(=O)c1Cl are allowed
2020-09-21 11:37:40.417 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule C=CC=C are allowed
2020-09-21 11:37:40.418 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CN(C)C are allowed
2020-09-21 11:37:40.419 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCCCC(N)=O are allowed
2020-09-21 11:37:40.420 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC(C)CO[N+](=O)[O-] are allowed
2020-09-21 11:37:40.422 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule O=CCNc1cccc2c(O)c3ccccc3c(O)c12 are allowed
2020-09-21 11:37:40.423 | DEBUG    | remove_stra

2020-09-21 11:37:40.491 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule c1ccc2c(c1)ccc1ccccc12 are allowed
2020-09-21 11:37:40.492 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CI are allowed
2020-09-21 11:37:40.493 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule COc1c(Cl)cc(Cl)c(OC)c1O are allowed
2020-09-21 11:37:40.494 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Cl/C=C/Cl are allowed
2020-09-21 11:37:40.495 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCCC are allowed
2020-09-21 11:37:40.496 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCC#N are allowed
2020-09-21 11:37:40.497 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule F[C@H](Br)C(F)(F)F are allowed
2020-09-21 11:37:40.498 | DEBUG    | remove_strange_mols:check_allow

2020-09-21 11:37:40.586 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC(=O)OC(C)C are allowed
2020-09-21 11:37:40.587 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule c1ccccc1 are allowed
2020-09-21 11:37:40.589 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Clc1cccc(Cl)c1Cl are allowed
2020-09-21 11:37:40.590 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCOP(=S)(OCC)SCSc1ccc(Cl)cc1 are allowed
2020-09-21 11:37:40.592 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule COP(=S)(OC)SCn1nnc2ccccc2c1=O are allowed
2020-09-21 11:37:40.594 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Clc1cc(Cl)c2c(c1Cl)Oc1ccccc1O2 are allowed
2020-09-21 11:37:40.595 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule C=C(C)C(=C)C are allowed
2020-09-21 11:37:4

2020-09-21 11:37:40.698 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Nc1ccc(N)c2c1C(=O)c1ccccc1C2=O are allowed
2020-09-21 11:37:40.699 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCCCCCC(C)=O are allowed
2020-09-21 11:37:40.700 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCCN are allowed
2020-09-21 11:37:40.702 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCC(=O)OCC are allowed
2020-09-21 11:37:40.703 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Cc1ccc(N)cc1 are allowed
2020-09-21 11:37:40.705 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCCCCI are allowed
2020-09-21 11:37:40.706 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule FC(F)(Cl)C(F)(Cl)Cl are allowed
2020-09-21 11:37:40.708 | DEBUG    | remove_strange_mols

2020-09-21 11:37:40.799 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule COc1cccc(N)c1 are allowed
2020-09-21 11:37:40.801 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule O=Cc1cccnc1 are allowed
2020-09-21 11:37:40.802 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCC(C)(C)O are allowed
2020-09-21 11:37:40.804 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCc1cccc(CC)c1N(COC)C(=O)CCl are allowed
2020-09-21 11:37:40.806 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Cn1cccc1 are allowed
2020-09-21 11:37:40.807 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule COCOC are allowed
2020-09-21 11:37:40.809 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCC(O)CC are allowed
2020-09-21 11:37:40.810 | DEBUG    | remove_strange_mols:check_allowed

2020-09-21 11:37:40.904 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule ClC(Cl)=C(Cl)Cl are allowed
2020-09-21 11:37:40.905 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CC(C)(C)Br are allowed
2020-09-21 11:37:40.907 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule C=C(c1ccccc1)c1ccccc1 are allowed
2020-09-21 11:37:40.908 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCc1ccc(C)cc1 are allowed
2020-09-21 11:37:40.910 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Cc1cccnc1 are allowed
2020-09-21 11:37:40.912 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule COCC(OC)(OC)OC are allowed
2020-09-21 11:37:40.913 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule c1ccc2c(c1)Cc1ccccc1-2 are allowed
2020-09-21 11:37:40.915 | DEBUG    | remove_stran

2020-09-21 11:37:41.011 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Cc1ccc(C)c2ccccc12 are allowed
2020-09-21 11:37:41.013 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Clc1cc2c(cc1Cl)Oc1cc(Cl)c(Cl)cc1O2 are allowed
2020-09-21 11:37:41.031 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule N#Cc1c(Cl)cccc1Cl are allowed
2020-09-21 11:37:41.032 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCOC=O are allowed
2020-09-21 11:37:41.034 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Clc1cc(Cl)c(Cl)cc1Cl are allowed
2020-09-21 11:37:41.036 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCOC(OCC)Oc1ccccc1 are allowed
2020-09-21 11:37:41.037 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule O=[N+]([O-])c1cccc(O)c1 are allowed
2020-09-21 11:37:

2020-09-21 11:37:41.134 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCC/C=C/C=O are allowed
2020-09-21 11:37:41.136 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CN(C)C(=O)c1ccc([N+](=O)[O-])cc1 are allowed
2020-09-21 11:37:41.138 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule C1CCOC1 are allowed
2020-09-21 11:37:41.139 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCCCCCC are allowed
2020-09-21 11:37:41.141 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCN(CCC)c1c([N+](=O)[O-])cc(C(F)(F)F)cc1[N+](=O)[O-] are allowed
2020-09-21 11:37:41.142 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule C=C[C@@](C)(CCC=C(C)C)OC(C)=O are allowed
2020-09-21 11:37:41.144 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule C[C@@H](CCO[N+](=O)[O-])O[

2020-09-21 11:37:41.239 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule ClCC(Cl)(Cl)Cl are allowed
2020-09-21 11:37:41.241 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCC(=O)OC are allowed
2020-09-21 11:37:41.242 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule C1CCCC1 are allowed
2020-09-21 11:37:41.243 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCc1cccnc1 are allowed
2020-09-21 11:37:41.245 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Cc1cncc(C)c1 are allowed
2020-09-21 11:37:41.246 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule COCCO are allowed
2020-09-21 11:37:41.248 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule COC=O are allowed
2020-09-21 11:37:41.250 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms

2020-09-21 11:37:41.344 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCCCC(C)=O are allowed
2020-09-21 11:37:41.346 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule O=[N+]([O-])OCCO[N+](=O)[O-] are allowed
2020-09-21 11:37:41.347 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Oc1ccccc1F are allowed
2020-09-21 11:37:41.349 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule Cc1nc(N(C)C)nc(OC(=O)N(C)C)c1C are allowed
2020-09-21 11:37:41.350 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule C=Cc1ccccc1 are allowed
2020-09-21 11:37:41.352 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule CCOP(=O)(OCC)OCC are allowed
2020-09-21 11:37:41.353 | DEBUG    | remove_strange_mols:check_allowed_atoms:60 - All atoms in the molecule OCC(F)(F)F are allowed
2020-09-21 11:37:41.355 | DEBUG    | r

2020-09-21 11:37:41.449 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CN(C)C(=O)c1ccccc1 has more than 5 atoms
2020-09-21 11:37:41.450 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCNCC has more than 5 atoms
2020-09-21 11:37:41.451 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC(C)(C)c1ccc(O)cc1 has more than 5 atoms
2020-09-21 11:37:41.452 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC(C)CCOC=O has more than 5 atoms
2020-09-21 11:37:41.453 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCCCCCCCO has more than 5 atoms
2020-09-21 11:37:41.455 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCOC(=O)CC has more than 5 atoms
2020-09-21 11:37:41.458 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCCCCCC has more than 5 atoms
2020-09-21 11:37:41.459 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CNC(C)=O has more than 5 atoms
2020-09-21 11:37:41.460 | DEB

2020-09-21 11:37:41.539 | DEBUG    | remove_strange_mols:check_atoms:71 - The molecule CCCCl has less than 5 atoms
2020-09-21 11:37:41.541 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCSCCC has more than 5 atoms
2020-09-21 11:37:41.542 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCC[C@@H](O)CC has more than 5 atoms
2020-09-21 11:37:41.543 | DEBUG    | remove_strange_mols:check_atoms:71 - The molecule CC#N has less than 5 atoms
2020-09-21 11:37:41.545 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CN(CC(F)(F)F)c1ccccc1 has more than 5 atoms
2020-09-21 11:37:41.546 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule FC(F)O[C@H](Cl)C(F)(F)F has more than 5 atoms
2020-09-21 11:37:41.547 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule C=CCCC=C has more than 5 atoms
2020-09-21 11:37:41.548 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Cc1cccc(C)c1 has more than 5 atoms
2020-09-21 11:37:41.549 | 

2020-09-21 11:37:41.618 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule ClC=C(Cl)Cl has more than 5 atoms
2020-09-21 11:37:41.619 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCN(CC)CC has more than 5 atoms
2020-09-21 11:37:41.620 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Clc1ccc2c(c1)Oc1c(Cl)c(Cl)c(Cl)c(Cl)c1O2 has more than 5 atoms
2020-09-21 11:37:41.621 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Cc1ccncc1C has more than 5 atoms
2020-09-21 11:37:41.621 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule O=c1[nH]c(=O)[nH]c(=O)[nH]1 has more than 5 atoms
2020-09-21 11:37:41.622 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule O=Cc1ccccc1 has more than 5 atoms
2020-09-21 11:37:41.623 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Clc1ccccn1 has more than 5 atoms
2020-09-21 11:37:41.624 | DEBUG    | remove_strange_mols:check_atoms:71 - The molecule C=CCCl has less than 5 at

2020-09-21 11:37:41.690 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCCCCN has more than 5 atoms
2020-09-21 11:37:41.691 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC1CCCC1 has more than 5 atoms
2020-09-21 11:37:41.692 | DEBUG    | remove_strange_mols:check_atoms:71 - The molecule CCC has less than 5 atoms
2020-09-21 11:37:41.692 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule C[C@H]1CCCO1 has more than 5 atoms
2020-09-21 11:37:41.693 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CNC(=O)Oc1cccc2ccccc12 has more than 5 atoms
2020-09-21 11:37:41.694 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule O=Cc1cccc(O)c1 has more than 5 atoms
2020-09-21 11:37:41.695 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule c1ccc2cc3ccccc3cc2c1 has more than 5 atoms
2020-09-21 11:37:41.696 | DEBUG    | remove_strange_mols:check_atoms:71 - The molecule ClCCl has less than 5 atoms
2020-09-21 11:37:41.697 | DE

2020-09-21 11:37:41.759 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCCOC(=O)CC has more than 5 atoms
2020-09-21 11:37:41.760 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCc1ccc(O)cc1 has more than 5 atoms
2020-09-21 11:37:41.760 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC=C(C)C has more than 5 atoms
2020-09-21 11:37:41.761 | DEBUG    | remove_strange_mols:check_atoms:71 - The molecule ClCCCl has less than 5 atoms
2020-09-21 11:37:41.762 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCC(C)(C)CC has more than 5 atoms
2020-09-21 11:37:41.763 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Cc1cc2ccccc2cc1C has more than 5 atoms
2020-09-21 11:37:41.764 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Cc1cccc(C)n1 has more than 5 atoms
2020-09-21 11:37:41.765 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule COC(F)(F)C(Cl)Cl has more than 5 atoms
2020-09-21 11:37:41.766 

2020-09-21 11:37:41.830 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Cc1ccccc1N has more than 5 atoms
2020-09-21 11:37:41.831 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCBr has more than 5 atoms
2020-09-21 11:37:41.832 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCCCCCCO has more than 5 atoms
2020-09-21 11:37:41.833 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Cc1ccncc1 has more than 5 atoms
2020-09-21 11:37:41.836 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule ClC(Cl)=C(Cl)Cl has more than 5 atoms
2020-09-21 11:37:41.837 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC(C)(C)Br has more than 5 atoms
2020-09-21 11:37:41.838 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule C=C(c1ccccc1)c1ccccc1 has more than 5 atoms
2020-09-21 11:37:41.838 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCc1ccc(C)cc1 has more than 5 atoms
2020-09-21 11:37:41.839 | 

2020-09-21 11:37:41.898 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Clc1cc2c(cc1Cl)Oc1cc(Cl)c(Cl)cc1O2 has more than 5 atoms
2020-09-21 11:37:41.899 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule N#Cc1c(Cl)cccc1Cl has more than 5 atoms
2020-09-21 11:37:41.902 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCOC=O has more than 5 atoms
2020-09-21 11:37:41.903 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Clc1cc(Cl)c(Cl)cc1Cl has more than 5 atoms
2020-09-21 11:37:41.904 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCOC(OCC)Oc1ccccc1 has more than 5 atoms
2020-09-21 11:37:41.905 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule O=[N+]([O-])c1cccc(O)c1 has more than 5 atoms
2020-09-21 11:37:41.906 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCCCCCO has more than 5 atoms
2020-09-21 11:37:41.907 | DEBUG    | remove_strange_mols:check_atoms:71 - The molecule C=CCC has less t

2020-09-21 11:37:41.968 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule C[C@@H](CCO[N+](=O)[O-])O[N+](=O)[O-] has more than 5 atoms
2020-09-21 11:37:41.969 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC(C)OC(C)C has more than 5 atoms
2020-09-21 11:37:41.972 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCC(C)C has more than 5 atoms
2020-09-21 11:37:41.973 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule O=[N+]([O-])c1c(Cl)c(Cl)c(Cl)c(Cl)c1Cl has more than 5 atoms
2020-09-21 11:37:41.974 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule FC(F)(F)[C@H](Cl)Br has more than 5 atoms
2020-09-21 11:37:41.975 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCOCCCC has more than 5 atoms
2020-09-21 11:37:41.976 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCCC1CCCC1 has more than 5 atoms
2020-09-21 11:37:41.977 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC(C)CC(

2020-09-21 11:37:42.039 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC(C)(C)c1ccccc1 has more than 5 atoms
2020-09-21 11:37:42.040 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCCC(=O)OC has more than 5 atoms
2020-09-21 11:37:42.041 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule C[C@H](O)C(F)(F)F has more than 5 atoms
2020-09-21 11:37:42.042 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCCBr has more than 5 atoms
2020-09-21 11:37:42.042 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule C=CCCCCCC has more than 5 atoms
2020-09-21 11:37:42.043 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CC1=CC(=O)C(C(C)C)CC1 has more than 5 atoms
2020-09-21 11:37:42.047 | DEBUG    | remove_strange_mols:check_atoms:71 - The molecule CC(C)O has less than 5 atoms
2020-09-21 11:37:42.048 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCCCN has more than 5 atoms
2020-09-21 11:37:42.048 |

2020-09-21 11:37:42.111 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule C1=CCCC1 has more than 5 atoms
2020-09-21 11:37:42.112 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule C[C@@H](O)C(F)(F)F has more than 5 atoms
2020-09-21 11:37:42.113 | DEBUG    | remove_strange_mols:check_atoms:71 - The molecule CCCN has less than 5 atoms
2020-09-21 11:37:42.114 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule O=[N+]([O-])c1ccccc1O has more than 5 atoms
2020-09-21 11:37:42.115 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Cc1cccc2ccccc12 has more than 5 atoms
2020-09-21 11:37:42.116 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule Clc1c(Cl)c(Cl)c(Cl)c(Cl)c1Cl has more than 5 atoms
2020-09-21 11:37:42.118 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule CCCCC/C=C/C=O has more than 5 atoms
2020-09-21 11:37:42.120 | DEBUG    | remove_strange_mols:check_atoms:73 - The molecule C#CCCCCCC has more than 5 atoms
2020

In [111]:
# Load the CSV located at DATA_PATH/FILENAME (both set outside this cell).
# NOTE(review): presumably this is the FreeSolv file written by the filtering step above — confirm.
freesolv_data = pd.read_csv(os.path.join(DATA_PATH, FILENAME))

In [112]:
# Print the index range, per-column non-null counts and dtypes of the loaded frame.
freesolv_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 565 entries, 0 to 564
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   smiles  565 non-null    object 
 1   Energy  565 non-null    float64
dtypes: float64(1), object(1)
memory usage: 9.0+ KB


In [113]:
# Count rows that exactly repeat an earlier row (all columns equal).
freesolv_data.duplicated().sum()

0

In [114]:
# Count values in the SMILES_COLUMN column that repeat an earlier value,
# regardless of what the other columns contain.
freesolv_data[SMILES_COLUMN].duplicated().sum()

0

# Split data into train / validation / test sets (70 / 15 / 15)

In [116]:
from sklearn.model_selection import train_test_split
import pandas as pd

# Directory holding the per-dataset CSVs read by the split loop; the trailing
# slash matters because file paths are built by plain string concatenation.
# NOTE(review): this rebinds DATA_PATH, which the FreeSolv loading cell also
# uses — re-running that cell after this one would read from a different place.
DATA_PATH = "../data/3_final_data/"
# Base names (without the '.csv' extension) of the datasets to split.
data_names = ['esol', 'freesolv']

In [117]:
def train_test_validation_split(df, random_state=None):
    """Split ``df`` into train (70 %), validation (15 %) and test (15 %) parts.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to split. Rows are shuffled by ``train_test_split`` before being
        assigned to a part.
    random_state : int, RandomState instance or None, default=None
        Forwarded to both ``train_test_split`` calls. Pass an int to obtain
        the same split on every run; ``None`` keeps the previous behaviour
        (a different split on each call).

    Returns
    -------
    tuple
        ``(train, validation, test)`` -- note the order -- each with its index
        reset to ``0..n-1``.
    """
    # First cut: 70 % train, 30 % held out.
    train_data, rest_data = train_test_split(
        df, test_size=0.3, random_state=random_state
    )
    # Second cut: halve the held-out rows into test and validation.
    test_data, validation_data = train_test_split(
        rest_data, test_size=0.5, random_state=random_state
    )
    return (
        train_data.reset_index(drop=True),
        validation_data.reset_index(drop=True),
        test_data.reset_index(drop=True),
    )

In [119]:
# Fixed seed so the saved train/validation/test files are identical on every
# re-run of the notebook.
SPLIT_SEED = 42

for file in data_names:

    data = pd.read_csv(DATA_PATH + file + '.csv')

    # train_test_validation_split() is called without an explicit random state,
    # so scikit-learn draws from NumPy's global RNG. Re-seeding it for each
    # dataset makes every split reproducible and independent of which other
    # datasets are listed in data_names.
    np.random.seed(SPLIT_SEED)

    print(file, 'shape: ', data.shape)
    train, validation, test = train_test_validation_split(data)
    print('SPLITTED SHAPES:\n\ttrain: {0}\n\tvalidation: {1}\n\ttest: {2}\n'.format(train.shape, validation.shape, test.shape))

    # to_csv() is called with its defaults, so the (reset) integer index is
    # written as the first, unnamed column of each file.
    train.to_csv(DATA_PATH + 'split_data/' + file + '_train.csv')
    validation.to_csv(DATA_PATH + 'split_data/' + file + '_validation.csv')
    test.to_csv(DATA_PATH + 'split_data/' + file + '_test.csv')

esol shape:  (1058, 2)
SPLITTED SHAPES:
	train: (740, 2)
	validation: (159, 2)
	test: (159, 2)

freesolv shape:  (565, 2)
SPLITTED SHAPES:
	train: (395, 2)
	validation: (85, 2)
	test: (85, 2)

