# Initialization

In [2]:
import math

import numpy as np
import pandas as pd
import darkchem

Using TensorFlow backend.


Load model:

In [3]:
model = darkchem.utils.load_model('/home/christinehc/direct-capstone/final_trained_darkchem_network_weights/N7b_[M+H]/')

Load training data:

In [4]:
x = np.load('/home/christinehc/direct-capstone/training-data_molecules/combined_[M+H]_smiles.npy')
y = np.load('/home/christinehc/direct-capstone/training-data_molecules/combined_[M+H]_labels.npy') # must have the same number of columns as the data the network was trained on

Generate latent space representations ($x$) from training data:

In [43]:
x_latent = model.encoder.predict(x)

Generate property predictions ($y$):

In [44]:
y_pred = model.predictor.predict(x_latent)

Predict SMILES outputs:

In [45]:
x_pred = model.decoder.predict(x_latent)

In [5]:
def array_in_nd_array(test, array):
    """
    Checks whether or not a test 1D array is contained within a full ND array.
    Returns True if the test array is equal to any of the dimensions of the ND array.
    Returns False if the test array does not match any dimension of the ND array.
    """
    return any(np.array_equal(x, test) for x in array)

# Encoding Rxns

## Alkylation of Enolates

1. $CH_{3}C(O)CH_{3} + OH^{-} + CH_{3}Br \rightarrow CH_{3}C(O)CH_{2}CH_{3} + H_{2}O + Br^{-}$
2. Cyclohexanone

In [11]:
smiles = {'reactants': ['CC(=O)C', 'O=C1CCCCC1', 'O=C1CCCC1C', 'O=C1CCCC1C', 'CCCC(=O)CCC',
                        'CCCCC(=O)CCCC', 'CC(=O)CC(=O)C', 'CC(=O)CC(=O)C', 'COCC(=O)C',
                        'CC(=O)c1ccccc1'], 
        'products': ['CCC(=O)C', 'CC1CCCCC1=O', 'CC1CCC(C1=O)C', 'O=C1CCCC1(C)C', 'CCCC(=O)C(CC)C',
                    'CCCCC(=O)C(CCC)C', 'CC(C(=O)C)C(=O)C', 'CC(=O)C(C(=O)C)(C)C', 'COC(C(=O)CC)C',
                    'CCC(=O)c1ccccc1']}

In [12]:
smilesdf = pd.DataFrame(smiles)
smilesdf.head()

Unnamed: 0,reactants,products
0,CC(=O)C,CCC(=O)C
1,O=C1CCCCC1,CC1CCCCC1=O
2,O=C1CCCC1C,CC1CCC(C1=O)C
3,O=C1CCCC1C,O=C1CCCC1(C)C
4,CCCC(=O)CCC,CCCC(=O)C(CC)C


In [13]:
smiles['rvec'] = [darkchem.utils.struct2vec(reactant).astype(np.int16) for reactant in smiles['reactants']]
smiles['pvec'] = [darkchem.utils.struct2vec(product).astype(np.int16) for product in smiles['products']]
#smiles['rxnvector'] = [smilesdf.pvec[i] - smilesdf.rvec[i] for i in range(len(smilesdf))]

In [15]:
[array_in_nd_array(vec, x) for vec in smiles['rvec']]

[True, True, True, True, True, True, True, True, True, True]

In [16]:
[array_in_nd_array(vec, x) for vec in smiles['pvec']]

[True, True, True, True, False, False, True, True, True, True]

In [33]:
np.corrcoef(smiles['rxnvector'])

array([[ 1.        , -0.00168265,  0.06415584, -0.01032847],
       [-0.00168265,  1.        ,  0.71701091,  0.34875671],
       [ 0.06415584,  0.71701091,  1.        ,  0.78291073],
       [-0.01032847,  0.34875671,  0.78291073,  1.        ]])