# Loading libs & Setting model parameters

In [1]:
# Path to Neural Fingerprint scripts

import sys

# Directory holding the Neural Fingerprint helper modules imported below
# (build_vanilla_net, build_convnet, util, optimizers).
NF_UTILS_DIR = '../../../scripts/baselines/neuralfingerprints/utils'

# Guarded so that re-running this cell does not keep appending duplicates.
if NF_UTILS_DIR not in sys.path:
    sys.path.append(NF_UTILS_DIR)



In [8]:
import autograd.numpy as np
import autograd.numpy.random as npr

from build_vanilla_net import build_morgan_deep_net
from build_convnet import build_conv_deep_net
from util import normalize_array, build_batched_grad
from optimizers import adam
from util import rmse

from autograd import grad

In [19]:
from sklearn.metrics import r2_score

In [9]:
from rdkit.Chem import MolFromSmiles

In [39]:
def train_nn(pred_fun, loss_fun, num_weights, train_smiles, train_raw_targets, train_params, seed=0,
             validation_smiles=None, validation_raw_targets=None):
    """Train a network with Adam and return a predictor in the targets' original units.

    Parameters
    ----------
    pred_fun : callable
        Called as ``pred_fun(weights, smiles)``.
    loss_fun : callable
        Called as ``loss_fun(weights, smiles, targets)``; its gradient is taken
        with ``autograd.grad``.
    num_weights : int
        Length of the flat weight vector to initialise.
    train_smiles, train_raw_targets
        Training inputs and their un-normalised targets.
    train_params : dict
        Must provide 'init_scale', 'batch_size', 'num_iters' and 'step_size'.
    seed : int
        Seed of the RandomState used to draw the initial weights.
    validation_smiles, validation_raw_targets : optional
        When given, validation RMSE and R2 are reported alongside the
        training metrics.

    Returns
    -------
    tuple
        ``(predict_func, trained_weights, training_curve)`` where
        ``predict_func(new_smiles)`` returns predictions mapped back through
        ``undo_norm`` and ``training_curve`` holds the loss recorded every
        10th iteration.

    Raises
    ------
    ValueError
        If ``validation_smiles`` is given without ``validation_raw_targets``.
    """
    # Fail fast instead of crashing inside the first callback.
    if validation_smiles is not None and validation_raw_targets is None:
        raise ValueError("validation_raw_targets must be given together with validation_smiles")

    # Single-argument print(...) behaves the same as a statement (Python 2)
    # and as a function (Python 3), and avoids printing tuple reprs.
    print("Total number of weights in the network: %s" % (num_weights,))
    init_weights = npr.RandomState(seed).randn(num_weights) * train_params['init_scale']

    # normalize_array returns the normalised targets plus the function used
    # below to map predictions back to the raw target units.
    train_targets, undo_norm = normalize_array(train_raw_targets)

    # Subset used for progress reporting (currently the whole training set).
    # Every metric below uses the same slice so shapes always agree.
    num_print_examples = len(train_smiles)
    report_smiles = train_smiles[:num_print_examples]
    report_targets = train_targets[:num_print_examples]
    report_raw_targets = train_raw_targets[:num_print_examples]

    training_curve = []

    def callback(weights, iter):
        """Report progress every 10th iteration and record the loss."""
        if iter % 10 != 0:
            return

        print("max of weights %s" % (np.max(np.abs(weights)),))

        train_preds = undo_norm(pred_fun(weights, report_smiles))
        cur_loss = loss_fun(weights, report_smiles, report_targets)
        training_curve.append(cur_loss)
        print("Iteration %s loss %s train RMSE %s Train R2 %s" % (
            iter, cur_loss,
            rmse(train_preds, report_raw_targets),
            r2_score(report_raw_targets, train_preds)))

        if validation_smiles is not None:
            validation_preds = undo_norm(pred_fun(weights, validation_smiles))
            print("Validation RMSE %s : %s Validation R2 %s : %s" % (
                iter, rmse(validation_preds, validation_raw_targets),
                iter, r2_score(validation_raw_targets, validation_preds)))

    # Build gradient using autograd.
    grad_fun = grad(loss_fun)
    grad_fun_with_data = build_batched_grad(grad_fun, train_params['batch_size'],
                                            train_smiles, train_targets)

    # Optimize weights.
    trained_weights = adam(grad_fun_with_data, init_weights, callback=callback,
                           num_iters=train_params['num_iters'], step_size=train_params['step_size'])

    def predict_func(new_smiles):
        """Returns to the original units that the raw targets were in."""
        return undo_norm(pred_fun(trained_weights, new_smiles))

    return predict_func, trained_weights, training_curve

In [31]:
def print_performance(pred_func):
    """Print train/test RMSE and R2 for ``pred_func`` and return the test R2.

    Reads the notebook-level globals ``train_inputs``, ``train_targets``,
    ``test_inputs``, ``test_targets`` and ``task_params['target_name']``.

    Parameters
    ----------
    pred_func : callable
        Maps an array of inputs to predictions in raw target units.

    Returns
    -------
    The R2 score of the test predictions.
    """
    train_preds = pred_func(train_inputs)
    test_preds = pred_func(test_inputs)

    # Computed once; the same value is printed and returned.
    test_r2 = r2_score(test_targets, test_preds)

    # Single-argument print(...) yields the same text under Python 2 and 3.
    print("\nPerformance  on " + task_params['target_name'] + ":")
    print("Train RMSE: %s" % (rmse(train_preds, train_targets),))
    print("Train R2: %s" % (r2_score(train_targets, train_preds),))

    print("Test RMSE:  %s" % (rmse(test_preds, test_targets),))
    print("Test R2:  %s" % (test_r2,))
    print("-" * 80)
    return test_r2

def run_morgan_experiment():
    """Train the Morgan-fingerprint network and report its performance.

    Uses the notebook-level ``model_params``, ``vanilla_net_params``,
    ``train_params`` and the train/validation splits. Returns whatever
    ``print_performance`` returns for the trained predictor.
    """
    fp_length = model_params['fp_length']
    fp_depth = model_params['fp_depth']
    loss_fun, pred_fun, net_parser = build_morgan_deep_net(
        fp_length, fp_depth, vanilla_net_params)

    training_result = train_nn(
        pred_fun, loss_fun, len(net_parser), train_inputs, train_targets,
        train_params,
        validation_smiles=val_inputs,
        validation_raw_targets=val_targets,
    )
    # Only the predictor is needed; weights and training curve are discarded.
    predict_func = training_result[0]
    return print_performance(predict_func)

def run_conv_experiment():
    """Train the neural (convolutional) fingerprint network.

    Uses the notebook-level ``model_params``, ``vanilla_net_params``,
    ``train_params`` and the train/validation/test splits.

    Returns
    -------
    tuple
        ``(test R2, test RMSE)`` of the trained predictor.
    """
    # One hidden conv layer of width `conv_width` per fingerprint-depth level.
    hidden_sizes = [model_params['conv_width'] for _ in range(model_params['fp_depth'])]
    arch = {
        'num_hidden_features': hidden_sizes,
        'fp_length': model_params['fp_length'],
        'normalize': 1,
    }
    loss_fun, pred_fun, conv_parser = build_conv_deep_net(
        arch, vanilla_net_params, model_params['L2_reg'])

    training_result = train_nn(
        pred_fun, loss_fun, len(conv_parser), train_inputs, train_targets,
        train_params,
        validation_smiles=val_inputs,
        validation_raw_targets=val_targets,
    )
    predict_func = training_result[0]

    test_predictions = predict_func(test_inputs)
    test_r2 = r2_score(test_targets, test_predictions)
    test_rmse = rmse(test_predictions, test_targets)
    return test_r2, test_rmse


In [2]:
# new function for loading our datasets and looking for molecules with degree>5
def load_data(dataset_path = '../../../data/3_final_data/split_data', prefix_name='logP_pH_range_mean', VALUE_COLUMN = 'logP', SMILES_COLUMN='smiles'):
    """Load the train/test/validation CSV splits and drop unusable molecules.

    For each split the file ``<dataset_path>/<prefix_name>_<split>.csv`` is
    read. A row is dropped when its SMILES cannot be parsed by RDKit or when
    any atom has a degree outside 0-5. Every dropped SMILES is written, one
    per line, to ``../../../data/raw/broken_smiles_<prefix_name>.txt``, which
    is truncated at the start of each call.

    Parameters
    ----------
    dataset_path : str
        Directory containing the split CSV files.
    prefix_name : str
        File-name prefix of the dataset.
    VALUE_COLUMN : str
        Name of the target column.
    SMILES_COLUMN : str
        Name of the SMILES column.

    Returns
    -------
    dict
        Maps 'train', 'test' and 'validation' to a tuple
        ``(smiles_array, values_array)``.
    """
    import os
    import pandas as pd
    from rdkit.Chem import MolFromSmiles

    allowed_degrees = (0, 1, 2, 3, 4, 5)
    broken_smiles_path = '../../../data/raw/broken_smiles_' + prefix_name + '.txt'

    datasets = {}

    # Opened once in write mode: truncates the previous log and avoids
    # reopening the file for every rejected molecule.
    with open(broken_smiles_path, 'w') as broken_file:

        def check_molecules(smiles):
            """Return True if the molecule is usable; log and reject it otherwise."""
            mol = MolFromSmiles(smiles)
            # MolFromSmiles returns None for unparsable SMILES; treat those as
            # broken instead of failing on mol.GetAtoms().
            is_broken = mol is None or any(
                atom.GetDegree() not in allowed_degrees for atom in mol.GetAtoms())
            if is_broken:
                broken_file.write(smiles + '\n')
                return False
            return True

        for split in ['train', 'test', 'validation']:
            csv_path = os.path.join(dataset_path, prefix_name + '_' + split + '.csv')
            data = pd.read_csv(csv_path)
            data = data[data[SMILES_COLUMN].map(check_molecules)]
            datasets[split] = (data[SMILES_COLUMN].values, data[VALUE_COLUMN].values)

    return datasets

# Run model on the logP_pH_range_mean dataset

## Exp 1

In [26]:
# Example regression script using neural fingerprints.
#
# Compares Morgan fingerprints to neural fingerprints.
# Exp 1 configuration: dataset, fingerprint/model sizes and optimiser settings.
task_params = {
    'target_name': 'logP',
    'data_file': 'logP_pH_range_mean',
}

model_params = {
    'fp_length': 20,        # Usually neural fps need far fewer dimensions than morgan.
    'fp_depth': 3,          # The depth of the network equals the fingerprint radius.
    'conv_width': 10,       # Only the neural fps need this parameter.
    'h1_size': 100,         # Size of hidden layer of network on top of fps.
    'L2_reg': np.exp(-2),
}

train_params = {
    'num_iters': 100,
    'batch_size': 100,
    'init_scale': np.exp(-4),
    'step_size': np.exp(-6),
}

# Architecture of the network that sits on top of the fingerprints
# (one hidden layer).
vanilla_net_params = {
    'layer_sizes': [model_params['fp_length'], model_params['h1_size']],
    'normalize': True,
    'L2_reg': model_params['L2_reg'],
    'nll_func': rmse,
}

In [27]:
# Load the dataset named in task_params and unpack each split into
# (SMILES, target) arrays used by the experiment functions.
print("Loading data...")
data = load_data(prefix_name=task_params['data_file'],
                 VALUE_COLUMN=task_params['target_name'])

train_inputs, train_targets = data['train']
val_inputs, val_targets = data['validation']
test_inputs, test_targets = data['test']

Loading data...


In [32]:
# Run both baselines with the current configuration and print a summary line.
print(" ".join(["Task params", str(task_params)]))
print("")
print("Starting Morgan fingerprint experiment...")
test_r2_morgan = run_morgan_experiment()
print("Starting neural fingerprint experiment...")
test_r2_neural, test_rmse_neural = run_conv_experiment()
print("")
print(" ".join([
    "Morgan test R2:", str(test_r2_morgan),
    "Neural test RMSE:", str(test_rmse_neural),
    "Neural test R2:", str(test_r2_neural),
]))

Task params {'target_name': 'logP', 'data_file': 'logP_pH_range_mean'}

Starting Morgan fingerprint experiment...
('Total number of weights in the network:', 2201)
('max of weights', 0.05807842887286866)
('Iteration', 0, 'loss', 0.9999109564225269, 'train RMSE', 2.1462148290688954) ('Validation RMSE', 0, ':', 2.0925806853436244) ('max of weights', 0.0700485368267423)
('Iteration', 10, 'loss', 0.9888576513072963, 'train RMSE', 2.1224386679568648) ('Validation RMSE', 10, ':', 2.0757267156712973) ('max of weights', 0.0925759792323924)
('Iteration', 20, 'loss', 0.9680222993369636, 'train RMSE', 2.0775635940746615) ('Validation RMSE', 20, ':', 2.0476423784000564) ('max of weights', 0.1162597058294584)
('Iteration', 30, 'loss', 0.9456881304325262, 'train RMSE', 2.029384294910751) ('Validation RMSE', 30, ':', 2.031594819258093) ('max of weights', 0.1397804585857589)
('Iteration', 40, 'loss', 0.9295632848378832, 'train RMSE', 1.994520261453428) ('Validation RMSE', 40, ':', 2.034066592493314) (

## Conclusion:

A fingerprint length of 50 (`fp_length=50`) performs better than a fingerprint length of 20 (`fp_length=20`).

## Exp 2

In [33]:
# Example regression script using neural fingerprints.
#
# Compares Morgan fingerprints to neural fingerprints.



# Exp 2 configuration: dataset, fingerprint/model sizes and optimiser settings.
task_params = {
    'target_name': 'logP',
    'data_file': 'logP_pH_range_mean',
}

model_params = {
    'fp_length': 50,        # Usually neural fps need far fewer dimensions than morgan.
    'fp_depth': 4,          # The depth of the network equals the fingerprint radius.
    'conv_width': 20,       # Only the neural fps need this parameter.
    'h1_size': 100,         # Size of hidden layer of network on top of fps.
    'L2_reg': np.exp(-2),
}

train_params = {
    'num_iters': 100,
    'batch_size': 100,
    'init_scale': np.exp(-4),
    'step_size': np.exp(-6),
}

# Architecture of the network that sits on top of the fingerprints
# (one hidden layer).
vanilla_net_params = {
    'layer_sizes': [model_params['fp_length'], model_params['h1_size']],
    'normalize': True,
    'L2_reg': model_params['L2_reg'],
    'nll_func': rmse,
}



In [34]:
# Run both baselines with the current configuration and print a summary line.
print(" ".join(["Task params", str(task_params)]))
print("")
print("Starting Morgan fingerprint experiment...")
test_r2_morgan = run_morgan_experiment()
print("Starting neural fingerprint experiment...")
test_r2_neural, test_rmse_neural = run_conv_experiment()
print("")
print(" ".join([
    "Morgan test R2:", str(test_r2_morgan),
    "Neural test RMSE:", str(test_rmse_neural),
    "Neural test R2:", str(test_r2_neural),
]))

Task params {'target_name': 'logP', 'data_file': 'logP_pH_range_mean'}

Starting Morgan fingerprint experiment...
('Total number of weights in the network:', 5201)
('max of weights', 0.06962983567500523)
('Iteration', 0, 'loss', 0.9988033939079917, 'train RMSE', 2.1438361639957364) ('Validation RMSE', 0, ':', 2.0898162504176137) ('max of weights', 0.09032801579807535)
('Iteration', 10, 'loss', 0.9371983624390124, 'train RMSE', 2.011529194456203) ('Validation RMSE', 10, ':', 2.0049638705942296) ('max of weights', 0.10916021852213828)
('Iteration', 20, 'loss', 0.8513427238668987, 'train RMSE', 1.8270646319848718) ('Validation RMSE', 20, ':', 1.9166215982695638) ('max of weights', 0.12248950704544143)
('Iteration', 30, 'loss', 0.7895184035368686, 'train RMSE', 1.6941674487298546) ('Validation RMSE', 30, ':', 1.848161025661022) ('max of weights', 0.1441955227267383)
('Iteration', 40, 'loss', 0.7549531765229186, 'train RMSE', 1.619826848633481) ('Validation RMSE', 40, ':', 1.776781278913353

# Run model on the logp_mean dataset

## Exp 3

In [36]:
# Example regression script using neural fingerprints.
#
# Compares Morgan fingerprints to neural fingerprints.



# Exp 3 configuration: dataset, fingerprint/model sizes and optimiser settings.
task_params = {
    'target_name': 'logP',
    'data_file': 'logp_mean',
}

model_params = {
    'fp_length': 50,        # Usually neural fps need far fewer dimensions than morgan.
    'fp_depth': 4,          # The depth of the network equals the fingerprint radius.
    'conv_width': 20,       # Only the neural fps need this parameter.
    'h1_size': 100,         # Size of hidden layer of network on top of fps.
    'L2_reg': np.exp(-2),
}

train_params = {
    'num_iters': 100,
    'batch_size': 100,
    'init_scale': np.exp(-4),
    'step_size': np.exp(-6),
}

# Architecture of the network that sits on top of the fingerprints
# (one hidden layer).
vanilla_net_params = {
    'layer_sizes': [model_params['fp_length'], model_params['h1_size']],
    'normalize': True,
    'L2_reg': model_params['L2_reg'],
    'nll_func': rmse,
}



In [37]:
# Load the dataset named in task_params and unpack each split into
# (SMILES, target) arrays used by the experiment functions.
print("Loading data...")
data = load_data(prefix_name=task_params['data_file'],
                 VALUE_COLUMN=task_params['target_name'])

train_inputs, train_targets = data['train']
val_inputs, val_targets = data['validation']
test_inputs, test_targets = data['test']

Loading data...


In [38]:
# Run both baselines with the current configuration and print a summary line.
print(" ".join(["Task params", str(task_params)]))
print("")
print("Starting Morgan fingerprint experiment...")
test_r2_morgan = run_morgan_experiment()
print("Starting neural fingerprint experiment...")
test_r2_neural, test_rmse_neural = run_conv_experiment()
print("")
print(" ".join([
    "Morgan test R2:", str(test_r2_morgan),
    "Neural test RMSE:", str(test_rmse_neural),
    "Neural test R2:", str(test_r2_neural),
]))

Task params {'target_name': 'logP', 'data_file': 'logp_mean'}

Starting Morgan fingerprint experiment...
('Total number of weights in the network:', 5201)
('max of weights', 0.06962983567500523)
('Iteration', 0, 'loss', 0.999519254924889, 'train RMSE', 1.8582909622736343) ('Validation RMSE', 0, ':', 1.8535196977483208) ('max of weights', 0.0768049562057181)
('Iteration', 10, 'loss', 0.9668801139856394, 'train RMSE', 1.79755642053455) ('Validation RMSE', 10, ':', 1.7982404347941785) ('max of weights', 0.10034534569408528)
('Iteration', 20, 'loss', 0.9308119004850787, 'train RMSE', 1.7304005521749122) ('Validation RMSE', 20, ':', 1.739658983911634) ('max of weights', 0.12042060326640294)
('Iteration', 30, 'loss', 0.8995877570663567, 'train RMSE', 1.6722443320109857) ('Validation RMSE', 30, ':', 1.6869890633195388) ('max of weights', 0.1324797097484821)
('Iteration', 40, 'loss', 0.8817925019039489, 'train RMSE', 1.639077186395681) ('Validation RMSE', 40, ':', 1.6539269288043832) ('max of 

# Run model on the logP_wo_parameters dataset

## Exp 4

In [42]:
# Example regression script using neural fingerprints.
#
# Compares Morgan fingerprints to neural fingerprints.



# Exp 4 configuration: dataset, fingerprint/model sizes and optimiser settings.
task_params = {
    'target_name': 'logP',
    'data_file': 'logP_wo_parameters',
}

model_params = {
    'fp_length': 20,        # Usually neural fps need far fewer dimensions than morgan.
    'fp_depth': 3,          # The depth of the network equals the fingerprint radius.
    'conv_width': 20,       # Only the neural fps need this parameter.
    'h1_size': 100,         # Size of hidden layer of network on top of fps.
    'L2_reg': np.exp(-2),
}

train_params = {
    'num_iters': 100,
    'batch_size': 100,
    'init_scale': np.exp(-4),
    'step_size': np.exp(-6),
}

# Architecture of the network that sits on top of the fingerprints
# (one hidden layer).
vanilla_net_params = {
    'layer_sizes': [model_params['fp_length'], model_params['h1_size']],
    'normalize': True,
    'L2_reg': model_params['L2_reg'],
    'nll_func': rmse,
}



In [43]:
# Load the dataset named in task_params and unpack each split into
# (SMILES, target) arrays used by the experiment functions.
print("Loading data...")
data = load_data(prefix_name=task_params['data_file'],
                 VALUE_COLUMN=task_params['target_name'])

train_inputs, train_targets = data['train']
val_inputs, val_targets = data['validation']
test_inputs, test_targets = data['test']

Loading data...


In [44]:
# Run both baselines with the current configuration and print a summary line.
print(" ".join(["Task params", str(task_params)]))
print("")
print("Starting Morgan fingerprint experiment...")
test_r2_morgan = run_morgan_experiment()
print("Starting neural fingerprint experiment...")
test_r2_neural, test_rmse_neural = run_conv_experiment()
print("")
print(" ".join([
    "Morgan test R2:", str(test_r2_morgan),
    "Neural test RMSE:", str(test_rmse_neural),
    "Neural test R2:", str(test_r2_neural),
]))

Task params {'target_name': 'logP', 'data_file': 'logP_wo_parameters'}

Starting Morgan fingerprint experiment...
('Total number of weights in the network:', 2201)
('max of weights', 0.05807842887286866)
('Iteration', 0, 'loss', 0.9999172456394382, 'train RMSE', 1.8144254556546984) Train R2 0 : 0.0002529018178529796
('Validation RMSE', 0, ':', 1.7858106538818643) Validation R2 0 : 0.0004917087827531752
('max of weights', 0.06878359295281855)
('Iteration', 10, 'loss', 0.994881701824503, 'train RMSE', 1.8052603503696412) Train R2 10 : 0.01032732581620699
('Validation RMSE', 10, ':', 1.7769096562502624) Validation R2 10 : 0.010430555597897628
('max of weights', 0.08660566636204872)
('Iteration', 20, 'loss', 0.9884930325378148, 'train RMSE', 1.7936116081713704) Train R2 20 : 0.023058173471691146
('Validation RMSE', 20, ':', 1.7662137672144962) Validation R2 20 : 0.02230788364604719
('max of weights', 0.10413101972886472)
('Iteration', 30, 'loss', 0.9794708619265983, 'train RMSE', 1.7771407

## Exp 5

In [45]:
# Example regression script using neural fingerprints.
#
# Compares Morgan fingerprints to neural fingerprints.



# Exp 5 configuration: dataset, fingerprint/model sizes and optimiser settings.
task_params = {
    'target_name': 'logP',
    'data_file': 'logP_wo_parameters',
}

model_params = {
    'fp_length': 50,        # Usually neural fps need far fewer dimensions than morgan.
    'fp_depth': 4,          # The depth of the network equals the fingerprint radius.
    'conv_width': 20,       # Only the neural fps need this parameter.
    'h1_size': 100,         # Size of hidden layer of network on top of fps.
    'L2_reg': np.exp(-2),
}

train_params = {
    'num_iters': 1000,
    'batch_size': 100,
    'init_scale': np.exp(-4),
    'step_size': np.exp(-6),
}

# Architecture of the network that sits on top of the fingerprints
# (one hidden layer).
vanilla_net_params = {
    'layer_sizes': [model_params['fp_length'], model_params['h1_size']],
    'normalize': True,
    'L2_reg': model_params['L2_reg'],
    'nll_func': rmse,
}



In [46]:
# Load the dataset named in task_params and unpack each split into
# (SMILES, target) arrays used by the experiment functions.
print("Loading data...")
data = load_data(prefix_name=task_params['data_file'],
                 VALUE_COLUMN=task_params['target_name'])

train_inputs, train_targets = data['train']
val_inputs, val_targets = data['validation']
test_inputs, test_targets = data['test']

Loading data...


In [47]:
# Run both baselines with the current configuration and print a summary line.
print(" ".join(["Task params", str(task_params)]))
print("")
print("Starting Morgan fingerprint experiment...")
test_r2_morgan = run_morgan_experiment()
print("Starting neural fingerprint experiment...")
test_r2_neural, test_rmse_neural = run_conv_experiment()
print("")
print(" ".join([
    "Morgan test R2:", str(test_r2_morgan),
    "Neural test RMSE:", str(test_rmse_neural),
    "Neural test R2:", str(test_r2_neural),
]))

Task params {'target_name': 'logP', 'data_file': 'logP_wo_parameters'}

Starting Morgan fingerprint experiment...
('Total number of weights in the network:', 5201)
('max of weights', 0.06962983567500523)
('Iteration', 0, 'loss', 0.99972477921577, 'train RMSE', 1.8140751113765883) Train R2 0 : 0.000638943425880667
('Validation RMSE', 0, ':', 1.7855753334019995) Validation R2 0 : 0.0007551065550347458
('max of weights', 0.07677219665574586)
('Iteration', 10, 'loss', 0.9754579071706453, 'train RMSE', 1.7700025371131636) Train R2 10 : 0.04860761815739967
('Validation RMSE', 10, ':', 1.7423064072614651) Validation R2 10 : 0.04859670895168722
('max of weights', 0.09707957703843811)
('Iteration', 20, 'loss', 0.9380743491560056, 'train RMSE', 1.7020849929486046) Train R2 20 : 0.12021940835986455
('Validation RMSE', 20, ':', 1.678486814343979) Validation R2 20 : 0.1170188144964952
('max of weights', 0.11799096659658179)
('Iteration', 30, 'loss', 0.9004124434833619, 'train RMSE', 1.6336303221122

('Iteration', 340, 'loss', 0.7973195078181227, 'train RMSE', 1.445489334067259) Train R2 340 : 0.36548531940448314
('Validation RMSE', 340, ':', 1.4905755394226547) Validation R2 340 : 0.3036564421000988
('max of weights', 0.3088064897503717)
('Iteration', 350, 'loss', 0.7950066796962716, 'train RMSE', 1.4412675864277555) Train R2 350 : 0.369186279279878
('Validation RMSE', 350, ':', 1.4913257963534858) Validation R2 350 : 0.30295527929860766
('max of weights', 0.313246889166233)
('Iteration', 360, 'loss', 0.7941052189758253, 'train RMSE', 1.4395839838812052) Train R2 360 : 0.3706591761812629
('Validation RMSE', 360, ':', 1.503533469346619) Validation R2 360 : 0.2914968553106013
('max of weights', 0.31807923935743576)
('Iteration', 370, 'loss', 0.7910650844681247, 'train RMSE', 1.4340140701909996) Train R2 370 : 0.3755197369608536
('Validation RMSE', 370, ':', 1.5135215238033377) Validation R2 370 : 0.28205233924317796
('max of weights', 0.324081063393978)
('Iteration', 380, 'loss', 0.

('Iteration', 680, 'loss', 0.7376730379900263, 'train RMSE', 1.3359574621980936) Train R2 680 : 0.4580026638580511
('Validation RMSE', 680, ':', 1.4674302143359605) Validation R2 680 : 0.3251138808436508
('max of weights', 0.4064931274025742)
('Iteration', 690, 'loss', 0.7351534959803375, 'train RMSE', 1.3313702471439912) Train R2 690 : 0.46171833585360944
('Validation RMSE', 690, ':', 1.4629978130665793) Validation R2 690 : 0.32918473657847935
('max of weights', 0.40125855757527606)
('Iteration', 700, 'loss', 0.7359524813632108, 'train RMSE', 1.332801379478859) Train R2 700 : 0.4605604816284554
('Validation RMSE', 700, ':', 1.4645697270356706) Validation R2 700 : 0.32774245094177357
('max of weights', 0.4057470529124215)
('Iteration', 710, 'loss', 0.7358694926934003, 'train RMSE', 1.3326125727788583) Train R2 710 : 0.46071330647209996
('Validation RMSE', 710, ':', 1.4736469467791815) Validation R2 710 : 0.31938349088527906
('max of weights', 0.40950321535172707)
('Iteration', 720, 'lo

('Validation RMSE', 0, ':', 1.7873409926215813) Validation R2 0 : -0.0012220693715578879
('max of weights', 0.08851418804094717)
('Iteration', 10, 'loss', 0.9647716724535383, 'train RMSE', 1.7505685815645893) Train R2 10 : 0.0693847798078796
('Validation RMSE', 10, ':', 1.7187692791232487) Validation R2 10 : 0.07412843348558107
('max of weights', 0.1117341296520073)
('Iteration', 20, 'loss', 0.9626491267297477, 'train RMSE', 1.7466497192905441) Train R2 20 : 0.07354670849891154
('Validation RMSE', 20, ':', 1.7140115783040382) Validation R2 20 : 0.07924712303560844
('max of weights', 0.13844185389152355)
('Iteration', 30, 'loss', 0.9574710450888683, 'train RMSE', 1.7371096302922395) Train R2 30 : 0.08363952704263455
('Validation RMSE', 30, ':', 1.7043702330329515) Validation R2 30 : 0.08957649190807015
('max of weights', 0.1710701401625447)
('Iteration', 40, 'loss', 0.9251020635197922, 'train RMSE', 1.6780899317418991) Train R2 40 : 0.14484989142578097
('Validation RMSE', 40, ':', 1.629

('Validation RMSE', 340, ':', 0.6733175164633974) Validation R2 340 : 0.8579126522533201
('max of weights', 0.6893679894749052)
('Iteration', 350, 'loss', 0.37405282823556746, 'train RMSE', 0.6760343760757622) Train R2 350 : 0.861212787815701
('Validation RMSE', 350, ':', 0.6863492298910436) Validation R2 350 : 0.8523593721555828
('max of weights', 0.712334853726883)
('Iteration', 360, 'loss', 0.36290699499432844, 'train RMSE', 0.6557877480326573) Train R2 360 : 0.8694014097666167
('Validation RMSE', 360, ':', 0.6772393041451343) Validation R2 360 : 0.8562526353109287
('max of weights', 0.7294061234030614)
('Iteration', 370, 'loss', 0.36366092236698866, 'train RMSE', 0.6570958490172459) Train R2 370 : 0.8688798796570243
('Validation RMSE', 370, ':', 0.6829784585677718) Validation R2 370 : 0.8538059848183592
('max of weights', 0.7423256035531871)
('Iteration', 380, 'loss', 0.359777698770096, 'train RMSE', 0.6500492569922063) Train R2 380 : 0.8716770236318497
('Validation RMSE', 380, ':'

('Validation RMSE', 680, ':', 0.6090903470661698) Validation R2 680 : 0.8837269631750186
('max of weights', 1.2745159005768647)
('Iteration', 690, 'loss', 0.31375646536999685, 'train RMSE', 0.5659339882280306) Train R2 690 : 0.9027379439594689
('Validation RMSE', 690, ':', 0.5928457025460371) Validation R2 690 : 0.8898463394054158
('max of weights', 1.2900874747751443)
('Iteration', 700, 'loss', 0.33901089705366516, 'train RMSE', 0.611736855194826) Train R2 700 : 0.8863573940357803
('Validation RMSE', 700, ':', 0.6317875586081678) Validation R2 700 : 0.8748998816197803
('max of weights', 1.3124248455551677)
('Iteration', 710, 'loss', 0.3135109314020986, 'train RMSE', 0.5654665290746176) Train R2 710 : 0.9028985537194079
('Validation RMSE', 710, ':', 0.5899948935990627) Validation R2 710 : 0.890903181045898
('max of weights', 1.3236370049305577)
('Iteration', 720, 'loss', 0.29886250816922827, 'train RMSE', 0.5388387925109017) Train R2 720 : 0.9118282216396815
('Validation RMSE', 720, ':

In [12]:
# NOTE(review): this cell carries an older execution count (In [12]) and its
# recorded output appears to predate the current experiment functions.
# As defined in this notebook, run_morgan_experiment() returns the test R2
# and run_conv_experiment() returns (test R2, test RMSE), so the results are
# unpacked and labelled accordingly instead of both being called "RMSE".
print("Task params %s" % (task_params,))
print("")
print("Starting Morgan fingerprint experiment...")
test_r2_morgan = run_morgan_experiment()
print("Starting neural fingerprint experiment...")
test_r2_neural, test_rmse_neural = run_conv_experiment()
print("")
print("Morgan test R2: %s Neural test RMSE: %s Neural test R2: %s" % (
    test_r2_morgan, test_rmse_neural, test_r2_neural))

Task params {'target_name': 'logP', 'data_file': 'logP_wo_parameters'}

Starting Morgan fingerprint experiment...
('Total number of weights in the network:', 5201)
('max of weights', 0.06962983567500523)
('Iteration', 0, 'loss', 0.99972477921577, 'train RMSE', 1.8140751113765883) ('Validation RMSE', 0, ':', 1.7855753334019995) ('max of weights', 0.07677219665574586)
('Iteration', 10, 'loss', 0.9754579071706453, 'train RMSE', 1.7700025371131636) ('Validation RMSE', 10, ':', 1.7423064072614651) ('max of weights', 0.09707957703843811)
('Iteration', 20, 'loss', 0.9380743491560056, 'train RMSE', 1.7020849929486046) ('Validation RMSE', 20, ':', 1.678486814343979) ('max of weights', 0.11799096659658179)
('Iteration', 30, 'loss', 0.9004124434833619, 'train RMSE', 1.6336303221122352) ('Validation RMSE', 30, ':', 1.614749972389661) ('max of weights', 0.13703311646014693)
('Iteration', 40, 'loss', 0.8792170806671804, 'train RMSE', 1.5950658116336225) ('Validation RMSE', 40, ':', 1.574223163749969

('Iteration', 480, 'loss', 0.7683166468475676, 'train RMSE', 1.392296915757392) ('Validation RMSE', 480, ':', 1.477061060778449) ('max of weights', 0.35885454244202714)
('Iteration', 490, 'loss', 0.7688628748812374, 'train RMSE', 1.3932433869488092) ('Validation RMSE', 490, ':', 1.4695699168373548) ('max of weights', 0.359715595188665)
('Iteration', 500, 'loss', 0.7643861193627861, 'train RMSE', 1.3850879817864286) ('Validation RMSE', 500, ':', 1.4713753863878272) ('max of weights', 0.36853387227072104)
('Iteration', 510, 'loss', 0.7623287307165033, 'train RMSE', 1.3813362326235565) ('Validation RMSE', 510, ':', 1.4710839608097057) ('max of weights', 0.36522637727435897)
('Iteration', 520, 'loss', 0.7621369764060578, 'train RMSE', 1.3809711133576985) ('Validation RMSE', 520, ':', 1.4726569993769463) ('max of weights', 0.36597289013589535)
('Iteration', 530, 'loss', 0.7631810831409311, 'train RMSE', 1.382831977878866) ('Validation RMSE', 530, ':', 1.4797093866116438) ('max of weights', 

('Iteration', 970, 'loss', 0.7179562690518768, 'train RMSE', 1.2992545935267674) ('Validation RMSE', 970, ':', 1.4770247222186292) ('max of weights', 0.49058592359451497)
('Iteration', 980, 'loss', 0.7090781848791243, 'train RMSE', 1.283107320327959) ('Validation RMSE', 980, ':', 1.4788030928702542) ('max of weights', 0.5000463856106903)
('Iteration', 990, 'loss', 0.7110786830986975, 'train RMSE', 1.2866971014296755) ('Validation RMSE', 990, ':', 1.4886635725427888) 
Performance (RMSE) on logP:
Train: 1.277317737249006
Test:  1.4828592717261608
--------------------------------------------------------------------------------
Starting neural fingerprint experiment...
('Total number of weights in the network:', 32591)
('max of weights', 0.08535001578936458)
('Iteration', 0, 'loss', 1.0026942648417996, 'train RMSE', 1.819379784601551) ('Validation RMSE', 0, ':', 1.790742584458495) ('max of weights', 0.09035382889294036)
('Iteration', 10, 'loss', 0.9669116454403601, 'train RMSE', 1.75445812

('Iteration', 910, 'loss', 0.3041752058928946, 'train RMSE', 0.5476931605902587) ('Validation RMSE', 910, ':', 0.5877031417614789) ('max of weights', 1.486932106334076)
('Iteration', 920, 'loss', 0.2897449536879074, 'train RMSE', 0.5215236014882945) ('Validation RMSE', 920, ':', 0.5449270008293615) ('max of weights', 1.493375484155018)
('Iteration', 930, 'loss', 0.29053722744395244, 'train RMSE', 0.5229395700751343) ('Validation RMSE', 930, ':', 0.5458158991085782) ('max of weights', 1.501774259344498)
('Iteration', 940, 'loss', 0.2966521724502819, 'train RMSE', 0.5340069728136277) ('Validation RMSE', 940, ':', 0.5486011478935424) ('max of weights', 1.5086114384186529)
('Iteration', 950, 'loss', 0.3026879498405929, 'train RMSE', 0.5449681334631818) ('Validation RMSE', 950, ':', 0.5792164534288864) ('max of weights', 1.517239546317125)
('Iteration', 960, 'loss', 0.29243249667433446, 'train RMSE', 0.5263365051838831) ('Validation RMSE', 960, ':', 0.5555018724720935) ('max of weights', 1.