In [1]:
# Make the Neural Fingerprint scripts importable from this notebook.

import sys

# Append only once so that re-running this cell does not pile up duplicate
# entries on sys.path.
if '../neuralfingerprint' not in sys.path:
    sys.path.append('../neuralfingerprint')



In [2]:
from sklearn.metrics import r2_score

In [3]:
import os, pickle
import autograd.numpy as np
import autograd.numpy.random as npr
from autograd import grad
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import DrawingOptions
import matplotlib.pyplot as plt

from build_vanilla_net import build_morgan_deep_net, relu, build_standard_net
from build_convnet import build_conv_deep_net, build_convnet_fingerprint_fun
from util import normalize_array, build_batched_grad
from optimizers import adam
from util import rmse
from mol_graph import degrees
from data_util import remove_duplicates


In [4]:
# new function for loading our datasets
def load_data(dataset_path = '../../mol_properties/data/3_final_data/split_data', prefix_name='logP_pH_range_mean', VALUE_COLUMN = 'logP', SMILES_COLUMN='smiles', filter_broken=False):
    """Load the train / test / validation splits of one dataset.

    For every split the file `<dataset_path>/<prefix_name>_<split>.csv` is
    read with pandas.

    Parameters
    ----------
    dataset_path : directory containing the split CSV files.
    prefix_name : file-name prefix shared by the three split files.
    VALUE_COLUMN : name of the column holding the regression target.
    SMILES_COLUMN : name of the column holding the SMILES strings.
    filter_broken : when True, drop rows whose SMILES cannot be parsed by
        RDKit or that contain an atom with a degree outside 0..5, and log the
        dropped SMILES to `broken_smiles_<prefix_name>.txt`.  Defaults to
        False, which keeps every row (the behaviour of the original code,
        where this filter was commented out).

    Returns
    -------
    dict mapping 'train', 'test' and 'validation' to a tuple
    (smiles_array, target_array).

    Side effects
    ------------
    `broken_smiles_<prefix_name>.txt` in the current working directory is
    truncated on every call, whether or not filtering is enabled.
    """
    import os
    import pandas as pd

    broken_smiles_path = 'broken_smiles_' + prefix_name + '.txt'

    def check_molecules(smiles):
        """Return True if `smiles` parses and every atom has degree 0..5."""
        # Imported here so RDKit is only required when filtering is requested.
        from rdkit.Chem import MolFromSmiles
        mol = MolFromSmiles(smiles)
        # MolFromSmiles returns None for SMILES it cannot parse; treat those
        # as broken instead of crashing on mol.GetAtoms().
        is_valid = mol is not None and all(
            atom.GetDegree() in (0, 1, 2, 3, 4, 5) for atom in mol.GetAtoms())
        if not is_valid:
            with open(broken_smiles_path, 'a') as f:
                f.write(smiles + '\n')
        return is_valid

    # Start every load with an empty log of broken SMILES.
    with open(broken_smiles_path, 'w'):
        pass

    datasets = {}
    for split in ('train', 'test', 'validation'):
        data = pd.read_csv(os.path.join(dataset_path, prefix_name + '_' + split + '.csv'))
        if filter_broken:
            data = data[data[SMILES_COLUMN].map(check_molecules)]
        datasets[split] = (data[SMILES_COLUMN].values, data[VALUE_COLUMN].values)

    return datasets

In [60]:
def parse_training_params(params):
    """Build the training and regression-head settings from `params`.

    Reads the notebook globals num_epochs, batch_size, normalize and
    activation in addition to `params`.  Only 'init_scale', 'fp_length' and
    'l2_penalty' are taken from `params`; 'learn_rate', 'b1', 'b2' and
    'l1_penalty' are not forwarded.

    Returns (nn_train_params, vanilla_net_params).
    """
    nn_train_params = dict(
        num_epochs=num_epochs,
        batch_size=batch_size,
        param_scale=params['init_scale'],
    )
    vanilla_net_params = dict(
        layer_sizes=[params['fp_length']],  # Linear regression.
        normalize=normalize,
        L2_reg=params['l2_penalty'],
        activation_function=activation,
    )
    return nn_train_params, vanilla_net_params

def train_nn(pred_fun, loss_fun, num_weights, train_smiles, train_raw_targets, train_params,
             validation_smiles=None, validation_raw_targets=None):
    """loss_fun has inputs (weights, smiles, targets)"""
    print "Total number of weights in the network:", num_weights
    npr.seed(0)
    init_weights = npr.randn(num_weights) * train_params['param_scale']

    train_targets, undo_norm = normalize_array(train_raw_targets)
    training_curve = []
    def callback(weights, iter):
        if iter % 10 == 0:
            print "max of weights", np.max(np.abs(weights))
            train_preds = undo_norm(pred_fun(weights, train_smiles))
            cur_loss = loss_fun(weights, train_smiles, train_targets)
            training_curve.append(cur_loss)
            print "Iteration", iter, "loss", cur_loss, "train RMSE", \
                np.sqrt(np.mean((train_preds - train_raw_targets)**2)),
            print "Train R2", iter, ":", \
                    r2_score(train_preds, train_raw_targets),
            if validation_smiles is not None:
                validation_preds = undo_norm(pred_fun(weights, validation_smiles))
                print "Validation RMSE", iter, ":", \
                    np.sqrt(np.mean((validation_preds - validation_raw_targets) ** 2)),
                print "Validation R2", iter, ":", \
                    r2_score(validation_preds, validation_raw_targets),
            dub_preds = undo_norm(pred_fun(weights, dub_wo_params_smiles))
            uniq_preds = undo_norm(pred_fun(weights, uniq_wo_params_smiles))
            print "Dub RMSE", iter, ":", rmse(dub_preds, dub_wo_params_targets)
            print "Unique RMSE", iter, ":", rmse(uniq_preds,  uniq_wo_params_targets)
            print "Dub R2", iter, ":", r2_score(dub_preds, dub_wo_params_targets)
            print "Unique R2", iter, ":", r2_score(uniq_preds,  uniq_wo_params_targets)

    grad_fun = grad(loss_fun)
    grad_fun_with_data = build_batched_grad(grad_fun, train_params['batch_size'],
                                            train_smiles, train_targets)

    num_iters = train_params['num_epochs'] * len(train_smiles) / train_params['batch_size']
    trained_weights = adam(grad_fun_with_data, init_weights, callback=callback,
                           num_iters=num_iters)#, step_size=train_params['learn_rate'],
                           #b1=train_params['b1'], b2=train_params['b2'])

    def predict_func(new_smiles):
        """Returns to the original units that the raw targets were in."""
        return undo_norm(pred_fun(trained_weights, new_smiles))
    return predict_func, trained_weights, training_curve


def train_neural_fingerprint():
    print "Loading data..."
    data = load_data(prefix_name = task_params['data_file'], VALUE_COLUMN = task_params['target_name'])

    train_inputs, train_targets = data['train']
    val_inputs,   val_targets   = data['validation']
    test_inputs,  test_targets  = data['test']

    print "Regression on", len(train_inputs), "training points."
    def print_performance(pred_func):
        train_preds = pred_func(train_inputs)
        test_preds = pred_func(test_inputs)
        dub_preds = pred_func(dub_wo_params_smiles)
        uniq_preds = pred_func(uniq_wo_params_smiles)
        print "\nPerformance (RMSE) on " + task_params['target_name'] + ":"
        print "Train:", rmse(train_preds, train_targets)
        print "Test: ", rmse(test_preds,  test_targets)
        print "Dub:", rmse(dub_preds, dub_wo_params_targets)
        print "Unique: ", rmse(uniq_preds,  uniq_wo_params_targets)
        
        print "\nPerformance (R2) on " + task_params['target_name'] + ":"
        print "Train:", r2_score(train_preds, train_targets)
        print "Test: ", r2_score(test_preds,  test_targets)
        print "Dub:", r2_score(dub_preds, dub_wo_params_targets)
        print "Unique: ", r2_score(uniq_preds,  uniq_wo_params_targets)
        print "-" * 80
        return rmse(test_preds,  test_targets)

    print "-" * 80
    print "Mean predictor"
    y_train_mean = np.mean(train_targets)
    print_performance(lambda x : y_train_mean*np.ones(len(x)))

    print "Task params", params
    nn_train_params, vanilla_net_params = parse_training_params(params)
    conv_arch_params['return_atom_activations'] = False

    print "Convnet fingerprints with neural net"
    loss_fun, pred_fun, conv_parser = \
        build_conv_deep_net(conv_arch_params, vanilla_net_params, params['l2_penalty'])
    num_weights = len(conv_parser)
    predict_func, trained_weights, conv_training_curve = \
         train_nn(pred_fun, loss_fun, num_weights, train_inputs, train_targets,
                 nn_train_params, validation_smiles=val_inputs, validation_raw_targets=val_targets)
    print_performance(predict_func)
    return trained_weights


def draw_molecule_with_highlights(filename, smiles, highlight_atoms):
    """Draw the molecule `smiles` with `highlight_atoms` marked and save it.

    The highlight colour and figure size come from the notebook globals
    highlight_color and figsize.  The figure is written to `filename` and
    closed afterwards.
    """
    options = DrawingOptions()
    options.selectColor = highlight_color
    options.elemDict = {}   # Don't color nodes based on their element.
    options.bgColor = None

    molecule = Chem.MolFromSmiles(smiles)
    figure = Draw.MolToMPL(molecule, highlightAtoms=highlight_atoms, size=figsize,
                           options=options, fitImage=False)

    axes = figure.gca()
    axes.set_axis_off()
    figure.savefig(filename, bbox_inches='tight')
    plt.close(figure)


def construct_atom_neighbor_list(array_rep):
    """Concatenate the per-degree neighbour tables of `array_rep` into one list.

    For every degree in the imported `degrees`, the entries stored under
    array_rep[('atom_neighbors', degree)] are copied into plain lists and
    appended in that order.
    """
    return [list(neighbours)
            for degree in degrees
            for neighbours in array_rep[('atom_neighbors', degree)]]


def plot(trained_weights):
    print "Loading data..."
    data = load_data(prefix_name = task_params['data_file'], VALUE_COLUMN = task_params['target_name'])

    train_smiles, train_targets = data['train']
    val_inputs,   val_targets   = data['validation']
    test_inputs,  test_targets  = data['test']

    print "Convnet fingerprints with neural net"
    conv_arch_params['return_atom_activations'] = True
    output_layer_fun, parser, compute_atom_activations = \
       build_convnet_fingerprint_fun(**conv_arch_params)
    atom_activations, array_rep = compute_atom_activations(trained_weights, train_smiles)

    if not os.path.exists('figures'): os.makedirs('figures')

    parent_molecule_dict = {}
    for mol_ix, atom_ixs in enumerate(array_rep['atom_list']):
        for atom_ix in atom_ixs:
            parent_molecule_dict[atom_ix] = mol_ix

    atom_neighbor_list = construct_atom_neighbor_list(array_rep)

    def get_neighborhood_ixs(array_rep, cur_atom_ix, radius):
        # Recursive function to get indices of all atoms in a certain radius.
        if radius == 0:
            return set([cur_atom_ix])
        else:
            cur_set = set([cur_atom_ix])
            for n_ix in atom_neighbor_list[cur_atom_ix]:
                cur_set.update(get_neighborhood_ixs(array_rep, n_ix, radius-1))
            return cur_set

    # Recreate trained network.
    nn_train_params, vanilla_net_params = parse_training_params(params)
    conv_arch_params['return_atom_activations'] = False
    _, _, combined_parser = \
        build_conv_deep_net(conv_arch_params, vanilla_net_params, params['l2_penalty'])

    net_loss_fun, net_pred_fun, net_parser = build_standard_net(**vanilla_net_params)
    net_weights = combined_parser.get(trained_weights, 'net weights')
    last_layer_weights = net_parser.get(net_weights, ('weights', 0))

    for fp_ix in range(params['fp_length']):
        print "FP {0} has linear regression coefficient {1}".format(fp_ix, last_layer_weights[fp_ix][0])
        combined_list = []
        for radius in all_radii:
            fp_activations = atom_activations[radius][:, fp_ix]
            combined_list += [(fp_activation, atom_ix, radius) for atom_ix, fp_activation in enumerate(fp_activations)]

        unique_list = remove_duplicates(combined_list, key_lambda=lambda x: x[0])
        combined_list = sorted(unique_list, key=lambda x: -x[0])

        for fig_ix in range(num_figs_per_fp):
            # Find the most-activating atoms for this fingerprint index, across all molecules and depths.
            activation, most_active_atom_ix, cur_radius = combined_list[fig_ix]
            most_activating_mol_ix = parent_molecule_dict[most_active_atom_ix]
            highlight_list_our_ixs = get_neighborhood_ixs(array_rep, most_active_atom_ix, cur_radius)
            highlight_list_rdkit = [array_rep['rdkit_ix'][our_ix] for our_ix in highlight_list_our_ixs]

            print "radius:", cur_radius, "atom list:", highlight_list_rdkit, "activation", activation
            draw_molecule_with_highlights(
                "figures/fp_{0}_highlight_{1}.pdf".format(fp_ix, fig_ix),
                train_smiles[most_activating_mol_ix],
                highlight_atoms=highlight_list_rdkit)




# Separating molecules with a single measurement from molecules with several measurements

In [13]:
# NOTE(review): `np` was bound to autograd.numpy by the earlier import cell;
# this import rebinds it to plain numpy for every cell executed afterwards.
import numpy as np
import pandas as pd
import os
# import seaborn as sns
# from pathlib import Path
import matplotlib.pyplot as plt


In [29]:
# Directory holding the final datasets, and the names of the SMILES and
# target columns used throughout the analysis cells.
DATASETS_PATH = '../../mol_properties/data/3_final_data'
SMILES_COLUMN = "smiles"
VALUE_COLUMN = "logP"

In [19]:
# Full logP table read from logP.csv; used below to count how many rows each
# SMILES string has.
logP_dataset = pd.read_csv(os.path.join(DATASETS_PATH,'logP.csv'))

In [21]:
# Table read from logp_mean.csv (presumably one averaged logP per molecule,
# going by the file name -- TODO confirm).
logP_mean_dataset = pd.read_csv(os.path.join(DATASETS_PATH,'logp_mean.csv'))

In [22]:
# Table read from logP_wo_parameters.csv; the dub_/uniq_ evaluation sets used
# during training are cut from this frame.
logP_wo_params_dataset = pd.read_csv(os.path.join(DATASETS_PATH,'logP_wo_parameters.csv'))

In [20]:
# Print column names, non-null counts and dtypes of the full logP table.
logP_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14111 entries, 0 to 14110
Data columns (total 2 columns):
smiles    14111 non-null object
logP      14111 non-null float64
dtypes: float64(1), object(1)
memory usage: 220.6+ KB


In [23]:
# Print column names, non-null counts and dtypes of the logp_mean table.
logP_mean_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13759 entries, 0 to 13758
Data columns (total 3 columns):
Unnamed: 0    13759 non-null int64
smiles        13759 non-null object
logP          13759 non-null float64
dtypes: float64(1), int64(1), object(1)
memory usage: 322.5+ KB


In [24]:
# Print column names, non-null counts and dtypes of the logP_wo_parameters table.
logP_wo_params_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12626 entries, 0 to 12625
Data columns (total 2 columns):
smiles    12626 non-null object
logP      12626 non-null float64
dtypes: float64(1), object(1)
memory usage: 197.4+ KB


In [27]:
# Per distinct SMILES string, count the non-null values in each remaining
# column of logP_dataset; the result is indexed by SMILES.
duplicates_smiles = logP_dataset.groupby([SMILES_COLUMN]).count()

In [36]:
# SMILES strings that have more than one logP value in logP_dataset.
smiles_dub = list(duplicates_smiles[duplicates_smiles[VALUE_COLUMN]>1].index)

In [37]:
# SMILES strings that have exactly one logP value in logP_dataset.
smiles_uniq = list(duplicates_smiles[duplicates_smiles[VALUE_COLUMN]==1].index)

In [41]:
# Split the logp_mean table by whether the molecule had several ("dub") or a
# single ("uniq") logP value in logP_dataset.
dub_data_mean = logP_mean_dataset[logP_mean_dataset[SMILES_COLUMN].isin(smiles_dub)]
uniq_data_mean = logP_mean_dataset[logP_mean_dataset[SMILES_COLUMN].isin(smiles_uniq)]

In [49]:
# Plain Python lists of SMILES strings and logP targets for both subsets of
# the logp_mean table.
dub_mean_smiles, dub_mean_targets = list(dub_data_mean[SMILES_COLUMN]), list(dub_data_mean[VALUE_COLUMN])
uniq_mean_smiles, uniq_mean_targets = list(uniq_data_mean[SMILES_COLUMN]), list(uniq_data_mean[VALUE_COLUMN])


In [42]:
# Same split for the logP_wo_parameters table: molecules with several logP
# values in logP_dataset versus molecules with exactly one.
dub_data_wo_params = logP_wo_params_dataset[logP_wo_params_dataset[SMILES_COLUMN].isin(smiles_dub)]
uniq_data_wo_params = logP_wo_params_dataset[logP_wo_params_dataset[SMILES_COLUMN].isin(smiles_uniq)]

In [48]:
# Plain Python lists of SMILES strings and logP targets for both subsets.
# These four names are read as globals by the progress callback in train_nn
# and by print_performance in train_neural_fingerprint, so this cell has to
# run before any training cell.
dub_wo_params_smiles, dub_wo_params_targets = list(dub_data_wo_params[SMILES_COLUMN]), list(dub_data_wo_params[VALUE_COLUMN])
uniq_wo_params_smiles, uniq_wo_params_targets = list(uniq_data_wo_params[SMILES_COLUMN]), list(uniq_data_wo_params[VALUE_COLUMN])


## logP_mean

### Visualization

In [9]:
# Dataset file prefix and target column for this run.
task_params = {
    'target_name': 'logP',
    'data_file': 'logp_mean',
}

# Globals read by parse_training_params (dropout is not read by any code
# visible in this notebook).
num_epochs = 100
batch_size = 100
normalize = 1
dropout = 0
activation = relu

# Hyperparameters.  parse_training_params forwards only init_scale, fp_length
# and l2_penalty; learn_rate, b1, b2 and l1_penalty are currently unused.
params = {
    'fp_length': 20,
    'fp_depth': 3,
    'init_scale': np.exp(-4),
    'learn_rate': np.exp(-4),
    'b1': np.exp(-4),
    'b2': np.exp(-4),
    'l2_penalty': np.exp(-4),
    'l1_penalty': np.exp(-5),
    'conv_width': 10,
}

# One hidden-layer width per unit of fingerprint depth.
conv_layer_sizes = params['fp_depth'] * [params['conv_width']]
conv_arch_params = {
    'num_hidden_features': conv_layer_sizes,
    'fp_length': params['fp_length'],
    'normalize': normalize,
    'return_atom_activations': False,
}

# Radii 0 .. fp_depth inclusive.
all_radii = range(1 + params['fp_depth'])

# Plotting parameters
num_figs_per_fp = 11
figsize = (100, 100)
highlight_color = (30.0/255.0, 100.0/255.0, 255.0/255.0)  # A nice light blue.

In [10]:
# Train the model and persist the weights.  Pickle data is a byte stream, so
# the file is opened in binary mode for both writing and reading.
trained_network_weights = train_neural_fingerprint()
with open('results.pkl', 'wb') as f:
    pickle.dump(trained_network_weights, f)

# Plotting.
with open('results.pkl', 'rb') as f:
    trained_weights = pickle.load(f)
plot(trained_weights)

Loading data...
Regression on 800 training points.
--------------------------------------------------------------------------------
Mean predictor

Performance (RMSE) on logP:
Train: 1.859267165016053
Test:  1.8550727952633121
--------------------------------------------------------------------------------
Task params {'learn_rate': 0.01831563888873418, 'fp_depth': 3, 'b1': 0.01831563888873418, 'b2': 0.01831563888873418, 'init_scale': 0.01831563888873418, 'fp_length': 20, 'l2_penalty': 0.01831563888873418, 'l1_penalty': 0.006737946999085467, 'conv_width': 10}
Convnet fingerprints with neural net
Total number of weights in the network: 8791
max of weights 0.06962983567500523
Iteration 0 loss 1.052217399906365 train RMSE 1.9070721742622474 Validation RMSE 0 : 1.8954592417051612 max of weights 0.19345587019725458
Iteration 10 loss 1.4459786015306226 train RMSE 2.23564794998938 Validation RMSE 10 : 2.20525463097695 max of weights 0.27519345919657223
Iteration 20 loss 1.5785792888927956 tra

Iteration 1100 loss 0.8275428552112589 train RMSE 1.6861810065244007 Validation RMSE 1100 : 1.670135073740472 max of weights 4.4007104879977526
Iteration 1110 loss 0.7869945017613229 train RMSE 1.644052002729193 Validation RMSE 1110 : 1.6254906646420275 max of weights 4.478216359334269
Iteration 1120 loss 0.7460780713307693 train RMSE 1.6003886251166748 Validation RMSE 1120 : 1.585689058842461 max of weights 4.488513335261613
Iteration 1130 loss 0.7520146137037418 train RMSE 1.6067226253396465 Validation RMSE 1130 : 1.5884993760103834 max of weights 4.538805065029834
Iteration 1140 loss 0.7552935160738973 train RMSE 1.610211356390134 Validation RMSE 1140 : 1.5899921794918455 max of weights 4.652584899648605
Iteration 1150 loss 0.7371438920554406 train RMSE 1.590557928301801 Validation RMSE 1150 : 1.5705022886370295 max of weights 4.750433823819734
Iteration 1160 loss 0.7118374973268448 train RMSE 1.562803300072586 Validation RMSE 1160 : 1.5521829483860539 max of weights 4.8372022033005

Iteration 2240 loss 0.7800294818819324 train RMSE 1.6325409769574042 Validation RMSE 2240 : 1.6181995010572763 max of weights 7.072494604508286
Iteration 2250 loss 0.738867919949835 train RMSE 1.5882948493479627 Validation RMSE 2250 : 1.5727024272219554 max of weights 7.1599879219775
Iteration 2260 loss 0.7534846576062167 train RMSE 1.6040851603303183 Validation RMSE 2260 : 1.5908270426594806 max of weights 7.160947480739447
Iteration 2270 loss 0.7649524030975133 train RMSE 1.616346934156678 Validation RMSE 2270 : 1.598639334027432 max of weights 7.200135173174307
Iteration 2280 loss 0.6909149291035113 train RMSE 1.5350541800730086 Validation RMSE 2280 : 1.5098452283440598 max of weights 7.161224721919244
Iteration 2290 loss 0.7554551510194911 train RMSE 1.606055706354382 Validation RMSE 2290 : 1.5883242378507232 max of weights 7.225761334196772
Iteration 2300 loss 0.7448758680143666 train RMSE 1.594552880375717 Validation RMSE 2300 : 1.566577270242524 max of weights 7.241305186617356


Iteration 3380 loss 0.7257673668117092 train RMSE 1.5689404421101236 Validation RMSE 3380 : 1.5498984786688235 max of weights 9.419189490843175
Iteration 3390 loss 0.6932872114310349 train RMSE 1.5327063356173576 Validation RMSE 3390 : 1.5193058978259149 max of weights 9.399465629493136
Iteration 3400 loss 0.7742472371032477 train RMSE 1.6213689817805537 Validation RMSE 3400 : 1.6062760815797794 max of weights 9.343251188189551
Iteration 3410 loss 0.7261645671849288 train RMSE 1.5692402074668002 Validation RMSE 3410 : 1.5549429888545385 max of weights 9.357922905128115
Iteration 3420 loss 0.6589481367462365 train RMSE 1.4933455302175707 Validation RMSE 3420 : 1.4833502862549786 max of weights 9.349940304426859
Iteration 3430 loss 0.7395644267630144 train RMSE 1.58387690796536 Validation RMSE 3430 : 1.5683580469412768 max of weights 9.376497156270782
Iteration 3440 loss 0.7388363863801245 train RMSE 1.583028117364277 Validation RMSE 3440 : 1.566670675533332 max of weights 9.376408788379

KeyboardInterrupt: 

In [18]:
# Dataset file prefix and target column for this run.
task_params = {
    'target_name': 'logP',
    'data_file': 'logp_mean',
}

# Globals read by parse_training_params (dropout is not read by any code
# visible in this notebook).
num_epochs = 20
batch_size = 100
normalize = 1
dropout = 0
activation = relu

# Hyperparameters.  parse_training_params forwards only init_scale, fp_length
# and l2_penalty; learn_rate, b1, b2 and l1_penalty are currently unused.
params = {
    'fp_length': 50,
    'fp_depth': 4,
    'init_scale': np.exp(-4),
    'learn_rate': np.exp(-4),
    'b1': np.exp(-4),
    'b2': np.exp(-4),
    'l2_penalty': np.exp(-4),
    'l1_penalty': np.exp(-5),
    'conv_width': 20,
}

# One hidden-layer width per unit of fingerprint depth.
conv_layer_sizes = params['fp_depth'] * [params['conv_width']]
conv_arch_params = {
    'num_hidden_features': conv_layer_sizes,
    'fp_length': params['fp_length'],
    'normalize': normalize,
    'return_atom_activations': False,
}

# Radii 0 .. fp_depth inclusive.
all_radii = range(1 + params['fp_depth'])

# Plotting parameters
num_figs_per_fp = 11
figsize = (100, 100)
highlight_color = (30.0/255.0, 100.0/255.0, 255.0/255.0)  # A nice light blue.

In [19]:
# Train the model and persist the weights.  Pickle data is a byte stream, so
# the file is opened in binary mode for both writing and reading.
trained_network_weights = train_neural_fingerprint()
with open('results.pkl', 'wb') as f:
    pickle.dump(trained_network_weights, f)

# Plotting.
with open('results.pkl', 'rb') as f:
    trained_weights = pickle.load(f)
plot(trained_weights)

Loading data...
Regression on 9629 training points.
--------------------------------------------------------------------------------
Mean predictor

Performance (RMSE) on logP:
Train: 1.859267165016053
Test:  1.8550727952633121
--------------------------------------------------------------------------------
Task params {'learn_rate': 0.01831563888873418, 'fp_depth': 4, 'b1': 0.01831563888873418, 'b2': 0.01831563888873418, 'init_scale': 0.01831563888873418, 'fp_length': 50, 'l2_penalty': 0.01831563888873418, 'l1_penalty': 0.006737946999085467, 'conv_width': 20}
Convnet fingerprints with neural net
Total number of weights in the network: 27441
max of weights 0.08144291040373952
Iteration 0 loss 0.9841043233938501 train RMSE 1.844335332789677 Validation RMSE 0 : 1.8476555767592138 max of weights 0.2205990397952285
Iteration 10 loss 2.0191148807011183 train RMSE 2.641851518701157 Validation RMSE 10 : 2.602044866427725 max of weights 0.3992600263094507
Iteration 20 loss 1.9018455863735966 t

Iteration 1110 loss 1.0535888952354093 train RMSE 1.9048544625131205 Validation RMSE 1110 : 1.8751696506977953 max of weights 6.3053848996004875
Iteration 1120 loss 1.01677021832338 train RMSE 1.8711199054174514 Validation RMSE 1120 : 1.8436427661992532 max of weights 6.318063009545165
Iteration 1130 loss 1.0120495975958348 train RMSE 1.8667072917127387 Validation RMSE 1130 : 1.8303444116902687 max of weights 6.3211051625845345
Iteration 1140 loss 1.044777224184134 train RMSE 1.8967574598371788 Validation RMSE 1140 : 1.8594123681283319 max of weights 6.323044733263152
Iteration 1150 loss 0.9893220348206032 train RMSE 1.8454892643939849 Validation RMSE 1150 : 1.804971465299186 max of weights 6.324699327753526


KeyboardInterrupt: 

In [23]:
# Dataset file prefix and target column for this run.
task_params = {
    'target_name': 'logP',
    'data_file': 'logp_mean',
}

# Globals read by parse_training_params (dropout is not read by any code
# visible in this notebook).
num_epochs = 10
batch_size = 100
normalize = 1
dropout = 0
activation = relu

# Hyperparameters forwarded to the network builders.
params = {
    'fp_length': 50,
    'fp_depth': 4,
    'init_scale': np.exp(-4),
    'l2_penalty': np.exp(-2),
    'conv_width': 20,
}

# One hidden-layer width per unit of fingerprint depth.
conv_layer_sizes = params['fp_depth'] * [params['conv_width']]
conv_arch_params = {
    'num_hidden_features': conv_layer_sizes,
    'fp_length': params['fp_length'],
    'normalize': normalize,
    'return_atom_activations': False,
}

# Radii 0 .. fp_depth inclusive.
all_radii = range(1 + params['fp_depth'])

# Plotting parameters
num_figs_per_fp = 11
figsize = (100, 100)
highlight_color = (30.0/255.0, 100.0/255.0, 255.0/255.0)  # A nice light blue.

In [27]:
# Train the model and persist the weights.  Pickle data is a byte stream, so
# the file is opened in binary mode for both writing and reading.
trained_network_weights = train_neural_fingerprint()
with open('results.pkl', 'wb') as f:
    pickle.dump(trained_network_weights, f)

# Plotting.
with open('results.pkl', 'rb') as f:
    trained_weights = pickle.load(f)
plot(trained_weights)

 Loading data...
Regression on 9629 training points.
--------------------------------------------------------------------------------
Mean predictor

Performance (RMSE) on logP:
Train: 1.859267165016053
Test:  1.8550727952633121
--------------------------------------------------------------------------------
Task params {'fp_length': 50, 'l2_penalty': 0.1353352832366127, 'fp_depth': 4, 'conv_width': 20, 'init_scale': 0.01831563888873418}
Convnet fingerprints with neural net
Total number of weights in the network: 27441
max of weights 0.08144291040373952
Iteration 0 loss 0.9840853481222934 train RMSE 1.844335332789677 Validation RMSE 0 : 1.8476555767592138 max of weights 0.07492002162767124
Iteration 10 loss 0.9858015229240393 train RMSE 1.8459541041866336 Validation RMSE 10 : 1.8521251328305253 max of weights 0.08025294081454813
Iteration 20 loss 0.9844516996462558 train RMSE 1.8446898466666117 Validation RMSE 20 : 1.842170896370284 max of weights 0.0877317706072634
Iteration 30 loss 0

radius: 1 atom list: [11, 8] activation 0.03636088603222664
radius: 1 atom list: [11, 10] activation 0.036054789817693896
radius: 1 atom list: [3, 1] activation 0.035870824890196135
radius: 1 atom list: [13, 16, 15, 14] activation 0.035869335916685945
radius: 1 atom list: [11, 10] activation 0.03574187875187763
radius: 1 atom list: [17, 18] activation 0.03572668614631112
radius: 1 atom list: [2, 1] activation 0.035646941079506334
radius: 1 atom list: [0, 1] activation 0.0353135504974113
radius: 1 atom list: [7, 5] activation 0.0352996807271227
FP 3 has linear regression coefficient 0.0547802044756
radius: 2 atom list: [12, 4, 5, 16, 0, 2, 3, 11, 1, 6, 10] activation 0.05200975607028246
radius: 2 atom list: [9, 13, 23, 10, 20, 11, 12, 21, 22, 7, 8] activation 0.05171720722861676
radius: 2 atom list: [3, 7, 18, 11, 13, 4, 19, 6, 12, 5] activation 0.05109597873925983
radius: 2 atom list: [6, 7, 8, 15, 21, 5, 9, 14, 16, 1] activation 0.050881224175346816
radius: 2 atom list: [5, 10, 11, 13

radius: 1 atom list: [6, 7, 8, 15] activation 0.032058640503369595
radius: 1 atom list: [10, 6, 7, 8, 9] activation 0.03194752631083712
radius: 1 atom list: [13, 14, 9, 12, 15] activation 0.031921970451364266
radius: 1 atom list: [15, 16, 17, 13, 14] activation 0.03179513638155196
radius: 1 atom list: [22, 25, 20, 24, 23] activation 0.03179322759153739
FP 12 has linear regression coefficient -0.0399822244796
radius: 1 atom list: [14, 13, 15, 16, 17, 18] activation 0.0402598257267124
radius: 1 atom list: [8, 7] activation 0.03774257342327456
radius: 1 atom list: [11, 8] activation 0.0365743155280539
radius: 1 atom list: [11, 10] activation 0.036254390201922566
radius: 1 atom list: [13, 16, 15, 14] activation 0.036118974081954844
radius: 1 atom list: [3, 1] activation 0.03607925342742003
radius: 1 atom list: [2, 1] activation 0.035900605952832156
radius: 1 atom list: [14, 13] activation 0.035709072937530284
radius: 1 atom list: [11, 10] activation 0.03568792963449312
radius: 1 atom list:

radius: 3 atom list: [10, 16, 19, 20, 8, 9, 18, 4, 6, 5, 7, 17] activation 0.0849411727786653
radius: 3 atom list: [9, 11, 12, 14, 15, 17, 21, 3, 6, 8, 10, 16, 7] activation 0.08435447671686198
radius: 3 atom list: [7, 14, 20, 3, 6, 8, 9, 11, 12, 13, 4, 5] activation 0.08361949514410132
radius: 3 atom list: [2, 4, 6, 7, 9, 10, 11, 12, 3, 5, 1] activation 0.08354818134658865
radius: 3 atom list: [0, 4, 6, 8, 9, 10, 2, 1, 3, 5, 11, 12, 7] activation 0.08191483180159329
radius: 3 atom list: [7, 9, 11, 12, 15, 16, 5, 6, 8, 10, 17, 18] activation 0.08157555730714765
radius: 3 atom list: [19, 20, 21, 4, 6, 8, 14, 2, 3, 5, 7] activation 0.0812399727353034
radius: 3 atom list: [3, 5, 1, 2, 4, 6, 7, 9, 10, 11, 12] activation 0.08091872902924693
radius: 3 atom list: [1, 3, 9, 11, 8, 10, 12, 13, 16, 17, 18] activation 0.08025159260604021
radius: 3 atom list: [11, 3, 5, 6, 0, 8, 9, 10, 1, 2, 4, 12] activation 0.07991429444545813
radius: 3 atom list: [9, 11, 15, 17, 19, 21, 5, 10, 7, 18, 20, 2, 4, 

radius: 2 atom list: [6, 7, 8, 15, 21, 5, 9, 14, 16, 1] activation 0.053324467842339025
radius: 2 atom list: [7, 12, 13, 6, 4, 3, 5] activation 0.05328110202248333
radius: 2 atom list: [5, 10, 11, 13, 14, 8, 7, 6, 9] activation 0.053106212552841384
radius: 2 atom list: [6, 8, 9, 13, 4, 7, 5] activation 0.053084980724179595
radius: 2 atom list: [11, 16, 3, 10, 12, 17, 0, 2, 18, 1] activation 0.05305192895630075
radius: 2 atom list: [13, 19, 7, 9, 8, 12, 14, 11, 10] activation 0.05304968345808279
FP 30 has linear regression coefficient -0.0718351623914
radius: 3 atom list: [14, 10, 11, 12, 13, 15, 16, 17, 18] activation 0.07732746726833285
radius: 3 atom list: [3, 6, 1, 10, 0, 2, 4, 5] activation 0.0773269579582674
radius: 3 atom list: [3, 0, 1, 2, 4] activation 0.07705080249806932
radius: 3 atom list: [1, 2, 0, 3, 4] activation 0.07701022751699867
radius: 3 atom list: [4, 1, 3, 0, 2] activation 0.07698327360118123
radius: 3 atom list: [0, 3, 5, 1, 4, 2] activation 0.0769558945201774
rad

radius: 2 atom list: [2, 1, 0] activation 0.04451628933102522
radius: 2 atom list: [1, 3, 0, 2] activation 0.044374992641212724
radius: 2 atom list: [3, 1, 4] activation 0.04435625112774728
radius: 2 atom list: [16, 4, 18, 17] activation 0.044189732745716935
FP 39 has linear regression coefficient -0.0307688154402
radius: 1 atom list: [8, 7] activation 0.030352463152853632
radius: 1 atom list: [11, 10] activation 0.030099614390976505
radius: 1 atom list: [11, 8] activation 0.030065558026274655
radius: 1 atom list: [17, 18] activation 0.029909483444331118
radius: 1 atom list: [11, 10] activation 0.02990192857032803
radius: 1 atom list: [3, 1] activation 0.029901403433909617
radius: 1 atom list: [7, 5] activation 0.029884273806504588
radius: 1 atom list: [0, 1] activation 0.029812183476763877
radius: 1 atom list: [25, 26] activation 0.029784587275333223
radius: 1 atom list: [2, 1] activation 0.029716495042535176
radius: 1 atom list: [6, 5] activation 0.029704991267430897
FP 40 has linear

radius: 1 atom list: [3, 1] activation 0.03634896268803238
radius: 1 atom list: [2, 1] activation 0.03626771141871885
radius: 1 atom list: [8, 4, 7] activation 0.036238852455860816
radius: 1 atom list: [11, 10] activation 0.03619062596314381
radius: 1 atom list: [13, 16, 15, 14] activation 0.036113356725398255
radius: 1 atom list: [17, 18] activation 0.03603567356321768
radius: 1 atom list: [17, 16] activation 0.03587322405789431
FP 49 has linear regression coefficient -0.0612226765284
radius: 3 atom list: [0, 2, 3, 4, 1] activation 0.05284660798086984
radius: 3 atom list: [3, 5, 2, 8, 6, 0, 1, 4, 7] activation 0.052768338472256884
radius: 3 atom list: [4, 5, 6, 10, 11, 12, 7, 8, 9] activation 0.05275906538455532
radius: 3 atom list: [1, 5, 7, 2, 6, 13, 0, 3, 4] activation 0.052755561830026367
radius: 3 atom list: [4, 1, 0, 2, 3] activation 0.052744443893458004
radius: 3 atom list: [3, 1, 6, 0, 2, 4, 5, 7, 8] activation 0.05273728827442454
radius: 3 atom list: [0, 2, 3, 4, 1] activatio

### No visualization

In [6]:
# Dataset file prefix and target column for this run.
task_params = {
    'target_name': 'logP',
    'data_file': 'logp_mean',
}

# Globals read by parse_training_params (dropout is not read by any code
# visible in this notebook).
num_epochs = 10
batch_size = 100
normalize = 1
dropout = 0
activation = relu

# Hyperparameters forwarded to the network builders.
params = {
    'fp_length': 50,
    'fp_depth': 4,
    'init_scale': np.exp(-4),
    'l2_penalty': np.exp(-2),
    'conv_width': 20,
}

# One hidden-layer width per unit of fingerprint depth.
conv_layer_sizes = params['fp_depth'] * [params['conv_width']]
conv_arch_params = {
    'num_hidden_features': conv_layer_sizes,
    'fp_length': params['fp_length'],
    'normalize': normalize,
    'return_atom_activations': False,
}

# Radii 0 .. fp_depth inclusive.
all_radii = range(1 + params['fp_depth'])

# Plotting parameters
num_figs_per_fp = 11
figsize = (100, 100)
highlight_color = (30.0/255.0, 100.0/255.0, 255.0/255.0)  # A nice light blue.

In [7]:
# Train the model and persist the weights in a file named after the dataset.
# Pickle data is a byte stream, so the file is opened in binary mode.
trained_network_weights = train_neural_fingerprint()
with open('results'+task_params['data_file']+'.pkl', 'wb') as f:
    pickle.dump(trained_network_weights, f)

Loading data...
Regression on 9631 training points.
--------------------------------------------------------------------------------
Mean predictor

Performance (RMSE) on logP:
Train: 1.8591219922763682
Test:  1.9087278157747403

Performance (R2) on logP:
Train: -1.7525698430687504e+31
Test:  -4.618357749255554e+30
--------------------------------------------------------------------------------
Task params {'fp_length': 50, 'l2_penalty': 0.1353352832366127, 'fp_depth': 4, 'conv_width': 20, 'init_scale': 0.01831563888873418}
Convnet fingerprints with neural net
Total number of weights in the network: 30571
max of weights 0.08535001578936458
Iteration 0 loss 1.0050158468991277 train RMSE 1.86369714187141 Train R2 0 : -18760.506803882163 Validation RMSE 0 : 1.8582204651665832 Validation R2 0 : -18395.132611209214 max of weights 0.08088414585016009
Iteration 10 loss 0.9832034478799879 train RMSE 1.8433730638836396 Train R2 10 : -527.3224731482445 Validation RMSE 10 : 1.8445310207053718 Val

Iteration 730 loss 0.1919403881859394 train RMSE 0.8109195859849205 Train R2 730 : 0.7486684639154484 Validation RMSE 730 : 0.804109695999905 Validation R2 730 : 0.750998620941955 max of weights 0.6060825920279131
Iteration 740 loss 0.18819764125643684 train RMSE 0.8028730618944706 Train R2 740 : 0.7585724226143383 Validation RMSE 740 : 0.7982059717991685 Validation R2 740 : 0.7605420847625309 max of weights 0.6157255259703738
Iteration 750 loss 0.1906207058902415 train RMSE 0.8080577099901245 Train R2 750 : 0.7588021175827063 Validation RMSE 750 : 0.8051934720349959 Validation R2 750 : 0.7580791448050538 max of weights 0.6236328126830659
Iteration 760 loss 0.1962478680757335 train RMSE 0.8199917369860722 Train R2 760 : 0.7506340768615449 Validation RMSE 760 : 0.8143920385282418 Validation R2 760 : 0.7533460987384756 max of weights 0.6287980581243671
Iteration 770 loss 0.20214912491905182 train RMSE 0.8323412795146214 Train R2 770 : 0.7392666208798098 Validation RMSE 770 : 0.8380894101

## logP_wo_parameters

### Visualization

In [28]:
# Target name and data file identifier for this run.
task_params = dict(target_name='logP', data_file='logP_wo_parameters')

# Run settings; num_epochs and batch_size are read as globals by
# parse_training_params.
num_epochs, batch_size = 10, 100
normalize, dropout = 1, 0
activation = relu

# Fingerprint / network hyperparameters.
params = dict(
    fp_length=50,
    fp_depth=4,
    init_scale=np.exp(-4),
    l2_penalty=np.exp(-2),
    conv_width=20,
)

# conv_width repeated fp_depth times, passed on as 'num_hidden_features'.
conv_layer_sizes = params['fp_depth'] * [params['conv_width']]
conv_arch_params = dict(
    num_hidden_features=conv_layer_sizes,
    fp_length=params['fp_length'],
    normalize=normalize,
    return_atom_activations=False,
)

# Radii 0 .. fp_depth inclusive.
all_radii = range(1 + params['fp_depth'])

# Plotting parameters
num_figs_per_fp = 11
figsize = (100, 100)
highlight_color = tuple(channel / 255.0 for channel in (30.0, 100.0, 255.0))  # A nice light blue.

In [29]:
# Run train_neural_fingerprint() and persist whatever it returns.
trained_network_weights = train_neural_fingerprint()
# pickle.dump writes bytes, so the file must be opened in binary mode:
# text mode ('w') raises TypeError on Python 3 and is subject to newline
# translation on Windows.
with open('results.pkl', 'wb') as f:
    pickle.dump(trained_network_weights, f)

# Plotting.
# Read the file back in binary mode to match how it was written; this only
# ever loads the file produced by the dump just above.
with open('results.pkl', 'rb') as f:
    trained_weights = pickle.load(f)
plot(trained_weights)

Loading data...
Regression on 8837 training points.
--------------------------------------------------------------------------------
Mean predictor

Performance (RMSE) on logP:
Train: 1.8146549349304573
Test:  1.786769730059924
--------------------------------------------------------------------------------
Task params {'fp_length': 50, 'l2_penalty': 0.1353352832366127, 'fp_depth': 4, 'conv_width': 20, 'init_scale': 0.01831563888873418}
Convnet fingerprints with neural net
Total number of weights in the network: 27441
max of weights 0.08144291040373952
Iteration 0 loss 0.9886515906588851 train RMSE 1.8042531705773819 Validation RMSE 0 : 1.782187447872895 max of weights 0.08015057691461697
Iteration 10 loss 1.00953598902406 train RMSE 1.8232229054122153 Validation RMSE 10 : 1.8078542461999854 max of weights 0.07794223909831255
Iteration 20 loss 0.9836639417604071 train RMSE 1.799710182979298 Validation RMSE 20 : 1.775675355283461 max of weights 0.08258433888104313
Iteration 30 loss 0.98

radius: 3 atom list: [9, 1, 4, 5, 8, 3, 10] activation 0.03688111516210626
radius: 3 atom list: [7, 8, 10, 12, 14, 9, 11, 13] activation 0.03688006013900692
radius: 3 atom list: [11, 13, 15, 7, 10, 12, 14, 9] activation 0.03687983483591856
FP 4 has linear regression coefficient -0.0674564992073
radius: 2 atom list: [15, 14, 17, 18, 16, 12, 13] activation 0.07263332693084341
radius: 2 atom list: [9, 8, 5] activation 0.07235106461077892
radius: 2 atom list: [2, 3, 1] activation 0.07233746009289359
radius: 2 atom list: [15, 16, 14, 18, 17, 13] activation 0.07227312238876252
radius: 2 atom list: [2, 0, 1] activation 0.07172148223888844
radius: 2 atom list: [10, 12, 11] activation 0.07161859330708381
radius: 2 atom list: [15, 16, 14, 18, 17, 13] activation 0.07125508433223503
radius: 2 atom list: [6, 5, 7, 8, 9] activation 0.06784753921174579
radius: 2 atom list: [3, 4, 1] activation 0.06761490266998506
radius: 2 atom list: [0, 3, 4, 1, 2] activation 0.06663839644539474
radius: 2 atom list:

radius: 2 atom list: [2, 7, 8, 9, 16, 6, 10, 15, 17, 22] activation 0.032855760202695324
radius: 2 atom list: [4, 5, 6, 12, 3, 7, 11, 13, 18, 19] activation 0.032843916431971286
radius: 2 atom list: [22, 2, 6, 4, 3, 24, 5, 10, 18, 23] activation 0.03260556641020427
radius: 2 atom list: [4, 2, 3, 5, 6, 7, 13, 15, 16, 17] activation 0.03253848986335458
radius: 2 atom list: [4, 5, 6, 13, 3, 7, 12, 14, 18, 19] activation 0.03249104972292756
radius: 2 atom list: [3, 4, 5, 6, 12, 13, 17, 18, 14, 7] activation 0.032456611251177575
radius: 2 atom list: [15, 1, 4, 5, 14, 3, 6, 24, 13] activation 0.03244203223324115
radius: 2 atom list: [2, 18, 3, 5, 6, 7, 14, 16, 17, 4] activation 0.032408596541668194
radius: 2 atom list: [15, 9, 3, 5, 6, 7, 12, 14, 16, 8] activation 0.03239373144916207
radius: 2 atom list: [16, 2, 4, 6, 7, 8, 5, 18, 20, 19] activation 0.03237881662672333
radius: 2 atom list: [16, 3, 5, 6, 13, 15, 2, 7, 17, 4] activation 0.03236442825825944
FP 14 has linear regression coefficie

radius: 3 atom list: [8, 9, 10, 11, 12, 13, 14] activation 0.06745212504911544
radius: 3 atom list: [1, 2, 3, 4, 5, 6, 0] activation 0.06736070466556014
radius: 3 atom list: [21, 15, 16, 17, 18, 19, 20] activation 0.06734872400582491
FP 22 has linear regression coefficient 0.0485123786553
radius: 2 atom list: [2, 7, 8, 9, 16, 6, 10, 15, 17, 22] activation 0.03891463956639332
radius: 2 atom list: [4, 5, 6, 12, 3, 7, 11, 13, 18, 19] activation 0.03888369810899686
radius: 2 atom list: [4, 2, 3, 5, 6, 7, 13, 15, 16, 17] activation 0.03848435799501331
radius: 2 atom list: [22, 2, 6, 4, 3, 24, 5, 10, 18, 23] activation 0.03846792247654398
radius: 2 atom list: [15, 1, 4, 5, 14, 3, 6, 24, 13] activation 0.038354018799339154
radius: 2 atom list: [2, 18, 3, 5, 6, 7, 14, 16, 17, 4] activation 0.03824897640751383
radius: 2 atom list: [15, 9, 3, 5, 6, 7, 12, 14, 16, 8] activation 0.03823540972349227
radius: 2 atom list: [16, 2, 4, 6, 7, 8, 5, 18, 20, 19] activation 0.03822178109879029
radius: 2 ato

radius: 2 atom list: [15, 16, 14, 18, 17, 13] activation 0.07077890973992583
radius: 2 atom list: [3, 4, 1] activation 0.06879605989088831
radius: 2 atom list: [6, 5, 7, 8, 9] activation 0.06860070598033363
FP 31 has linear regression coefficient -0.0274620576738
radius: 1 atom list: [1, 0] activation 0.02801805741838737
radius: 1 atom list: [3, 2] activation 0.0280004733610676
radius: 1 atom list: [3, 5] activation 0.027892955209480402
radius: 1 atom list: [0, 1] activation 0.027759402721880416
radius: 1 atom list: [18, 17] activation 0.027666407481306495
radius: 1 atom list: [18, 21] activation 0.02756696982815133
radius: 1 atom list: [14, 13] activation 0.027562056498218693
radius: 1 atom list: [7, 8] activation 0.02755966037600879
radius: 1 atom list: [15, 16] activation 0.027554806874065023
radius: 1 atom list: [15, 16] activation 0.027518627262030027
radius: 1 atom list: [13, 14] activation 0.027499315917395517
FP 32 has linear regression coefficient 0.12921246698
radius: 4 atom 

FP 40 has linear regression coefficient -0.0354881487666
radius: 1 atom list: [9, 8] activation 0.03427144674794161
radius: 1 atom list: [15, 16, 14, 18, 17, 13] activation 0.03358054834053237
radius: 1 atom list: [3, 4] activation 0.03299661337020233
radius: 1 atom list: [6, 9] activation 0.03297020209571208
radius: 1 atom list: [18, 17] activation 0.032934216589622076
radius: 1 atom list: [7, 8] activation 0.0329180104602864
radius: 1 atom list: [9, 6] activation 0.032898419487616654
radius: 1 atom list: [13, 14] activation 0.032894218591640434
radius: 1 atom list: [0, 1] activation 0.032818986985712305
radius: 1 atom list: [1, 0] activation 0.03272626430368111
radius: 1 atom list: [3, 2] activation 0.03267710603359108
FP 41 has linear regression coefficient -0.0320808855754
radius: 1 atom list: [1, 0] activation 0.02993885778813455
radius: 1 atom list: [3, 2] activation 0.029813399975502447
radius: 1 atom list: [3, 5] activation 0.029752788204433903
radius: 1 atom list: [0, 1] activ

### No Visualization

In [8]:
# Target name and data file identifier for this run.
task_params = dict(target_name='logP', data_file='logP_wo_parameters')

# Run settings; num_epochs and batch_size are read as globals by
# parse_training_params.
num_epochs, batch_size = 10, 100
normalize, dropout = 1, 0
activation = relu

# Fingerprint / network hyperparameters.
params = dict(
    fp_length=50,
    fp_depth=4,
    init_scale=np.exp(-4),
    l2_penalty=np.exp(-2),
    conv_width=20,
)

# conv_width repeated fp_depth times, passed on as 'num_hidden_features'.
conv_layer_sizes = params['fp_depth'] * [params['conv_width']]
conv_arch_params = dict(
    num_hidden_features=conv_layer_sizes,
    fp_length=params['fp_length'],
    normalize=normalize,
    return_atom_activations=False,
)

# Radii 0 .. fp_depth inclusive.
all_radii = range(1 + params['fp_depth'])

# Plotting parameters
num_figs_per_fp = 11
figsize = (100, 100)
highlight_color = tuple(channel / 255.0 for channel in (30.0, 100.0, 255.0))  # A nice light blue.

In [9]:
# Run train_neural_fingerprint() and persist whatever it returns.
trained_network_weights = train_neural_fingerprint()
# pickle.dump writes bytes, so the file must be opened in binary mode:
# text mode ('w') raises TypeError on Python 3 and is subject to newline
# translation on Windows.
with open('results'+task_params['data_file']+'.pkl', 'wb') as f:
    pickle.dump(trained_network_weights, f)

Loading data...
Regression on 8838 training points.
--------------------------------------------------------------------------------
Mean predictor

Performance (RMSE) on logP:
Train: 1.8146042407352028
Test:  1.8141849992522239

Performance (R2) on logP:
Train: 0.0
Test:  0.0
--------------------------------------------------------------------------------
Task params {'fp_length': 50, 'l2_penalty': 0.1353352832366127, 'fp_depth': 4, 'conv_width': 20, 'init_scale': 0.01831563888873418}
Convnet fingerprints with neural net
Total number of weights in the network: 30571
max of weights 0.08535001578936458
Iteration 0 loss 1.0041956952405227 train RMSE 1.8183273856100233 Train R2 0 : -22107.556204434153 Validation RMSE 0 : 1.789294284026002 Validation R2 0 : -21766.52263076145 max of weights 0.08585993000392719
Iteration 10 loss 1.00273735062751 train RMSE 1.8170183194398009 Train R2 10 : -111.97485486306299 Validation RMSE 10 : 1.8001877783747764 Validation R2 10 : -111.66544845927847 max 

Iteration 720 loss 0.14835179807202856 train RMSE 0.6948236128476037 Train R2 720 : 0.8180406522189897 Validation RMSE 720 : 0.711683412537445 Validation R2 720 : 0.810815808051538 max of weights 0.5791162844924415
Iteration 730 loss 0.1419068642314763 train RMSE 0.6793615546571781 Train R2 730 : 0.8279495965605145 Validation RMSE 730 : 0.6815060914339749 Validation R2 730 : 0.8279013989733897 max of weights 0.5865479412308696
Iteration 740 loss 0.1483888952611154 train RMSE 0.6948737901107499 Train R2 740 : 0.8286902974892062 Validation RMSE 740 : 0.7058477111189645 Validation R2 740 : 0.8262762951799777 max of weights 0.5909151346221024
Iteration 750 loss 0.15051951212114395 train RMSE 0.6998953641200406 Train R2 750 : 0.8101860863143744 Validation RMSE 750 : 0.6968952312262747 Validation R2 750 : 0.8113070473969448 max of weights 0.59574089942562
Iteration 760 loss 0.14492985529687083 train RMSE 0.6865924143023695 Train R2 760 : 0.8232727352512635 Validation RMSE 760 : 0.69331158278

## Different errors for molecules with duplicates and unique measurements

In [61]:
# Target name and data file identifier for this run.
task_params = dict(target_name='logP', data_file='logP_wo_parameters')

# Run settings; num_epochs and batch_size are read as globals by
# parse_training_params.
num_epochs, batch_size = 10, 100
normalize, dropout = 1, 0
activation = relu

# Fingerprint / network hyperparameters.
params = dict(
    fp_length=50,
    fp_depth=4,
    init_scale=np.exp(-4),
    l2_penalty=np.exp(-2),
    conv_width=20,
)

# conv_width repeated fp_depth times, passed on as 'num_hidden_features'.
conv_layer_sizes = params['fp_depth'] * [params['conv_width']]
conv_arch_params = dict(
    num_hidden_features=conv_layer_sizes,
    fp_length=params['fp_length'],
    normalize=normalize,
    return_atom_activations=False,
)

# Radii 0 .. fp_depth inclusive.
all_radii = range(1 + params['fp_depth'])

# Plotting parameters
num_figs_per_fp = 11
figsize = (100, 100)
highlight_color = tuple(channel / 255.0 for channel in (30.0, 100.0, 255.0))  # A nice light blue.

In [62]:
# Run train_neural_fingerprint() and persist whatever it returns.
trained_network_weights = train_neural_fingerprint()
# pickle.dump writes bytes, so the file must be opened in binary mode:
# text mode ('w') raises TypeError on Python 3 and is subject to newline
# translation on Windows.
with open('results'+task_params['data_file']+'.pkl', 'wb') as f:
    pickle.dump(trained_network_weights, f)

Loading data...
Regression on 8838 training points.
--------------------------------------------------------------------------------
Mean predictor

Performance (RMSE) on logP:
Train: 1.8146042407352028
Test:  1.8141849992522239
Dub: 2.1832152392353295
Unique:  1.8014880653973313

Performance (R2) on logP:
Train: 0.0
Test:  0.0
Dub: 0.0
Unique:  0.0
--------------------------------------------------------------------------------
Task params {'fp_length': 50, 'l2_penalty': 0.1353352832366127, 'fp_depth': 4, 'conv_width': 20, 'init_scale': 0.01831563888873418}
Convnet fingerprints with neural net
Total number of weights in the network: 30571
max of weights 0.08535001578936458
Iteration 0 loss 1.0041956952405227 train RMSE 1.8183273856100233 Train R2 0 : -22107.556204434153 Validation RMSE 0 : 1.789294284026002 Validation R2 0 : -21766.52263076145 Dub RMSE 0 : 2.1911715822316413
Unique RMSE 0 : 1.8048158473406302
Dub R2 0 : -34402.64057304353
Unique R2 0 : -22017.617080364544
max of weigh

Iteration 440 loss 0.1918334863355846 train RMSE 0.7916566735776115 Train R2 440 : 0.752198892407836 Validation RMSE 440 : 0.7999606324212283 Validation R2 440 : 0.7494302089824327 Dub RMSE 440 : 0.9136218989966348
Unique RMSE 440 : 0.7981911872944559
Dub R2 440 : 0.7710663726642895
Unique R2 440 : 0.7480747181107089
max of weights 0.38636674663407156
Iteration 450 loss 0.18743212851198196 train RMSE 0.7824414986754789 Train R2 450 : 0.7495319785612258 Validation RMSE 450 : 0.7862934469172072 Validation R2 450 : 0.7494357673173299 Dub RMSE 450 : 0.9187986328408969
Unique RMSE 450 : 0.7879588840602818
Dub R2 450 : 0.7586989657751735
Unique R2 450 : 0.7461076792908057
max of weights 0.3875078976523085
Iteration 460 loss 0.1894212792087862 train RMSE 0.7865983654868431 Train R2 460 : 0.7502527036262019 Validation RMSE 460 : 0.800410535561184 Validation R2 460 : 0.7436116725957366 Dub RMSE 460 : 0.8863908440072729
Unique RMSE 460 : 0.7950435800800327
Dub R2 460 : 0.770986848421243
Unique R