In [None]:
import numpy as np
import tensorflow as tf

import deepchem as dc
from deepchem.models import GraphConvModel
from deepchem.molnet import load_delaney

In [None]:
# Task names handed to the CSV loader; the number of entries also sets the
# number of tasks the model is built with.
delaney_tasks = [
    'measured log solubility in mols per litre',
]

In [None]:
def prepare_dataset(dataset_file='../data/delaney-processed.csv', shard_size=8192):
    """Featurize a Delaney-style CSV file into a DeepChem dataset.

    Parameters
    ----------
    dataset_file : str, optional
        Path to the CSV file to load. The default is relative to the
        notebook's working directory, so pass an explicit path when running
        from elsewhere. The loader is configured to read molecules from the
        "smiles" field and labels from the tasks in ``delaney_tasks``.
    shard_size : int, optional
        Shard size forwarded to the loader's ``featurize`` call.

    Returns
    -------
    The dataset object produced by ``CSVLoader.featurize``.
    """
    featurizer = dc.feat.ConvMolFeaturizer()
    loader = dc.data.CSVLoader(
        tasks=delaney_tasks,
        feature_field="smiles",
        featurizer=featurizer,
    )
    return loader.featurize(dataset_file, shard_size=shard_size)

In [None]:
def transform_dataset(dataset):
    """Split ``dataset`` into train/valid/test and normalize the labels.

    The dataset is split first (by index order, using the splitter's default
    fractions) and the label normalization is fitted on the training split
    only. Fitting on the full dataset would let validation/test label
    statistics leak into the transformation applied to the training data.

    Parameters
    ----------
    dataset
        The featurized dataset to split and transform.

    Returns
    -------
    tuple
        ``(train_dataset, valid_dataset, test_dataset, transformers)`` where
        the three datasets have been transformed and ``transformers`` is the
        list of fitted transformers (needed later to evaluate the model).
    """
    splitter = dc.splits.IndexSplitter()
    train_dataset, valid_dataset, test_dataset = splitter.train_valid_test_split(dataset)

    # Fit the normalization statistics on the training split only.
    transformers = [
        dc.trans.NormalizationTransformer(transform_y=True, dataset=train_dataset)
    ]

    # Apply the same fitted transformation to every split so they stay comparable.
    for transformer in transformers:
        train_dataset = transformer.transform(train_dataset)
        valid_dataset = transformer.transform(valid_dataset)
        test_dataset = transformer.transform(test_dataset)

    return train_dataset, valid_dataset, test_dataset, transformers

In [None]:
def make_model():
    """Create the graph-convolution regression model and its scoring metric.

    Returns
    -------
    tuple
        ``(model, metric)``: a ``GraphConvModel`` in regression mode with one
        task per entry of ``delaney_tasks`` and a batch size of 128, and a
        ``Metric`` wrapping ``pearson_r2_score`` with ``np.mean`` as its
        second argument.
    """
    n_tasks = len(delaney_tasks)
    model = GraphConvModel(n_tasks, batch_size=128, mode='regression')
    metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)
    return model, metric

In [None]:
def main():
    """Run the whole experiment: load data, train the model, print scores.

    Steps: featurize the dataset, transform and split it, build the model,
    fit it on the training split for 100 epochs, then evaluate on the
    training and validation splits and print both results. The test split
    is produced but not used here.
    """
    full_dataset = prepare_dataset()
    train_dataset, valid_dataset, _unused_test, transformers = transform_dataset(full_dataset)

    model, metric = make_model()

    # NOTE(review): `fit` is expected to return a number (presumably the
    # training loss) since it is formatted with %.2f -- confirm against the
    # DeepChem documentation.
    fit_result = model.fit(train_dataset, nb_epoch=100)
    print('result = %.2f' % fit_result)

    print("Evaluating model")
    # The fitted transformers are passed along with the metric to `evaluate`.
    train_scores, valid_scores = [
        model.evaluate(split, [metric], transformers)
        for split in (train_dataset, valid_dataset)
    ]

    print("Train scores :", train_scores)
    print("Validation scores : ", valid_scores)

In [None]:
# Execute the full pipeline defined in main(): prepare data, train, and print scores.
main()