In [14]:
# Setup cell: import the required packages, load the ESOL dataset through
# DeepChem's MoleculeNet loader using the graph-convolution featurizer, and
# unpack the returned dataset splits into train / validation / test.
# The tutorials on the DeepChem GitHub (tutorial 06 in particular) were referenced heavily.
import warnings
# Ignore every warning from this point on. This runs before the deepchem
# import so that warnings raised while importing it are hidden as well.
warnings.filterwarnings('ignore')
import deepchem as dc

# The loader's result unpacks into three items: the tasks (their count sizes
# the model output), the dataset splits, and the transformers (presumably the
# transforms applied to the data -- they are handed to model.evaluate() later).
tasks, datasets, transformers = dc.molnet.load_delaney(featurizer='GraphConv')
# Unpack the three splits. valid_dataset is not used by the cells shown here.
train_dataset, valid_dataset, test_dataset = datasets

In [15]:
# Build a graph-convolution regression model with one output per task and
# train it on the training split for a fixed number of epochs.
# No callbacks / early stopping are passed to fit() and valid_dataset is not
# consulted, so the chance of overfitting is significantly higher.
# Only n_tasks and mode are set here, so the size and shape of the network are
# left at the library defaults.
# NOTE(review): GraphConvModel appears to accept architecture keyword
# arguments (e.g. graph_conv_layers, dense_layer_size, dropout) that could be
# explored to reduce the loss -- confirm against the DeepChem API docs.
N_EPOCHS = 100  # number of training epochs
n_tasks = len(tasks)
model = dc.models.GraphConvModel(n_tasks=n_tasks, mode='regression')
# Kept as the last expression of the cell so the value returned by fit()
# (presumably the training loss) is displayed as the cell output.
model.fit(dataset=train_dataset, nb_epoch=N_EPOCHS)

0.02226135492324829

In [16]:
# Metric 1 of 3: mean absolute error (MAE) on the training and test splits.
# transformers is passed to evaluate(), presumably so the score is computed
# on untransformed values -- confirm against the DeepChem docs.
# NOTE: in the recorded output the test error is far larger than the training
# error, which is consistent with the overfitting risk noted at training time.
metric = dc.metrics.Metric(dc.metrics.mae_score)
for _label, _split in (('Training set score:', train_dataset),
                       ('Test set score:', test_dataset)):
    print(_label, model.evaluate(_split, [metric], transformers))

Training set score: {'mae_score': 0.2609849997834114}
Test set score: {'mae_score': 1.0435558278876267}


In [17]:
# Metric 2 of 3: root-mean-square (RMS) error on the training and test splits.
# transformers is passed to evaluate(), presumably so the score is computed
# on untransformed values -- confirm against the DeepChem docs.
# NOTE: in the recorded output the test error is far larger than the training
# error, which is consistent with the overfitting risk noted at training time.
metric = dc.metrics.Metric(dc.metrics.rms_score)
for _label, _split in (('Training set score:', train_dataset),
                       ('Test set score:', test_dataset)):
    print(_label, model.evaluate(_split, [metric], transformers))

Training set score: {'rms_score': 0.34640199888894085}
Test set score: {'rms_score': 1.3309637001470798}


In [18]:
# Metric 3 of 3: squared Pearson correlation (R^2) on the training and test splits.
# transformers is passed to evaluate(), presumably so the score is computed
# on untransformed values -- confirm against the DeepChem docs.
# NOTE: in the recorded output the test R^2 is well below the training R^2,
# which is consistent with the overfitting risk noted at training time.
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)
for _label, _split in (('Training set score:', train_dataset),
                       ('Test set score:', test_dataset)):
    print(_label, model.evaluate(_split, [metric], transformers))

Training set score: {'pearson_r2_score': 0.9750148657449388}
Test set score: {'pearson_r2_score': 0.6298494902434739}
