In [16]:
import numpy as np
import deepchem as dc

In [17]:
# Load the Tox21 benchmark through MoleculeNet. The call returns three things:
# the task names, a tuple of dataset splits, and a list of transformers.
tox21_tasks, tox21_datasets, transformers = dc.molnet.load_tox21()

In [18]:
# Show the task names; each task corresponds to one biochemical assay.
tox21_tasks

['NR-AR',
 'NR-AR-LBD',
 'NR-AhR',
 'NR-Aromatase',
 'NR-ER',
 'NR-ER-LBD',
 'NR-PPAR-gamma',
 'SR-ARE',
 'SR-ATAD5',
 'SR-HSE',
 'SR-MMP',
 'SR-p53']

In [19]:
# Number of tasks, i.e. how many labels are predicted per molecule.
len(tox21_tasks)

12

In [20]:
# Display the returned splits: a tuple of three DiskDataset objects.
tox21_datasets

(<DiskDataset X.shape: (6264, 1024), y.shape: (6264, 12), w.shape: (6264, 12), task_names: ['NR-AR' 'NR-AR-LBD' 'NR-AhR' ... 'SR-HSE' 'SR-MMP' 'SR-p53']>,
 <DiskDataset X.shape: (783, 1024), y.shape: (783, 12), w.shape: (783, 12), ids: ['N#C[C@@H]1CC(F)(F)CN1C(=O)CNC1CC2CCC(C1)N2c1ncccn1'
  'CN(C)C(=O)NC1(c2ccccc2)CCN(CCC[C@@]2(c3ccc(Cl)c(Cl)c3)CCCN(C(=O)c3ccccc3)C2)CC1'
  'CSc1nnc(C(C)(C)C)c(=O)n1N' ...
  'O=C(O[C@H]1CN2CCC1CC2)N1CCc2ccccc2[C@@H]1c1ccccc1'
  'C#C[C@]1(O)CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@@H]4[C@H]3C(=C)C[C@@]21CC'
  'NC(=O)C(c1ccccc1)(c1ccccc1)[C@@H]1CCN(CCc2ccc3c(c2)CCO3)C1'], task_names: ['NR-AR' 'NR-AR-LBD' 'NR-AhR' ... 'SR-HSE' 'SR-MMP' 'SR-p53']>,
 <DiskDataset X.shape: (784, 1024), y.shape: (784, 12), w.shape: (784, 12), ids: ['CC1(C)S[C@@H]2[C@H](NC(=O)Cc3ccccc3)C(=O)N2[C@H]1C(=O)O.CC1(C)S[C@@H]2[C@H](NC(=O)Cc3ccccc3)C(=O)N2[C@H]1C(=O)O.c1ccc(CNCCNCc2ccccc2)cc1'
  'CC(C)(c1ccc(Oc2ccc3c(c2)C(=O)OC3=O)cc1)c1ccc(Oc2ccc3c(c2)C(=O)OC3=O)cc1'
  'Cc1cc(C(C)(C)C)c(O)c(C)c1

In [21]:
# Confirm the concrete dataset class of the first split.
type(tox21_datasets[0])

deepchem.data.datasets.DiskDataset

In [22]:
# Unpack the three splits; tuple unpacking also fails loudly if the loader
# ever returns a different number of datasets.
train_dataset, valid_dataset, test_dataset = tox21_datasets

In [23]:
# Feature-matrix shape of every split; each row (sample) is one molecule.
tuple(split.X.shape for split in (train_dataset, valid_dataset, test_dataset))

((6264, 1024), (783, 1024), (784, 1024))

In [24]:
# Label-matrix shape of every split: one column per task.
tuple(split.y.shape for split in (train_dataset, valid_dataset, test_dataset))

((6264, 12), (783, 12), (784, 12))

In [25]:
# The `w` field stores one weight per (molecule, task) entry, so it has the
# same shape as `y`. The weights let us tell which labels were actually
# measured (presumably unmeasured labels carry a weight of zero -- confirm).
train_dataset.w.shape

(6264, 12)

In [26]:
# Count the (molecule, task) entries of the training set with a non-zero weight.
np.count_nonzero(train_dataset.w)

63647

In [27]:
# Count the entries whose weight is exactly zero. Zero and non-zero counts
# together must add up to the full 6264 * 12 entries of the training set.
np.count_nonzero(train_dataset.w==0)

11521

In [28]:
# Inspect the transformers returned by the loader; the same list is passed
# to model.evaluate() later on.
transformers

[<deepchem.trans.transformers.BalancingTransformer at 0x7fd9297df450>]

In [29]:
# Multitask classifier with a single hidden layer of width 1000.
# The number of tasks and the input width are derived from the loaded data
# rather than hard-coded, so this cell stays correct if the task list or the
# featurization (and therefore the feature count) changes.
model = dc.models.MultitaskClassifier(
    n_tasks=len(tox21_tasks),
    n_features=train_dataset.X.shape[1],
    layer_sizes=[1000],
)

In [30]:
# Train on the training split for 10 epochs. The displayed number is the
# value returned by fit() (presumably a training loss -- confirm in the docs).
model.fit(train_dataset, nb_epoch=10)

0.483587106068929

In [31]:
# ROC AUC is computed per task and then averaged across tasks with np.mean.
metric = dc.metrics.Metric(dc.metrics.roc_auc_score, task_averager=np.mean)

In [32]:
# Score the fitted model on the data it was trained on.
train_scores = model.evaluate(
    train_dataset,
    metrics=[metric],
    transformers=transformers,
)

In [33]:
# Task-averaged ROC AUC on the training split.
train_scores

{'mean-roc_auc_score': 0.958677043492781}

In [34]:
# Score the fitted model on the held-out test split.
test_scores = model.evaluate(
    test_dataset,
    metrics=[metric],
    transformers=transformers,
)

In [35]:
# Task-averaged ROC AUC on the test split. It is clearly lower than the
# training score, which suggests the model overfits the training data.
test_scores

{'mean-roc_auc_score': 0.684632436475756}

In [36]:
from rdkit import Chem

In [37]:
import tensorflow as tf