## figure out what python interpreter we're using!

In [1]:
import sys
sys.executable

'/Users/t/miniconda3/envs/deepchem/bin/python'

## import the deepchem package

In [2]:
import deepchem

  from numpy.core.umath_tests import inner1d


In [3]:
import deepchem as dc
import numpy as np

In [4]:
# Toy data drawn uniformly from [0, 1): 4 samples x 5 features, plus one target per sample.
x = np.random.random_sample(size=(4, 5))
y = np.random.random_sample(size=(4, 1))

In [5]:
# Wrap the in-memory arrays as a DeepChem dataset: x becomes dataset.X, y becomes dataset.y.
dataset = dc.data.NumpyDataset(x, y)

In [6]:
# Show the feature array held by the dataset (the 4x5 array passed in as x).
print(dataset.X)

[[0.6250064  0.52161498 0.18906039 0.50223102 0.99218686]
 [0.62883743 0.84585718 0.10659265 0.18139691 0.4332577 ]
 [0.70047656 0.08372012 0.33341726 0.51379063 0.38800355]
 [0.88272982 0.46556773 0.18703224 0.41570784 0.34654327]]


In [8]:
# Show the label array held by the dataset (the 4x1 array passed in as y).
print(dataset.y)

[[0.35733986]
 [0.1030221 ]
 [0.72971561]
 [0.32522437]]


In [10]:
## load in a toxicity data set

In [9]:
# Load the Tox21 data through MoleculeNet. The call returns the task names,
# a tuple of datasets, and a list of transformer objects.
# With reload=False the log below shows the raw CSV being featurized from scratch (~83 s).
# NOTE(review): presumably reload=True would reuse a cached copy on later runs — confirm against the DeepChem docs.
tox21_tasks, tox21_datasets, transformers = dc.molnet.load_tox21(reload=False)

Loading raw samples now.
shard_size: 8192
About to start loading CSV from /var/folders/y6/s1r3q87933s6gjdrtmts1npc0000gq/T/tox21.csv.gz
Loading shard 1 of size 8192.
Featurizing sample 0
Featurizing sample 1000
Featurizing sample 2000
Featurizing sample 3000
Featurizing sample 4000
Featurizing sample 5000
Featurizing sample 6000
Featurizing sample 7000
TIMING: featurizing shard 0 took 83.346 s
TIMING: dataset construction took 84.835 s
Loading dataset from disk.
TIMING: dataset construction took 2.820 s
Loading dataset from disk.
TIMING: dataset construction took 2.317 s
Loading dataset from disk.
TIMING: dataset construction took 1.066 s
Loading dataset from disk.
TIMING: dataset construction took 1.409 s
Loading dataset from disk.


In [11]:
# Names of the prediction tasks returned by load_tox21 (12 of them in the output below).
tox21_tasks

[u'NR-AR',
 u'NR-AR-LBD',
 u'NR-AhR',
 u'NR-Aromatase',
 u'NR-ER',
 u'NR-ER-LBD',
 u'NR-PPAR-gamma',
 u'SR-ARE',
 u'SR-ATAD5',
 u'SR-HSE',
 u'SR-MMP',
 u'SR-p53']

In [12]:
# A tuple of three DiskDataset objects; the next cell unpacks it into train/valid/test.
tox21_datasets

(<deepchem.data.datasets.DiskDataset at 0x10ae6ff10>,
 <deepchem.data.datasets.DiskDataset at 0x1a2d58ff10>,
 <deepchem.data.datasets.DiskDataset at 0x10ae6c150>)

In [13]:
# Unpack the 3-tuple into the training, validation and test splits.
train_dataset, valid_dataset, test_dataset = tox21_datasets

In [14]:
# Training-split feature matrix, shaped (samples, features).
train_dataset.X.shape

(6264, 1024)

In [18]:
# Training-split labels, shaped (samples, tasks): one column per entry in tox21_tasks.
train_dataset.y.shape

(6264, 12)

In [16]:
# Validation-split feature matrix, shaped (samples, features).
valid_dataset.X.shape

(783, 1024)

In [17]:
# Validation-split labels, shaped (samples, tasks).
valid_dataset.y.shape

(783, 12)

In [19]:
# Test-split feature matrix, shaped (samples, features).
test_dataset.X.shape

(784, 1024)

In [20]:
# Test-split labels, shaped (samples, tasks).
test_dataset.y.shape

(784, 12)

In [21]:
# Weight array of the training split; it has the same shape as the labels.
train_dataset.w.shape

(6264, 12)

## weights: entries with zero weight are ignored

In [23]:
# Repeat of the shape check: one weight per (sample, task) entry.
train_dataset.w.shape

(6264, 12)

In [24]:
# Number of weight entries that are non-zero; compare with the total entry count in the next cell.
np.count_nonzero(train_dataset.w)

62166

In [25]:
# Total number of weight entries (samples x tasks), to compare against the
# non-zero count. Read from the array itself instead of hard-coding 6264*12.
train_dataset.w.size

75168

## OK, so what does a Transformer do?

In [26]:
# Re-check the training feature matrix shape before counting its zero / non-zero entries.
train_dataset.X.shape

(6264, 1024)

In [27]:
# Number of non-zero entries in the training feature matrix.
np.count_nonzero(train_dataset.X)

184664

In [31]:
# Number of entries that are exactly zero: count the True values of the boolean mask (X == 0).
np.count_nonzero(train_dataset.X == 0)

6229672

In [29]:
# Cross-check of the zero count: total entries minus non-zero entries.
# Computed from the array itself rather than the hard-coded 6264*1024 - 184664.
train_dataset.X.size - np.count_nonzero(train_dataset.X)

6229672

In [22]:
# The transformer list returned by load_tox21; the output shows a single BalancingTransformer.
transformers

[<deepchem.trans.transformers.BalancingTransformer at 0x10b07f7d0>]

So, what's going on here is that (without a transformer) our objective function is going to prioritize the trivially ~90% "correct" answer of always predicting the most common label... well, let's just say we think the book is confusing, and we will move on for now.

(Titus question: what's the difference between weighting and balancing transformers?)

In [35]:
#dc.trans.transformers.BalancingTransformer??

In [36]:
# Multitask classifier with a single entry in layer_sizes (1000).
# Task and feature counts are taken from the loaded data instead of being
# hard-coded as 12 and 1024, so the cell stays correct if the data changes.
model = dc.models.MultitaskClassifier(
    n_tasks=len(tox21_tasks),
    n_features=train_dataset.X.shape[1],
    layer_sizes=[1000],
)

In [37]:
# Train on the training split with nb_epoch=10; the number displayed below is fit()'s return value.
# NOTE(review): presumably that number is a training loss — confirm against the DeepChem docs.
model.fit(train_dataset, nb_epoch=10)

866.282998608786

(Here there was some question over batch size, and what exactly "epoch" means: one epoch is one full pass over the training set, which is processed in batches.)

In [38]:
# ROC AUC metric combined with np.mean; the results below list one value per task
# ("computed_metrics") and report the summary under the key 'mean-roc_auc_score'.
metric = dc.metrics.Metric(dc.metrics.roc_auc_score, np.mean)

In [39]:
# Score the model on the training and test splits. Each call prints a
# "computed_metrics" list and returns a dict of scores.
# NOTE(review): valid_dataset is never evaluated in the visible cells.
train_scores = model.evaluate(train_dataset, [metric], transformers)
test_scores = model.evaluate(test_dataset, [metric], transformers)

computed_metrics: [0.9904099710348984, 0.9960189868336544, 0.9601443886324939, 0.9805511014958632, 0.9004765229340514, 0.9842199148597459, 0.9911510618994931, 0.9099619224557087, 0.9873910796684366, 0.9721128324341204, 0.9469964504422851, 0.9744951613805448]
computed_metrics: [0.7903707802988379, 0.8595515090204013, 0.899861392472545, 0.8046891354406531, 0.7103204862851042, 0.7991324657991326, 0.7232704402515724, 0.7198543995672557, 0.8610148767067454, 0.7181938035596571, 0.8638385433987362, 0.7871858832224685]


In [40]:
# Display the score dict for the training split, then for the test split.
for split_scores in (train_scores, test_scores):
    print(split_scores)

{'mean-roc_auc_score': 0.9661607828392746}
{'mean-roc_auc_score': 0.7947736430019258}


Luiz made a more complicated model with 3 layers, and it got worse (mean test ROC AUC of 0.783 vs. 0.795 for the single-layer model above).

In [42]:
# Deeper variant: layer_sizes of [1000, 1000, 1000], trained with nb_epoch=10.
# Task and feature counts come from the loaded data rather than the hard-coded 12 and 1024.
luiz_model = dc.models.MultitaskClassifier(
    n_tasks=len(tox21_tasks),
    n_features=train_dataset.X.shape[1],
    layer_sizes=[1000, 1000, 1000],
)
luiz_model.fit(train_dataset, nb_epoch=10)

# NOTE(review): architectures are being compared on test_dataset; consider using
# valid_dataset for model selection so the test split stays held out.
luiz_train_scores = luiz_model.evaluate(train_dataset, [metric], transformers)
luiz_test_scores = luiz_model.evaluate(test_dataset, [metric], transformers)

print(luiz_train_scores)
print(luiz_test_scores)

computed_metrics: [0.9293118029060556, 0.9691403741168915, 0.8425281397055282, 0.8948347135078729, 0.7562060647701794, 0.8937324549247028, 0.9069461606592419, 0.7915720586143122, 0.916733799405636, 0.8168919036136217, 0.86342208271813, 0.8953717066621916]
computed_metrics: [0.7187603763143331, 0.8733771707975047, 0.8396844013221025, 0.7833368222733933, 0.6836750255996862, 0.8220887554220888, 0.8171135385633896, 0.7195839343671115, 0.8515046532164934, 0.678930454845089, 0.8105046558031261, 0.7979028085735402]
{'mean-roc_auc_score': 0.873057605133697}
{'mean-roc_auc_score': 0.7830385497581549}


In [43]:
# Wider variant: a single entry of 3000 in layer_sizes, trained with nb_epoch=10.
# Task and feature counts come from the loaded data rather than the hard-coded 12 and 1024.
luiz_model = dc.models.MultitaskClassifier(
    n_tasks=len(tox21_tasks),
    n_features=train_dataset.X.shape[1],
    layer_sizes=[3000],
)
luiz_model.fit(train_dataset, nb_epoch=10)

# NOTE(review): architectures are being compared on test_dataset; consider using
# valid_dataset for model selection so the test split stays held out.
luiz_train_scores = luiz_model.evaluate(train_dataset, [metric], transformers)
luiz_test_scores = luiz_model.evaluate(test_dataset, [metric], transformers)

print(luiz_train_scores)
print(luiz_test_scores)

computed_metrics: [0.9924242537235517, 0.9973948298008992, 0.967072011581688, 0.9858192082837229, 0.9123408357990626, 0.9872467290649496, 0.9930207264905495, 0.9230685479484153, 0.9892058385473577, 0.9772881355932204, 0.9555313302940376, 0.9790770263467025]
computed_metrics: [0.7784910533111973, 0.8280222559433484, 0.8886874933361766, 0.8018630939920453, 0.7100154687465958, 0.790256923590257, 0.6840450182058921, 0.682710512080779, 0.8621696895591331, 0.6843688200395517, 0.8594321582973063, 0.7391444937176643]
{'mean-roc_auc_score': 0.971624122789513}
{'mean-roc_auc_score': 0.7757672484016621}


In [44]:
# layer_sizes of [1000, 1000, 1000] again, but trained with nb_epoch=20 instead of 10.
# Task and feature counts come from the loaded data rather than the hard-coded 12 and 1024.
luiz_model = dc.models.MultitaskClassifier(
    n_tasks=len(tox21_tasks),
    n_features=train_dataset.X.shape[1],
    layer_sizes=[1000, 1000, 1000],
)
luiz_model.fit(train_dataset, nb_epoch=20)

# NOTE(review): architectures are being compared on test_dataset; consider using
# valid_dataset for model selection so the test split stays held out.
luiz_train_scores = luiz_model.evaluate(train_dataset, [metric], transformers)
luiz_test_scores = luiz_model.evaluate(test_dataset, [metric], transformers)

print(luiz_train_scores)
print(luiz_test_scores)

computed_metrics: [0.9843643263898625, 0.9956035244059087, 0.9488229025897311, 0.9727965782628722, 0.8286630391820059, 0.9677049797809665, 0.9922332867599748, 0.840584804130786, 0.9829446594238707, 0.9400770258314106, 0.9242386271794905, 0.9659797505174247]
computed_metrics: [0.7528131341080981, 0.8679817905918057, 0.8793901268791982, 0.8326878794222315, 0.7091657770321793, 0.8368034701368035, 0.7831016219794771, 0.7399815182113234, 0.8936213572447524, 0.6746044825313118, 0.867974725640173, 0.7912509238728751]
{'mean-roc_auc_score': 0.9453344587045254}
{'mean-roc_auc_score': 0.8024480673041857}
