# **Machine Learning With DeepChem**

# Installing the DeepChem Dependencies

In [5]:
# Colab-specific magic requesting the TensorFlow 1.x runtime.
%tensorflow_version 1.x
# Fetch DeepChem's Colab install helper and save it as deepchem_installer.py.
# NOTE(review): the URL tracks the `master` branch, so the downloaded script is
# unpinned and may differ between runs.
!curl -Lo deepchem_installer.py https://raw.githubusercontent.com/deepchem/deepchem/master/scripts/colab_install.py
# Importing executes the downloaded module's top-level code.
# NOTE(review): nothing on `deepchem_installer` is called in the visible cells;
# the actual install appears to be done by the conda cell further down —
# confirm whether this cell is still needed.
import deepchem_installer

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100  3490  100  3490    0     0  50579      0 --:--:-- --:--:-- --:--:-- 50579


# Training a model to predict toxicity of molecules

In [7]:
import numpy as np

In [8]:
!wget -c https://repo.anaconda.com/archive/Anaconda3-2019.10-Linux-x86_64.sh
!chmod +x Anaconda3-2019.10-Linux-x86_64.sh
!bash ./Anaconda3-2019.10-Linux-x86_64.sh -b -f -p /usr/local
!conda install -y -c deepchem -c rdkit -c conda-forge -c omnia deepchem-gpu=2.3.0
import sys
sys.path.append('/usr/local/lib/python3.7/site-packages/')
import deepchem as dc

--2020-09-20 07:33:21--  https://repo.anaconda.com/archive/Anaconda3-2019.10-Linux-x86_64.sh
Resolving repo.anaconda.com (repo.anaconda.com)... 104.16.131.3, 104.16.130.3, 2606:4700::6810:8303, ...
Connecting to repo.anaconda.com (repo.anaconda.com)|104.16.131.3|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 530308481 (506M) [application/x-sh]
Saving to: ‘Anaconda3-2019.10-Linux-x86_64.sh’


2020-09-20 07:34:20 (8.75 MB/s) - ‘Anaconda3-2019.10-Linux-x86_64.sh’ saved [530308481/530308481]

PREFIX=/usr/local
Unpacking payload ...
Collecting package metadata (current_repodata.json): - \ | / - \ | / done
Solving environment: \ | / - \ | / - \ | / - \ | / - \ | done

## Package Plan ##

  environment location: /usr/local

  added / updated specs:
    - _ipyw_jlab_nb_ext_conf==0.1.0=py37_0
    - _libgcc_mutex==0.1=main
    - alabaster==0.7.12=py37_0
    - anaconda-client==1.7.2=py37_0
    - anaconda-navigator==1.9



The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [9]:
import numpy as np
import deepchem as dc

In [10]:
# Load the Tox21 dataset through DeepChem's MoleculeNet loader.
# The call returns three things: the list of task names, the datasets
# (unpacked into train/valid/test further down), and the transformers that are
# later handed to model.evaluate().
# The recorded output shows the first call featurizing the raw CSV, which takes
# roughly 25 seconds.
tox21_tasks, tox21_datasets, transformers = dc.molnet.load_tox21()

Loading raw samples now.
shard_size: 8192
About to start loading CSV from /tmp/tox21.csv.gz
Loading shard 1 of size 8192.
Featurizing sample 0
Featurizing sample 1000
Featurizing sample 2000
Featurizing sample 3000
Featurizing sample 4000
Featurizing sample 5000
Featurizing sample 6000
Featurizing sample 7000
TIMING: featurizing shard 0 took 24.204 s
TIMING: dataset construction took 24.549 s
Loading dataset from disk.
TIMING: dataset construction took 0.409 s
Loading dataset from disk.
TIMING: dataset construction took 0.210 s
Loading dataset from disk.
TIMING: dataset construction took 0.202 s
Loading dataset from disk.
TIMING: dataset construction took 0.336 s
Loading dataset from disk.
TIMING: dataset construction took 0.052 s
Loading dataset from disk.
TIMING: dataset construction took 0.050 s
Loading dataset from disk.


In [11]:
# Show the task names; each one is a separate classification target.
tox21_tasks

['NR-AR',
 'NR-AR-LBD',
 'NR-AhR',
 'NR-Aromatase',
 'NR-ER',
 'NR-ER-LBD',
 'NR-PPAR-gamma',
 'SR-ARE',
 'SR-ATAD5',
 'SR-HSE',
 'SR-MMP',
 'SR-p53']

In [12]:
# Number of prediction tasks; this is the width of the label matrix `y`.
len(tox21_tasks)

12

In [13]:
# Split the loaded datasets into their training, validation and test parts.
# (The bare `tox21_datasets` expression that used to precede this line was a
# no-op: it was not the last expression of the cell, so nothing was displayed.)
train, valid, test = tox21_datasets

In [15]:
# Feature-matrix shape of each split, one per line (train, valid, test).
print(*(subset.X.shape for subset in (train, valid, test)), sep="\n")

(6264, 1024)
(783, 1024)
(784, 1024)


In [16]:
# Label-matrix shape of each split, one per line (train, valid, test).
print(*(subset.y.shape for subset in (train, valid, test)), sep="\n")

(6264, 12)
(783, 12)
(784, 12)


In [17]:
# Training feature matrix shape: (number of samples, number of features).
# NOTE(review): this repeats a value already printed a few cells earlier.
print(train.X.shape)

(6264, 1024)


In [18]:
# Count the non-zero entries of the training weight array `w`.
# Presumably `w` holds one weight per (sample, task) pair and a non-zero weight
# means the label is present — TODO confirm against the DeepChem dataset docs.
np.count_nonzero(train.w)

62166

In [19]:
# Count the entries of `w` that are exactly zero (presumably missing labels —
# TODO confirm). Together with the non-zero count this adds up to
# 6264 * 12 = 75168, i.e. every (sample, task) entry of the training set.
np.count_nonzero(train.w==0)

13002

# Modelling

In [20]:
# Build a multitask classifier for the Tox21 tasks.
# The task and feature counts are read from the loaded data rather than
# hard-coded (they evaluate to 12 and 1024 for this dataset), so the cell stays
# correct if the featurizer or the task list changes.
# layer_sizes=[1000] requests a single hidden layer of 1000 units.
model = dc.models.MultitaskClassifier(
    n_tasks=len(tox21_tasks),
    n_features=train.X.shape[1],
    layer_sizes=[1000],
)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [21]:
# Train on the training split for 10 epochs. The value displayed below the cell
# is the return value of fit() — presumably a training loss; verify against the
# DeepChem docs.
# NOTE(review): no random seed is set in the visible cells, so the numbers
# reported further down may not reproduce exactly.
model.fit(train, nb_epoch=10)










0.09215377395351727

In [22]:
# Evaluation metric: ROC AUC computed per task, then reduced to a single number
# with np.mean (reported under the key 'mean-roc_auc_score').
metric = dc.metrics.Metric(dc.metrics.roc_auc_score, np.mean)

In [23]:
# Score the model on the data it was trained on. The result is a dict keyed by
# metric name; the per-task values are logged as `computed_metrics`.
train_scores = model.evaluate(train, [metric], transformers)

computed_metrics: [0.9906540191688715, 0.997542850834939, 0.9749852856183652, 0.9861706224450207, 0.9249515165619424, 0.986110341200634, 0.9892241901485233, 0.9354593156084456, 0.9922724852626964, 0.9777718023003661, 0.968324687421033, 0.9822470213123007]


In [24]:
# Score the model on the held-out test split, using the same metric and
# transformers as for the training score so the two numbers are comparable.
test_scores = model.evaluate(test, [metric], transformers)

computed_metrics: [0.7723667220070097, 0.8185241387062328, 0.8488751466041156, 0.7895122461796107, 0.705309483866751, 0.7795795795795796, 0.6675769612711023, 0.6545032455824018, 0.8441681950954418, 0.6976763348714569, 0.8354724393082806, 0.7025475794530673]


In [25]:
# Report both scores side by side. In the recorded run the training score
# (~0.975) is far above the test score (~0.760), which suggests the model is
# overfitting the training data.
print("Training data Score: ",train_scores)
print("Test data Score: ",test_scores)

Training data Score:  {'mean-roc_auc_score': 0.975476178156928}
Test data Score:  {'mean-roc_auc_score': 0.7596760060437542}
