In [1]:
"""Generative Adversarial Networks."""

from deepchem.models import TensorGraph
from deepchem.models.tensorgraph import layers
from collections import Sequence
import numpy as np
import tensorflow as tf
import time
import deepchem as dc
from deepchem.data.datasets import NumpyDataset # import NumpyDataset



In [2]:
# Feature-vector width handed to the model constructors further down.
n_features = 1024

# ROC-AUC metric; np.mean is supplied as the second argument to Metric
# (presumably the reducer applied across tasks).
metric = dc.metrics.Metric(dc.metrics.roc_auc_score, np.mean)

# MolNet HIV benchmark with ECFP features and a scaffold split.
hiv_tasks, hiv_datasets, hiv_transformers = dc.molnet.load_hiv(
    featurizer='ECFP',
    split='scaffold',
    reload=True,
)
hiv_train_dataset, hiv_valid_dataset, hiv_test_dataset = hiv_datasets

Loading dataset from disk.
Loading dataset from disk.
Loading dataset from disk.


In [3]:
# Feature-vector width handed to the model constructors further down.
n_features = 1024

# ROC-AUC metric; np.mean is supplied as the second argument to Metric
# (presumably the reducer applied across tasks).
metric = dc.metrics.Metric(dc.metrics.roc_auc_score, np.mean)

# Same HIV benchmark, but with a random split instead of a scaffold split.
# NOTE(review): none of the rhiv_* names are referenced by the cells visible
# here — confirm whether this load is still needed.
rhiv_tasks, rhiv_datasets, rhiv_transformers = dc.molnet.load_hiv(
    featurizer='ECFP',
    split='random',
    reload=True,
)
rhiv_train_dataset, rhiv_valid_dataset, rhiv_test_dataset = rhiv_datasets

Loading dataset from disk.
Loading dataset from disk.
Loading dataset from disk.


In [4]:
# Scratch cell: prints a fixed marker string and nothing else. Presumably a
# kernel liveness check left over from development — candidate for removal.
print("woop")

woop


In [5]:
# Hyperparameters for the baseline multitask network (one hidden layer).
multitask_hparams = dict(
    layer_sizes=[1000],
    dropouts=[.25],
    learning_rate=0.001,
    batch_size=50,
)

# Single task (HIV activity), n_features inputs.
multitask_model = dc.models.MultitaskClassifier(
    1, n_features, **multitask_hparams)

# Left as the final bare expression so the value returned by fit() is
# displayed as the cell output.
multitask_model.fit(hiv_train_dataset)

Instructions for updating:
Colocations handled automatically by placer.


Instructions for updating:
Colocations handled automatically by placer.


Instructions for updating:
Use standard file APIs to delete files with this prefix.


Instructions for updating:
Use standard file APIs to delete files with this prefix.


9.480679180894233

In [6]:
print("Evaluating model")

# Score all three splits up front (train, then validation, then test) so the
# evaluation log lines are emitted before the summary below.
train_scores, valid_scores, test_scores = (
    multitask_model.evaluate(split, [metric], hiv_transformers)
    for split in (hiv_train_dataset, hiv_valid_dataset, hiv_test_dataset)
)

for heading, scores in (
    ("HIV Train scores", train_scores),
    ("HIV Validation scores", valid_scores),
    ("HIV Test scores", test_scores),
):
    print(heading)
    print(scores)

Evaluating model
computed_metrics: [0.9935353917530889]
computed_metrics: [0.753899360670194]
computed_metrics: [0.7411160895343674]
HIV Train scores
{'mean-roc_auc_score': 0.9935353917530889}
HIV Validation scores
{'mean-roc_auc_score': 0.753899360670194}
HIV Test scores
{'mean-roc_auc_score': 0.7411160895343674}


In [6]:
# Hyperparameters mirror the baseline multitask network so results compare
# like for like.
prog_hparams = dict(
    layer_sizes=[1000],
    dropouts=[.25],
    learning_rate=0.001,
    batch_size=50,
)

# Single task (HIV activity), n_features inputs.
prog_model = dc.models.ProgressiveMultitaskClassifier(
    1, n_features, **prog_hparams)

# Left as the final bare expression so any value returned by fit() is shown
# as the cell output.
prog_model.fit(hiv_train_dataset)


In [7]:
print("Evaluating model")

# Score all three splits up front (train, then validation, then test) so the
# evaluation log lines are emitted before the summary below.
train_scores, valid_scores, test_scores = (
    prog_model.evaluate(split, [metric], hiv_transformers)
    for split in (hiv_train_dataset, hiv_valid_dataset, hiv_test_dataset)
)

for heading, scores in (
    ("HIV Train scores", train_scores),
    ("HIV Validation scores", valid_scores),
    ("HIV Test scores", test_scores),
):
    print(heading)
    print(scores)

Evaluating model
computed_metrics: [0.9954860810666172]
computed_metrics: [0.7482975700568293]
computed_metrics: [0.7406366480619556]
HIV Train scores
{'mean-roc_auc_score': 0.9954860810666172}
HIV Validation scores
{'mean-roc_auc_score': 0.7482975700568293}
HIV Test scores
{'mean-roc_auc_score': 0.7406366480619556}


In [8]:
# Hyperparameters mirror the baseline multitask network so results compare
# like for like.
robust_hparams = dict(
    layer_sizes=[1000],
    dropouts=[.25],
    learning_rate=0.001,
    batch_size=50,
)

# Single task (HIV activity), n_features inputs.
robust_model = dc.models.RobustMultitaskClassifier(
    1, n_features, **robust_hparams)

# Left as the final bare expression so the value returned by fit() is
# displayed as the cell output.
robust_model.fit(hiv_train_dataset)

9.554179872050502

In [9]:
print("Evaluating model")

# Score all three splits up front (train, then validation, then test) so the
# evaluation log lines are emitted before the summary below.
train_scores, valid_scores, test_scores = (
    robust_model.evaluate(split, [metric], hiv_transformers)
    for split in (hiv_train_dataset, hiv_valid_dataset, hiv_test_dataset)
)

for heading, scores in (
    ("HIV Train scores", train_scores),
    ("HIV Validation scores", valid_scores),
    ("HIV Test scores", test_scores),
):
    print(heading)
    print(scores)

Evaluating model
computed_metrics: [0.9959683421823053]
computed_metrics: [0.7495652067411327]
computed_metrics: [0.7474299426408391]
HIV Train scores
{'mean-roc_auc_score': 0.9959683421823053}
HIV Validation scores
{'mean-roc_auc_score': 0.7495652067411327}
HIV Test scores
{'mean-roc_auc_score': 0.7474299426408391}


In [10]:
import os
import shutil
import numpy as np
import deepchem as dc
from deepchem.molnet import load_tox21
from sklearn.linear_model import LogisticRegression

def model_builder(model_dir_logreg):
  """Build a DeepChem-wrapped logistic-regression classifier.

  Used as the per-task factory handed to SingletaskToMultitask.

  Args:
    model_dir_logreg: Forwarded unchanged as the second positional argument
      of SklearnModel (a model directory, judging by the name).

  Returns:
    A dc.models.sklearn_models.SklearnModel wrapping an L2-penalised,
    class-balanced LogisticRegression.
  """
  # scikit-learn's C is the inverse of the regularisation strength.
  inverse_reg_strength = 1. / 0.05
  classifier = LogisticRegression(
      penalty="l2",
      C=inverse_reg_strength,
      class_weight="balanced",
      n_jobs=-1)
  return dc.models.sklearn_models.SklearnModel(classifier, model_dir_logreg)


# Wrap the single-task logistic-regression factory so it exposes the same
# fit/evaluate/predict calls as the multitask networks.
log_model = dc.models.multitask.SingletaskToMultitask(hiv_tasks, model_builder)

# Train on the scaffold-split training set.
log_model.fit(hiv_train_dataset)

print("Evaluating model")

# Only the train and validation splits are scored in this cell.
train_scores, valid_scores = (
    log_model.evaluate(split, [metric], hiv_transformers)
    for split in (hiv_train_dataset, hiv_valid_dataset)
)

for heading, scores in (
    ("Train scores", train_scores),
    ("Validation scores", valid_scores),
):
    print(heading)
    print(scores)

About to initialize singletask to multitask model
Initializing directory for task HIV_active
About to create task-specific datasets
Splitting multitask dataset into singletask datasets
TIMING: dataset construction took 0.005 s
Loading dataset from disk.
Processing shard 0
	Task HIV_active
Processing shard 1
	Task HIV_active
Processing shard 2
	Task HIV_active
Processing shard 3
	Task HIV_active
Processing shard 4
	Task HIV_active
Processing shard 5
	Task HIV_active
Dataset for task HIV_active has shape ((32901, 1024), (32901, 1), (32901, 1), (32901,))
Fitting model for task HIV_active


  " = {}.".format(self.n_jobs))


Evaluating model
computed_metrics: [0.929515587470725]
computed_metrics: [0.7572766632373114]
Train scores
{'mean-roc_auc_score': 0.929515587470725}
Validation scores
{'mean-roc_auc_score': 0.7572766632373114}


In [19]:
# For each model, take index 0 on the second axis and index 1 on the third
# axis of predict()'s output — presumably [sample, task, class], i.e. the
# score for the positive class of the only task; confirm against DeepChem.
# NOTE: despite the "*correct" names these are raw predictions, not
# correctness flags.
multicorrect = multitask_model.predict(hiv_test_dataset, hiv_transformers)[:, 0, 1]
progcorrect = prog_model.predict(hiv_test_dataset, hiv_transformers)[:, 0, 1]
robcorrect = robust_model.predict(hiv_test_dataset, hiv_transformers)[:, 0, 1]
logcorrect = log_model.predict(hiv_test_dataset, hiv_transformers)[:, 0, 1]

# Round each score to the nearest integer, giving hard 0/1 calls stored as
# floats.
rmulticorrect = np.rint(multicorrect)
rprogcorrect = np.rint(progcorrect)
rrobcorrect = np.rint(robcorrect)
rlogcorrect = np.rint(logcorrect)

In [20]:
# Pairwise agreement masks: True where both models in the pair made a
# non-zero (positive) call for the sample.
multiprog = np.logical_and(rmulticorrect, rprogcorrect)
multirob = np.logical_and(rmulticorrect, rrobcorrect)
multilog = np.logical_and(rmulticorrect, rlogcorrect)
progrob = np.logical_and(rprogcorrect, rrobcorrect)
proglog = np.logical_and(rprogcorrect, rlogcorrect)
roblog = np.logical_and(rrobcorrect, rlogcorrect)

# Samples flagged by all four models. The pairwise masks are boolean, so
# `&` is element-wise logical AND here.
every = multiprog & multirob & multilog & progrob & proglog & roblog

# Samples flagged by the three neural models only (logistic regression
# excluded).
every2 = multiprog & multirob & progrob


In [24]:
# Number of positive calls made by each individual model.
for model_label, hard_calls in (
    ("Multitask", rmulticorrect),
    ("Progressive", rprogcorrect),
    ("Robust", rrobcorrect),
    ("LogModel", rlogcorrect),
):
    print(model_label, np.count_nonzero(hard_calls))

# Pairwise overlaps, in the order: multi/prog, multi/rob, multi/log,
# prog/rob, prog/log, rob/log.
print("Intersection of models")
for pair_mask in (multiprog, multirob, multilog, progrob, proglog, roblog):
    print(np.count_nonzero(pair_mask))

# Overlap of all four models, then of the three neural models only.
print("All of them")
for joint_mask in (every, every2):
    print(np.count_nonzero(joint_mask))


Multitask 99
Progressive 175
Robust 204
LogModel 797
Intersection of models
79
78
83
121
137
161
All of them
62
72


In [31]:
# Per-sample mask: True where the multitask model made a positive call AND
# the ground-truth label is non-zero.
# The labels are stored as a column of shape (n_samples, 1) while
# rmulticorrect is 1-D; combining them directly would broadcast to an
# (n_samples, n_samples) matrix, so select the single task column first.
test = np.logical_and(rmulticorrect, hiv_test_dataset.y[:, 0])

In [50]:
# List the test-set indices where the three neural models all made a positive
# call (every2) and the ground-truth label is 1, then print how many there are.
#
# The labels are read once rather than on every loop iteration: the attribute
# may be re-materialised on each access for disk-backed datasets
# (NOTE(review): confirm against the DeepChem dataset implementation).
# Column 0 is the only task; labels have shape (n_samples, 1).
test_labels = np.asarray(hiv_test_dataset.y)[:, 0]
hit_indices = np.flatnonzero(np.logical_and(every2 == 1, test_labels == 1))

for i in hit_indices:
    print(i)

ticker = len(hit_indices)
print(ticker)

140
443
444
445
446
463
468
470
685
695
697
698
699
707
740
741
742
801
906
946
947
948
949
950
951
975
977
1102
1191
1192
1475
3554
3774
33
