Merge pull request #1994 from deepchem/logging
Remove verbose keyword and switch to logging in model classes
Bharath Ramsundar committed Jul 9, 2020
2 parents 0790947 + adeaa4d commit 8e069fe
Showing 5 changed files with 45 additions and 38 deletions.
21 changes: 7 additions & 14 deletions deepchem/models/models.py
@@ -1,9 +1,6 @@
"""
Contains an abstract base class that supports different ML models.
"""
__author__ = "Bharath Ramsundar and Joseph Gomes"
__copyright__ = "Copyright 2016, Stanford University"
__license__ = "MIT"

import sys
import numpy as np
@@ -15,24 +12,22 @@
import sklearn
from sklearn.base import BaseEstimator

import logging
from deepchem.data import Dataset, pad_features
from deepchem.trans import undo_transforms
from deepchem.utils.save import load_from_disk
from deepchem.utils.save import save_to_disk
from deepchem.utils.save import log
from deepchem.utils.evaluate import Evaluator

logger = logging.getLogger(__name__)


class Model(BaseEstimator):
"""
Abstract base class for different ML models.
"""

def __init__(self,
model_instance=None,
model_dir=None,
verbose=True,
**kwargs):
def __init__(self, model_instance=None, model_dir=None, **kwargs):
"""Abstract class for all models.
Parameters
@@ -53,8 +48,6 @@ def __init__(self,
self.model_instance = model_instance
self.model_class = model_instance.__class__

self.verbose = verbose

def __del__(self):
if 'model_dir_is_temp' in dir(self) and self.model_dir_is_temp:
shutil.rmtree(self.model_dir)
@@ -113,13 +106,13 @@ def fit(self, dataset, nb_epoch=10, batch_size=50, **kwargs):
# TODO(rbharath/enf): We need a structured way to deal with potential GPU
# memory overflows.
for epoch in range(nb_epoch):
log("Starting epoch %s" % str(epoch + 1), self.verbose)
logger.info("Starting epoch %s" % str(epoch + 1))
losses = []
for (X_batch, y_batch, w_batch,
ids_batch) in dataset.iterbatches(batch_size):
losses.append(self.fit_on_batch(X_batch, y_batch, w_batch))
log("Avg loss for epoch %d: %f" % (epoch + 1, np.array(losses).mean()),
self.verbose)
logger.info(
"Avg loss for epoch %d: %f" % (epoch + 1, np.array(losses).mean()))

def predict(self, dataset, transformers=[], batch_size=None):
"""
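With the verbose flag gone, output volume is controlled through Python's standard logging machinery rather than a per-model switch. A minimal sketch (not part of this diff) of how a caller might surface or silence the per-epoch messages emitted by fit() above:

import logging

# Send log records to stderr and show INFO-level messages from DeepChem,
# which is where the "Starting epoch ..." and "Avg loss ..." lines now go.
logging.basicConfig()
logging.getLogger("deepchem").setLevel(logging.INFO)

# Roughly the old verbose=False behaviour: hide those messages again.
logging.getLogger("deepchem").setLevel(logging.WARNING)
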
12 changes: 9 additions & 3 deletions deepchem/models/progressive_multitask.py
@@ -3,24 +3,30 @@
import tensorflow as tf
import collections

from deepchem.utils.save import log
import logging
from deepchem.metrics import to_one_hot
from deepchem.metrics import from_one_hot
from deepchem.models import KerasModel, layers
from deepchem.models.losses import L2Loss, SparseSoftmaxCrossEntropy
from deepchem.models.keras_model import _StandardLoss
from tensorflow.keras.layers import Input, Dense, Dropout, ReLU, Concatenate, Add, Multiply, Softmax

logger = logging.getLogger(__name__)


class ProgressiveMultitaskRegressor(KerasModel):
"""Implements a progressive multitask neural network for regression.
Progressive Networks: https://arxiv.org/pdf/1606.04671v3.pdf
Progressive networks allow for multitask learning where each task
gets a new column of weights. As a result, there is no exponential
forgetting where previous tasks are ignored.
References
----------
See [1]_ for a full description of the progressive architecture
.. [1] Rusu, Andrei A., et al. "Progressive neural networks." arXiv preprint
arXiv:1606.04671 (2016).
"""

def __init__(self,
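For context on the architecture the new reference describes, here is a minimal two-column progressive sketch in plain Keras, separate from this diff and from DeepChem's own layers; the layer sizes and names are illustrative only:

import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Concatenate

n_features = 32
inputs = Input(shape=(n_features,))

# Column 1: trained on task 1, then frozen so its weights are never forgotten.
h_t1 = Dense(64, activation="relu", name="task1_hidden")(inputs)
out_t1 = Dense(1, name="task1_out")(h_t1)
column1 = tf.keras.Model(inputs, out_t1)
# ... fit column1 on task 1 here ...
column1.trainable = False

# Column 2: a fresh set of weights for task 2 plus a lateral connection from
# column 1's frozen hidden layer, so task 2 can reuse earlier features
# without overwriting them.
h_t2 = Dense(64, activation="relu", name="task2_hidden")(inputs)
out_t2 = Dense(1, name="task2_out")(Concatenate()([h_t2, h_t1]))
column2 = tf.keras.Model(inputs, out_t2)

Training column2 then touches only the task-2 weights, which is the property the docstring describes as avoiding forgetting of previous tasks.
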
25 changes: 21 additions & 4 deletions deepchem/models/robust_multitask.py
@@ -2,17 +2,28 @@
import tensorflow as tf
import collections

import logging
from deepchem.metrics import to_one_hot
from deepchem.models import KerasModel
from deepchem.models.layers import Stack
from deepchem.models.losses import SoftmaxCrossEntropy, L2Loss

logger = logging.getLogger(__name__)


class RobustMultitaskClassifier(KerasModel):
"""Implements a neural network for robust multitasking.
Key idea is to have bypass layers that feed directly from features to task
output. Hopefully will allow tasks to route around bad multitasking.
The key idea of this model is to have bypass layers that feed
directly from features to task output. This might provide some
flexibility to route around challenges in multitasking with
destructive interference.
References
----------
This technique was introduced in [1]_
.. [1] Ramsundar, Bharath, et al. "Is multitask deep learning practical for pharma?." Journal of chemical information and modeling 57.8 (2017): 2068-2076.
"""

@@ -194,8 +205,14 @@ def create_estimator_inputs(self, feature_columns, weight_column, features,
class RobustMultitaskRegressor(KerasModel):
"""Implements a neural network for robust multitasking.
Key idea is to have bypass layers that feed directly from features to task
output. Hopefully will allow tasks to route around bad multitasking.
The key idea of this model is to have bypass layers that feed
directly from features to task output. This might provide some
flexibility to route around challenges in multitasking with
destructive interference.
References
----------
.. [1] Ramsundar, Bharath, et al. "Is multitask deep learning practical for pharma?." Journal of chemical information and modeling 57.8 (2017): 2068-2076.
"""

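The bypass idea both docstrings describe can be sketched outside DeepChem in a few lines of plain Keras (again not part of this diff; the sizes are illustrative):

import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Concatenate

n_features, n_tasks = 32, 4
inputs = Input(shape=(n_features,))

# Shared trunk used by every task.
shared = Dense(128, activation="relu", name="shared")(inputs)

outputs = []
for task in range(n_tasks):
    # Per-task bypass layer fed directly from the raw features, giving each
    # task a path around the shared representation when multitasking with
    # destructive interference hurts it.
    bypass = Dense(16, activation="relu", name="bypass_%d" % task)(inputs)
    head = Concatenate(name="merge_%d" % task)([shared, bypass])
    outputs.append(Dense(1, name="task_%d_out" % task)(head))

model = tf.keras.Model(inputs, outputs)
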
12 changes: 4 additions & 8 deletions deepchem/models/sklearn_models/__init__.py
@@ -27,23 +27,19 @@ class SklearnModel(Model):
Abstract base class for different ML models.
"""

def __init__(self,
model_instance=None,
model_dir=None,
verbose=True,
**kwargs):
def __init__(self, model_instance=None, model_dir=None, **kwargs):
"""
Parameters
----------
model_instance: sklearn model
Instance of model to wrap.
model_dir: str
verbose: bool
If specified, the model will be saved in this directory.
kwargs: dict
kwargs['use_weights'] is a bool which determines if we pass weights into
self.model_instance.fit()
"""
super(SklearnModel, self).__init__(model_instance, model_dir, verbose,
**kwargs)
super(SklearnModel, self).__init__(model_instance, model_dir, **kwargs)
if 'use_weights' in kwargs:
self.use_weights = kwargs['use_weights']
else:
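A usage sketch for the new signature (the estimator, directory, and weight setting below are arbitrary choices; use_weights is the kwarg handled just above):

from sklearn.ensemble import RandomForestRegressor
from deepchem.models.sklearn_models import SklearnModel

# verbose is no longer accepted; verbosity is handled by the logging module.
model = SklearnModel(model_instance=RandomForestRegressor(n_estimators=100),
                     model_dir="/tmp/rf_model",
                     use_weights=False)
# model.fit(dataset)  # dataset: a deepchem.data.Dataset
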
13 changes: 4 additions & 9 deletions deepchem/models/xgboost_models/__init__.py
@@ -17,11 +17,7 @@ class XGBoostModel(SklearnModel):
Abstract base class for XGBoost model.
"""

def __init__(self,
model_instance=None,
model_dir=None,
verbose=False,
**kwargs):
def __init__(self, model_instance=None, model_dir=None, **kwargs):
"""Abstract class for XGBoost models.
Parameters
@@ -40,7 +36,6 @@ def __init__(self,
self.model_instance = model_instance
self.model_class = model_instance.__class__

self.verbose = verbose
if 'early_stopping_rounds' in kwargs:
self.early_stopping_rounds = kwargs['early_stopping_rounds']
else:
@@ -77,13 +72,13 @@ def fit(self, dataset, **kwargs):
y_train,
early_stopping_rounds=self.early_stopping_rounds,
eval_metric=xgb_metric,
eval_set=[(X_train, y_train), (X_test, y_test)],
verbose=self.verbose)
eval_set=[(X_train, y_train), (X_test, y_test)])

# Since the test size is 20%, when retraining the model on the whole data
# we expect n_estimators to grow by a factor of 1/0.8 = 1.25.
estimated_best_round = np.round(self.model_instance.best_ntree_limit * 1.25)
self.model_instance.n_estimators = np.int64(estimated_best_round)
self.model_instance.fit(X, y, eval_metric=xgb_metric, verbose=self.verbose)
self.model_instance.fit(X, y, eval_metric=xgb_metric)

def _search_param(self, metric, X, y):
'''
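The retraining heuristic in fit() above scales the early-stopped tree count before refitting on the full dataset. A standalone sketch of that arithmetic (the 240 is a made-up early-stopping result; best_ntree_limit is the attribute older XGBoost releases expose after early stopping):

import numpy as np

# Suppose early stopping on the 80% training split settled on 240 trees.
best_ntree_limit = 240

# The 20% held out for evaluation is folded back in for the final fit,
# so scale the tree count by 1 / 0.8 = 1.25 before retraining.
n_estimators = np.int64(np.round(best_ntree_limit * 1.25))
print(n_estimators)  # 300
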
