Skip to content

Commit

Permalink
Merge 4152d36 into 0b8b134
Browse files Browse the repository at this point in the history
  • Loading branch information
rbharath committed Jul 17, 2020
2 parents 0b8b134 + 4152d36 commit 2062007
Show file tree
Hide file tree
Showing 21 changed files with 3,878 additions and 2,530 deletions.
867 changes: 645 additions & 222 deletions deepchem/metrics/__init__.py

Large diffs are not rendered by default.

89 changes: 0 additions & 89 deletions deepchem/metrics/tests/metrics_test.py

This file was deleted.

72 changes: 72 additions & 0 deletions deepchem/metrics/tests/test_metrics.py
@@ -0,0 +1,72 @@
"""
Tests for metrics.
"""
import numpy as np
import deepchem as dc
import unittest
from deepchem import metrics


def test_kappa_score():
    """Compare dc.metrics.kappa_score with a kappa value worked out by hand."""
    labels = [1, 0, 1, 0]
    # Thresholding these scores at 0.5 gives the hard predictions [1, 0, 0, 0].
    scores = [0.8, 0.2, 0.3, 0.4]
    hard_predictions = np.greater(scores, 0.5)
    computed = dc.metrics.kappa_score(labels, hard_predictions)

    # Three of the four thresholded predictions agree with the labels.
    p_observed = 3.0 / 4.0
    # Chance agreement from the marginals: the labels hold two 1s and two 0s,
    # the predictions hold one 1 and three 0s.
    p_chance = ((2 * 1) + (2 * 3)) / 4.0**2
    reference = np.true_divide(p_observed - p_chance, 1.0 - p_chance)
    np.testing.assert_almost_equal(computed, reference)


def test_one_sample():
    """Smoke-test singletask metrics when only one sample has w > 0.

    Nothing is asserted about the scores themselves; the test passes as long
    as every metric call completes without raising.
    """
    np.random.seed(123)
    n_samples = 2
    labels = np.random.randint(2, size=(n_samples,))
    predictions = np.random.randint(2, size=(n_samples,))
    # Only the second sample carries a nonzero weight.
    weights = np.array([0, 1])

    score_functions = (
        dc.metrics.recall_score,
        dc.metrics.matthews_corrcoef,
        dc.metrics.roc_auc_score,
    )
    # Build every Metric up front, then evaluate them one by one.
    wrapped_metrics = [dc.metrics.Metric(fn) for fn in score_functions]
    for wrapped in wrapped_metrics:
        wrapped.compute_singletask_metric(labels, predictions, weights)


def test_r2_score():
    """Check that a Metric wrapping r2_score agrees with calling r2_score directly."""
    np.random.seed(123)
    n_samples = 10
    targets = np.random.rand(n_samples,)
    predictions = np.random.rand(n_samples,)

    wrapped = dc.metrics.Metric(dc.metrics.r2_score, n_tasks=1)
    direct_value = dc.metrics.r2_score(targets, predictions)
    wrapped_value = wrapped.compute_metric(targets, predictions)
    assert np.isclose(direct_value, wrapped_value)


def test_bedroc_score():
    """Check bedroc_score at both extremes: exact and fully inverted predictions."""
    n_actives = 20
    n_total = 400
    n_inactives = n_total - n_actives

    active_labels = np.ones(n_actives)
    inactive_labels = np.zeros(n_inactives)
    y_true = np.concatenate([active_labels, inactive_labels])

    # Best case: the one-hot predictions are built from the labels themselves.
    perfect_classes = np.concatenate([active_labels, inactive_labels])
    perfect_pred = dc.metrics.to_one_hot(perfect_classes)
    perfect_score = dc.metrics.bedroc_score(y_true, perfect_pred)
    np.testing.assert_almost_equal(perfect_score, 1.0)

    # Worst case: every active is predicted 0 and every inactive is predicted 1.
    inverted_classes = np.concatenate(
        [np.zeros(n_actives), np.ones(n_inactives)])
    inverted_pred = dc.metrics.to_one_hot(inverted_classes)
    inverted_score = dc.metrics.bedroc_score(y_true, inverted_pred)
    # Compared to 4 decimal places only.
    np.testing.assert_almost_equal(inverted_score, 0.0, decimal=4)
194 changes: 194 additions & 0 deletions deepchem/metrics/tests/test_normalize.py
@@ -0,0 +1,194 @@
"""Test normalization of input."""

import numpy as np
import unittest
import deepchem as dc
from deepchem.metrics import to_one_hot
from deepchem.metrics import from_one_hot
from deepchem.metrics import threshold_predictions
from deepchem.metrics import handle_classification_mode
from deepchem.metrics import normalize_prediction_shape
from deepchem.metrics import normalize_weight_shape


def test_one_hot():
    """Round-trip binary labels through to_one_hot and from_one_hot."""
    labels = np.array([0, 0, 1, 0, 1, 1, 0])
    encoded = to_one_hot(labels)
    # Label 0 maps to [1, 0] and label 1 maps to [0, 1].
    reference = np.array([
        [1, 0],
        [1, 0],
        [0, 1],
        [1, 0],
        [0, 1],
        [0, 1],
        [1, 0],
    ])
    decoded = from_one_hot(encoded)
    assert np.array_equal(reference, encoded)
    assert np.array_equal(labels, decoded)


def test_handle_classification_mode_none():
    """Check that a classification mode of None returns the input unchanged."""
    raw = np.random.rand(10, 2)
    # Rescale each row so that it sums to 1.
    row_sums = np.sum(raw, axis=1)[:, np.newaxis]
    probabilities = np.expand_dims(raw / row_sums, 1)  # shape (10, 1, 2)

    result = handle_classification_mode(probabilities, None)
    assert result.shape == (10, 1, 2)
    assert np.array_equal(result, probabilities)


def test_handle_classification_mode_threshold():
    """Check "threshold" mode with a 0.5 cutoff against a per-sample argmax."""
    raw = np.random.rand(10, 2)
    # Rescale each row so that it sums to 1, then add a middle axis.
    normalized = raw / np.sum(raw, axis=1)[:, np.newaxis]
    probabilities = np.expand_dims(normalized, 1)  # shape (10, 1, 2)

    winning_class = np.argmax(np.squeeze(probabilities), axis=1)
    reference = winning_class[:, np.newaxis]

    result = handle_classification_mode(
        probabilities, "threshold", threshold_value=0.5)
    assert result.shape == (10, 1)
    assert np.array_equal(result, reference)


def test_handle_classification_mode_threshold_nonstandard():
    """Check "threshold" mode with a cutoff of 0.3 rather than 0.5."""
    raw = np.random.rand(10, 2)
    # Rescale each row so that it sums to 1.
    probabilities = raw / np.sum(raw, axis=1)[:, np.newaxis]

    # Expect 1 wherever the second column reaches the 0.3 cutoff, else 0.
    reaches_cutoff = probabilities[:, 1] >= 0.3
    reference = np.where(reaches_cutoff, np.ones(10), np.zeros(10))
    reference = reference[:, np.newaxis]

    stacked = np.expand_dims(probabilities, 1)  # shape (10, 1, 2)
    result = handle_classification_mode(
        stacked, "threshold", threshold_value=0.3)
    assert result.shape == (10, 1)
    assert np.array_equal(result, reference)


def test_handle_classification_mode_threshold_one_hot():
    """Check "threshold-one-hot" mode against a one-hot encoded argmax."""
    raw = np.random.rand(10, 2)
    # Rescale each row so that it sums to 1, then add a middle axis.
    normalized = raw / np.sum(raw, axis=1)[:, np.newaxis]
    probabilities = np.expand_dims(normalized, 1)  # shape (10, 1, 2)

    winning_class = np.argmax(np.squeeze(probabilities), axis=1)
    one_hot_winner = to_one_hot(winning_class, n_classes=2)
    reference = np.expand_dims(one_hot_winner, 1)

    result = handle_classification_mode(
        probabilities, "threshold-one-hot", threshold_value=0.5)
    assert result.shape == (10, 1, 2)
    assert np.array_equal(result, reference)


def test_threshold_predictions_binary():
    """Check threshold_predictions on two-column input with a 0.5 threshold."""
    # Random two-column matrix, rescaled so that each row sums to 1.
    raw = np.random.rand(10, 2)
    probabilities = raw / np.sum(raw, axis=1)[:, np.newaxis]

    thresholded = threshold_predictions(probabilities, 0.5)
    assert thresholded.shape == (10,)
    matches_argmax = thresholded == np.argmax(probabilities, axis=1)
    assert matches_argmax.all()


def test_threshold_predictions_multiclass():
    """Check threshold_predictions on five-column input with no threshold given."""
    # Random five-column matrix, rescaled so that each row sums to 1.
    raw = np.random.rand(10, 5)
    probabilities = raw / np.sum(raw, axis=1)[:, np.newaxis]

    thresholded = threshold_predictions(probabilities)
    assert thresholded.shape == (10,)
    matches_argmax = thresholded == np.argmax(probabilities, axis=1)
    assert matches_argmax.all()


def test_normalize_1d_classification_binary():
    """Check classification normalization of a 1D array of binary labels."""
    labels = np.array([0, 0, 1, 0, 1, 1, 0])
    # Per-sample expected entries: label 0 -> [[1, 0]], label 1 -> [[0, 1]].
    class_zero = [[1., 0.]]
    class_one = [[0., 1.]]
    reference = np.array([
        class_zero, class_zero, class_one, class_zero,
        class_one, class_one, class_zero,
    ])

    result = normalize_prediction_shape(
        labels, mode="classification", n_tasks=1, n_classes=2)
    assert result.shape == (7, 1, 2)
    assert np.array_equal(reference, result)


def test_normalize_1d_classification_multiclass():
    """Check classification normalization of a 1D array of five-class labels."""
    labels = np.random.randint(5, size=(200,))
    one_hot_labels = to_one_hot(labels, n_classes=5)
    reference = np.expand_dims(one_hot_labels, 1)  # shape (200, 1, 5)

    result = normalize_prediction_shape(
        labels, mode="classification", n_tasks=1, n_classes=5)
    assert result.shape == (200, 1, 5)
    assert np.array_equal(reference, result)


def test_normalize_1d_classification_multiclass_explicit_nclasses():
    """Check 1D classification normalization when n_classes exceeds the labels seen.

    The labels are drawn from 0-4 while n_classes is set to 10.
    """
    labels = np.random.randint(5, size=(10,))
    one_hot_labels = to_one_hot(labels, n_classes=10)
    reference = np.expand_dims(one_hot_labels, 1)  # shape (10, 1, 10)

    result = normalize_prediction_shape(
        labels, mode="classification", n_classes=10, n_tasks=1)
    assert result.shape == (10, 1, 10)
    assert np.array_equal(reference, result)


def test_normalize_2d_classification_binary():
    """Check classification normalization of a 2D array of binary labels."""
    # Integer 0/1 labels of shape (10, 1).
    labels = np.random.randint(2, size=(10, 1))
    flat_labels = np.squeeze(labels)
    reference = np.expand_dims(dc.metrics.to_one_hot(flat_labels), 1)

    result = normalize_prediction_shape(
        labels, mode="classification", n_tasks=1, n_classes=2)
    assert result.shape == (10, 1, 2)
    assert np.array_equal(reference, result)


def test_normalize_3d_classification_binary():
    """Check that 3D one-hot classification input comes back unchanged."""
    # Build one-hot input of shape (10, 1, 2) from random binary labels.
    labels = np.random.randint(2, size=(10,))
    one_hot_labels = dc.metrics.to_one_hot(labels, n_classes=2)
    stacked = np.expand_dims(one_hot_labels, 1)

    result = normalize_prediction_shape(
        stacked, mode="classification", n_tasks=1, n_classes=2)
    assert result.shape == (10, 1, 2)
    assert np.array_equal(stacked, result)


def test_normalize_1d_regression():
    """Check that 1D regression values gain a trailing axis of length 1."""
    values = np.random.rand(10)
    reference = values[:, np.newaxis]  # shape (10, 1)

    result = normalize_prediction_shape(values, mode="regression", n_tasks=1)
    assert result.shape == (10, 1)
    assert np.array_equal(reference, result)


def test_normalize_2d_regression():
    """Check that 2D regression values of shape (10, 5) come back unchanged."""
    values = np.random.rand(10, 5)

    result = normalize_prediction_shape(values, mode="regression", n_tasks=5)
    assert result.shape == (10, 5)
    assert np.array_equal(values, result)


def test_normalize_3d_regression():
    """Check that 3D regression values of shape (10, 5, 1) lose the last axis."""
    values = np.random.rand(10, 5, 1)
    reference = np.squeeze(values)  # shape (10, 5)

    result = normalize_prediction_shape(values, mode="regression", n_tasks=5)
    assert result.shape == (10, 5)
    assert np.array_equal(reference, result)


def test_scalar_weight_normalization():
    """Check that a scalar weight is expanded to a constant (10, 5) array."""
    result = normalize_weight_shape(w=5, n_samples=10, n_tasks=5)
    reference = 5 * np.ones((10, 5))
    assert result.shape == (10, 5)
    assert np.all(result == reference)


def test_1d_weight_normalization():
    """Check that 1D per-sample weights are repeated across all five tasks."""
    weights = np.random.rand(10)
    # One copy of the weight vector per task, laid out as columns.
    reference = np.array([weights] * 5).T

    result = normalize_weight_shape(weights, n_samples=10, n_tasks=5)
    assert result.shape == (10, 5)
    assert np.all(result == reference)


def test_2d_weight_normalization():
    """Check that weights already shaped (10, 5) come back with equal values."""
    weights = np.random.rand(10, 5)

    result = normalize_weight_shape(weights, n_samples=10, n_tasks=5)
    assert result.shape == (10, 5)
    assert np.all(result == weights)
14 changes: 9 additions & 5 deletions deepchem/models/graph_models.py
Expand Up @@ -657,11 +657,15 @@ class GraphConvModel(KerasModel):
"""Graph Convolutional Models.
This class implements the graph convolutional model from the
following paper:
Duvenaud, David K., et al. "Convolutional networks on graphs for learning molecular fingerprints." Advances in neural information processing systems. 2015.
following paper [1]_. These graph convolutions start with a per-atom set of
descriptors for each atom in a molecule, then combine and recombine these
descriptors over convolutional layers.
References
----------
.. [1] Duvenaud, David K., et al. "Convolutional networks on graphs for
learning molecular fingerprints." Advances in neural information processing
systems. 2015.
"""

def __init__(self,
Expand Down

0 comments on commit 2062007

Please sign in to comment.