Merge 4152d36 into 0b8b134

deepchem · Jul 17, 2020 · 2062007 · 2062007
2 parents 0b8b134 + 4152d36
commit 2062007
Show file tree

Hide file tree

Showing 21 changed files with 3,878 additions and 2,530 deletions.
diff --git a/deepchem/metrics/__init__.py b/deepchem/metrics/__init__.py
diff --git a/deepchem/metrics/tests/metrics_test.py b/deepchem/metrics/tests/metrics_test.py
diff --git a/deepchem/metrics/tests/test_metrics.py b/deepchem/metrics/tests/test_metrics.py
@@ -0,0 +1,72 @@
+"""
+Tests for metrics.
+"""
+import numpy as np
+import deepchem as dc
+import unittest
+from deepchem import metrics
+
+
+def test_kappa_score():
+  y_true = [1, 0, 1, 0]
+  y_pred = [0.8, 0.2, 0.3, 0.4]  # [1, 0, 0, 0] with 0.5 threshold
+  kappa = dc.metrics.kappa_score(y_true, np.greater(y_pred, 0.5))
+  observed_agreement = 3.0 / 4.0
+  expected_agreement = ((2 * 1) + (2 * 3)) / 4.0**2
+  expected_kappa = np.true_divide(observed_agreement - expected_agreement,
+                                  1.0 - expected_agreement)
+  np.testing.assert_almost_equal(kappa, expected_kappa)
+
+
+def test_one_sample():
+  """Test that the metrics won't raise error even in an extreme condition
+  where there is only one sample with w > 0.
+  """
+  np.random.seed(123)
+  n_samples = 2
+  y_true = np.random.randint(2, size=(n_samples,))
+  y_pred = np.random.randint(2, size=(n_samples,))
+  w = np.array([0, 1])
+  all_metrics = [
+      dc.metrics.Metric(dc.metrics.recall_score),
+      dc.metrics.Metric(dc.metrics.matthews_corrcoef),
+      dc.metrics.Metric(dc.metrics.roc_auc_score)
+  ]
+  for metric in all_metrics:
+    score = metric.compute_singletask_metric(y_true, y_pred, w)
+
+
+def test_r2_score():
+  """Test that R^2 metric passes basic sanity tests"""
+  np.random.seed(123)
+  n_samples = 10
+  y_true = np.random.rand(n_samples,)
+  y_pred = np.random.rand(n_samples,)
+  regression_metric = dc.metrics.Metric(dc.metrics.r2_score, n_tasks=1)
+  assert np.isclose(
+      dc.metrics.r2_score(y_true, y_pred),
+      regression_metric.compute_metric(y_true, y_pred))
+
+
+def test_bedroc_score():
+  """Test BEDROC."""
+  num_actives = 20
+  num_total = 400
+
+  y_true_actives = np.ones(num_actives)
+  y_true_inactives = np.zeros(num_total - num_actives)
+  y_true = np.concatenate([y_true_actives, y_true_inactives])
+
+  # Best score case
+  y_pred_best = dc.metrics.to_one_hot(
+      np.concatenate([y_true_actives, y_true_inactives]))
+  best_score = dc.metrics.bedroc_score(y_true, y_pred_best)
+  np.testing.assert_almost_equal(best_score, 1.0)
+
+  # Worst score case
+  worst_pred_actives = np.zeros(num_actives)
+  worst_pred_inactives = np.ones(num_total - num_actives)
+  y_pred_worst = dc.metrics.to_one_hot(
+      np.concatenate([worst_pred_actives, worst_pred_inactives]))
+  worst_score = dc.metrics.bedroc_score(y_true, y_pred_worst)
+  np.testing.assert_almost_equal(worst_score, 0.0, 4)
diff --git a/deepchem/metrics/tests/test_normalize.py b/deepchem/metrics/tests/test_normalize.py
@@ -0,0 +1,194 @@
+"""Test normalization of input."""
+
+import numpy as np
+import unittest
+import deepchem as dc
+from deepchem.metrics import to_one_hot
+from deepchem.metrics import from_one_hot
+from deepchem.metrics import threshold_predictions
+from deepchem.metrics import handle_classification_mode
+from deepchem.metrics import normalize_prediction_shape
+from deepchem.metrics import normalize_weight_shape
+
+
+def test_one_hot():
+  """Test the one hot encoding."""
+  y = np.array([0, 0, 1, 0, 1, 1, 0])
+  y_hot = to_one_hot(y)
+  expected = np.array([[1, 0], [1, 0], [0, 1], [1, 0], [0, 1], [0, 1], [1, 0]])
+  yp = from_one_hot(y_hot)
+  assert np.array_equal(expected, y_hot)
+  assert np.array_equal(y, yp)
+
+
+def test_handle_classification_mode_none():
+  """Test that mode None returns the predictions unchanged."""
+  y = np.random.rand(10, 2)
+  y = y / np.sum(y, axis=1)[:, np.newaxis]
+  y = np.expand_dims(y, 1)
+  y_expected = y
+  y_out = handle_classification_mode(y, None)
+  assert y_out.shape == (10, 1, 2)
+  assert np.array_equal(y_out, y_expected)
+
+
+def test_handle_classification_mode_threshold():
+  """Test proper thresholding."""
+  y = np.random.rand(10, 2)
+  y = y / np.sum(y, axis=1)[:, np.newaxis]
+  y = np.expand_dims(y, 1)
+  y_expected = np.argmax(np.squeeze(y), axis=1)[:, np.newaxis]
+  y_out = handle_classification_mode(y, "threshold", threshold_value=0.5)
+  assert y_out.shape == (10, 1)
+  assert np.array_equal(y_out, y_expected)
+
+
+def test_handle_classification_mode_threshold_nonstandard():
+  """Test thresholding with a non-default threshold value."""
+  y = np.random.rand(10, 2)
+  y = y / np.sum(y, axis=1)[:, np.newaxis]
+  y_expected = np.where(y[:, 1] >= 0.3, np.ones(10),
+                        np.zeros(10))[:, np.newaxis]
+  y = np.expand_dims(y, 1)
+  y_out = handle_classification_mode(y, "threshold", threshold_value=0.3)
+  assert y_out.shape == (10, 1)
+  assert np.array_equal(y_out, y_expected)
+
+
+def test_handle_classification_mode_threshold_one_hot():
+  """Test thresholding with one-hot encoded output."""
+  y = np.random.rand(10, 2)
+  y = y / np.sum(y, axis=1)[:, np.newaxis]
+  y = np.expand_dims(y, 1)
+  y_expected = np.expand_dims(
+      to_one_hot(np.argmax(np.squeeze(y), axis=1), n_classes=2), 1)
+  y_out = handle_classification_mode(
+      y, "threshold-one-hot", threshold_value=0.5)
+  assert y_out.shape == (10, 1, 2)
+  assert np.array_equal(y_out, y_expected)
+
+
+def test_threshold_predictions_binary():
+  """Test thresholding of binary predictions."""
+  # Get a random prediction matrix
+  y = np.random.rand(10, 2)
+  y = y / np.sum(y, axis=1)[:, np.newaxis]
+  y_thresh = threshold_predictions(y, 0.5)
+  assert y_thresh.shape == (10,)
+  assert (y_thresh == np.argmax(y, axis=1)).all()
+
+
+def test_threshold_predictions_multiclass():
+  """Test thresholding of multiclass predictions."""
+  y = np.random.rand(10, 5)
+  y = y / np.sum(y, axis=1)[:, np.newaxis]
+  y_thresh = threshold_predictions(y)
+  assert y_thresh.shape == (10,)
+  assert (y_thresh == np.argmax(y, axis=1)).all()
+
+
+def test_normalize_1d_classification_binary():
+  """Tests 1d classification normalization."""
+  y = np.array([0, 0, 1, 0, 1, 1, 0])
+  expected = np.array([[[1., 0.]], [[1., 0.]], [[0., 1.]], [[1., 0.]],
+                       [[0., 1.]], [[0., 1.]], [[1., 0.]]])
+  y_out = normalize_prediction_shape(
+      y, mode="classification", n_tasks=1, n_classes=2)
+  assert y_out.shape == (7, 1, 2)
+  assert np.array_equal(expected, y_out)
+
+
+def test_normalize_1d_classification_multiclass():
+  """Tests 1d classification normalization."""
+  y = np.random.randint(5, size=(200,))
+  y_expected = np.expand_dims(to_one_hot(y, n_classes=5), 1)
+  y_out = normalize_prediction_shape(
+      y, mode="classification", n_tasks=1, n_classes=5)
+  assert y_out.shape == (200, 1, 5)
+  assert np.array_equal(y_expected, y_out)
+
+
+def test_normalize_1d_classification_multiclass_explicit_nclasses():
+  """Tests 1d normalization when n_classes exceeds the labels present."""
+  y = np.random.randint(5, size=(10,))
+  y_expected = np.expand_dims(to_one_hot(y, n_classes=10), 1)
+  y_out = normalize_prediction_shape(
+      y, mode="classification", n_classes=10, n_tasks=1)
+  assert y_out.shape == (10, 1, 10)
+  assert np.array_equal(y_expected, y_out)
+
+
+def test_normalize_2d_classification_binary():
+  """Tests 2d classification normalization."""
+  # Of shape (N, n_tasks), holding class labels
+  y = np.random.randint(2, size=(10, 1))
+  y_expected = np.expand_dims(dc.metrics.to_one_hot(np.squeeze(y)), 1)
+  y_out = normalize_prediction_shape(
+      y, mode="classification", n_tasks=1, n_classes=2)
+  assert y_out.shape == (10, 1, 2)
+  assert np.array_equal(y_expected, y_out)
+
+
+def test_normalize_3d_classification_binary():
+  """Tests 3d classification normalization."""
+  # Of shape (N, 1, n_classes)
+  y = np.random.randint(2, size=(10,))
+  y = dc.metrics.to_one_hot(y, n_classes=2)
+  y = np.expand_dims(y, 1)
+  y_expected = y
+  y_out = normalize_prediction_shape(
+      y, mode="classification", n_tasks=1, n_classes=2)
+  assert y_out.shape == (10, 1, 2)
+  assert np.array_equal(y_expected, y_out)
+
+
+def test_normalize_1d_regression():
+  """Tests 1d regression normalization."""
+  y = np.random.rand(10)
+  y_expected = y[:, np.newaxis]
+  y_out = normalize_prediction_shape(y, mode="regression", n_tasks=1)
+  assert y_out.shape == (10, 1)
+  assert np.array_equal(y_expected, y_out)
+
+
+def test_normalize_2d_regression():
+  """Tests 2d regression normalization."""
+  y = np.random.rand(10, 5)
+  y_expected = y
+  y_out = normalize_prediction_shape(y, mode="regression", n_tasks=5)
+  assert y_out.shape == (10, 5)
+  assert np.array_equal(y_expected, y_out)
+
+
+def test_normalize_3d_regression():
+  """Tests 3d regression normalization."""
+  y = np.random.rand(10, 5, 1)
+  y_expected = np.squeeze(y)
+  y_out = normalize_prediction_shape(y, mode="regression", n_tasks=5)
+  assert y_out.shape == (10, 5)
+  assert np.array_equal(y_expected, y_out)
+
+
+def test_scalar_weight_normalization():
+  """Test normalization of weights."""
+  w_out = normalize_weight_shape(w=5, n_samples=10, n_tasks=5)
+  assert w_out.shape == (10, 5)
+  assert np.all(w_out == 5 * np.ones((10, 5)))
+
+
+def test_1d_weight_normalization():
+  """Test normalization of weights."""
+  w = np.random.rand(10)
+  # This has w for each task.
+  w_expected = np.array([w, w, w, w, w]).T
+  w_out = normalize_weight_shape(w, n_samples=10, n_tasks=5)
+  assert w_out.shape == (10, 5)
+  assert np.all(w_out == w_expected)
+
+
+def test_2d_weight_normalization():
+  """Test normalization of weights."""
+  w = np.random.rand(10, 5)
+  w_out = normalize_weight_shape(w, n_samples=10, n_tasks=5)
+  assert w_out.shape == (10, 5)
+  assert np.all(w_out == w)
diff --git a/deepchem/models/graph_models.py b/deepchem/models/graph_models.py
@@ -657,11 +657,15 @@ class GraphConvModel(KerasModel):
   """Graph Convolutional Models.
 
   This class implements the graph convolutional model from the
-  following paper:
-
-
-  Duvenaud, David K., et al. "Convolutional networks on graphs for learning molecular fingerprints." Advances in neural information processing systems. 2015.
-
+  following paper [1]_. These graph convolutions start with a set of
+  descriptors for each atom in a molecule, then combine and recombine these
+  descriptors over convolutional layers.
+
+  References
+  ----------
+  .. [1] Duvenaud, David K., et al. "Convolutional networks on graphs for
+     learning molecular fingerprints." Advances in neural information
+     processing systems. 2015.
   """
 
   def __init__(self,