From 52f929d03cd12ed8ce976a806a74503c68ffccfe Mon Sep 17 00:00:00 2001 From: Scott Date: Tue, 14 Jul 2020 23:09:55 -0500 Subject: [PATCH 01/41] ENH: Wrap PyTorch/Keras models --- dask_ml/wrappers.py | 140 ++++++++++++++++++++++++++++++++++++++++- tests/test_wrappers.py | 125 ++++++++++++++++++++++++++++++++++++ 2 files changed, 264 insertions(+), 1 deletion(-) create mode 100644 tests/test_wrappers.py diff --git a/dask_ml/wrappers.py b/dask_ml/wrappers.py index 810fdba42..bc89898ed 100644 --- a/dask_ml/wrappers.py +++ b/dask_ml/wrappers.py @@ -481,7 +481,7 @@ def _fit_for_estimator(self, estimator, X, y, **fit_kwargs): random_state=self.random_state, shuffle_blocks=self.shuffle_blocks, assume_equal_chunks=self.assume_equal_chunks, - **fit_kwargs + **fit_kwargs, ) copy_learned_attributes(result, self) @@ -551,3 +551,141 @@ def _predict_proba(part, estimator): def _transform(part, estimator): return estimator.transform(part) + + +try: + from scikeras.wrappers import KerasRegressor, KerasClassifier +except: + + class _KerasError: + def __init__(self, *args, **kwargs): + raise ModuleNotFoundError( + "SciKeras not installed. Please install Tensorflow, Keras " + "and SciKeras. This code will work:\n\n" + " $ pip install tensorflow keras scikeras\n" + ) + + class KerasClassifier(_KerasError): + pass + + class KerasRegressor(_KerasError): + pass + + +try: + from skorch import NeuralNetClassifier, NeuralNetRegressor +except: + + class _PyTorchError: + def __init__(self, *args, **kwargs): + raise ModuleNotFoundError( + "Skorch not installed. Please install PyTorch and Skorch:" + " * PyTorch installation instructions: https://pytorch.org/ \n" + " * Skorch installation: `pip install skorch`" + ) + + class NeuralNetClassifier(_PyTorchError): + pass + + class NeuralNetRegressor(_PyTorchError): + pass + + +class PyTorchClassifier(NeuralNetClassifier): + """ + A wrapper for PyTorch modules that's most suited for model selection. 
+ + This class is a wrapper around `Skorch`_, which brings a Scikit-learn API + to PyTorch. + + .. _Skorch: https://skorch.readthedocs.io + + Examples + -------- + >>> import torch.optim as optim + >>> import torch.nn as nn + >>> from dask_ml.wrappers import PyTorchRegressor + >>> import torch + >>> + >>> class ShallowNet(nn.Module): + ... def __init__(self, n_features=5): + ... super().__init__() + ... self.layer1 = nn.Linear(n_features, 1) + ... def forward(self, x): + ... return torch.sign(self.layer1(x)) + ... + >>> model = PyTorchRegressor( + ... module=ShallowNet, + ... module__n_features=200, + ... optimizer=optim.SGD, + ... optimizer__lr=0.1, + ... batch_size=64, + ... ) + ... + >>> from sklearn.datasets import make_classification + >>> X, y = make_classification(n_features=200, n_samples=400, n_classes=2) + >>> X = X.astype("float32") + >>> y = y.astype("float32").reshape(-1, 1) + >>> model.partial_fit(X, y) + + """ + def __init__( + self, warm_start=False, train_split=None, max_epochs=1, callbacks=None, **kwargs + ): + super().__init__( + warm_start=warm_start, + train_split=train_split, + max_epochs=1, + callbacks=callbacks or [], + **kwargs, + ) + + +class PyTorchRegressor(NeuralNetRegressor): + """ + A wrapper for PyTorch modules that's most suited for model selection. + + This class is a wrapper around `Skorch`_, which brings a Scikit-learn API + to PyTorch. + + .. _Skorch: https://skorch.readthedocs.io + + Examples + -------- + >>> import torch.optim as optim + >>> import torch.nn as nn + >>> from dask_ml.wrappers import PyTorchRegressor + >>> + >>> class ShallowNet(nn.Module): + ... def __init__(self, n_features=5): + ... super().__init__() + ... self.layer1 = nn.Linear(n_features, 1) + ... def forward(self, x): + ... return F.relu(self.layer1(x)) + ... + >>> model = PyTorchRegressor( + ... module=ShallowNet, + ... module__n_features=200, + ... optimizer=optim.SGD, + ... optimizer__lr=0.1, + ... batch_size=64, + ... 
) + >>> from sklearn.datasets import make_regression + >>> X, y = make_regression(n_features=200, n_samples=400) + >>> X, y = X.astype("float32"), y.astype("float32") + >>> model.partial_fit(X, y) + + """ + + def __init__(self, warm_start=False, train_split=None, max_epochs=1, **kwargs): + super().__init__( + warm_start=warm_start, + train_split=train_split, + max_epochs=max_epochs, + **kwargs, + ) + + def initialize(self, *args, **kwargs): + r = super().initialize(*args, **kwargs) + self.callbacks_ = [] + return r diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py new file mode 100644 index 000000000..ea7dc6f42 --- /dev/null +++ b/tests/test_wrappers.py @@ -0,0 +1,125 @@ +import pickle +from typing import Tuple + +import numpy as np +import pytest +import torch.optim as optim +import torch.nn as nn +import torch.nn.functional as F +import tensorflow as tf +from sklearn.datasets import make_classification, make_regression +from distributed.utils_test import gen_cluster +from scipy.stats import loguniform, uniform +from tensorflow.keras.datasets import mnist as keras_mnist +from tensorflow.keras.layers import Dense, Activation, Dropout +from tensorflow.keras.models import Sequential +from skorch import NeuralNetClassifier + +from dask_ml.wrappers import ( + KerasClassifier, + KerasRegressor, + PyTorchClassifier, + PyTorchRegressor, +) +from dask_ml.model_selection import IncrementalSearchCV +from sklearn.model_selection import RandomizedSearchCV + + +def mnist() -> Tuple[np.ndarray, np.ndarray]: + (X_train, y_train), _ = keras_mnist.load_data() + X_train = X_train[:100] + y_train = y_train[:100] + X_train = X_train.reshape(X_train.shape[0], 784) + X_train = X_train.astype("float32") + X_train /= 255 + assert all(isinstance(x, np.ndarray) for x in [X_train, y_train]) + return X_train, y_train + + +def _keras_build_fn(optimizer="rmsprop", lr=0.01, kernel_initializer="glorot_uniform"): + model = Sequential() + model.add(Dense(512, input_shape=(784,))) + 
model.add(Activation("relu")) + model.add(Dropout(0.2)) + model.add(Dense(512, kernel_initializer=kernel_initializer)) + model.add(Activation("relu")) + model.add(Dropout(0.2)) + model.add(Dense(10, kernel_initializer=kernel_initializer)) + model.add(Activation("softmax")) # This special "softmax" a + + opt = optimizer + if optimizer == "SGD": + opt = tf.keras.optimizers.SGD(learning_rate=lr) + model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"]) + return model + + +def test_keras(): + X, y = mnist() + assert X.shape[-1] == 784 + assert y.ndim == 1 + assert len(X) == len(y) + model = KerasClassifier(build_fn=_keras_build_fn) + model.partial_fit(X, y) + + +class ShallowNet(nn.Module): + def __init__(self, n_features=5): + super().__init__() + self.layer1 = nn.Linear(n_features, 1) + + def forward(self, x): + return F.relu(self.layer1(x)) + + +@gen_cluster(client=True) +def test_pytorch(c, s, a, b): + n_features = 10 + clf = PyTorchRegressor( + module=ShallowNet, + module__n_features=n_features, + criterion=nn.MSELoss, + optimizer=optim.SGD, + optimizer__lr=0.1, + batch_size=64, + ) + from sklearn.base import clone + clf2 = clone(clf) + assert clf.callbacks == None + assert clf.warm_start == False + assert clf.train_split is None + assert clf.max_epochs == 1 + + params = {"optimizer__lr": loguniform(1e-3, 1e0)} + X, y = make_regression(n_samples=100, n_features=n_features) + X = X.astype("float32") + y = y.astype("float32").reshape(-1, 1) + search = IncrementalSearchCV(clf, params, max_iter=5, decay_rate=None) + yield search.fit(X, y) + assert search.best_score_ >= 0 + +def test_pytorch_doc(): + import torch.optim as optim + import torch.nn as nn + from dask_ml.wrappers import PyTorchRegressor + import torch + + class ShallowNet(nn.Module): + def __init__(self, n_features=5): + super().__init__() + self.layer1 = nn.Linear(n_features, 1) + def forward(self, x): + return torch.sign(self.layer1(x)) + + model = PyTorchRegressor( + 
module=ShallowNet, + module__n_features=200, + optimizer=optim.SGD, + optimizer__lr=0.1, + batch_size=64, + ) + from sklearn.datasets import make_classification + X, y = make_classification(n_features=200, n_samples=400, n_classes=2) + X = X.astype("float32") + y = y.astype("float32").reshape(-1, 1) + model.partial_fit(X, y) From 57a12d65f4ad69817ee50b23b7e16bb2b896e3e8 Mon Sep 17 00:00:00 2001 From: Scott Date: Tue, 14 Jul 2020 23:10:36 -0500 Subject: [PATCH 02/41] MAINT: ci requirements --- ci/environment-3.8.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ci/environment-3.8.yaml b/ci/environment-3.8.yaml index c2c260925..b499b5837 100644 --- a/ci/environment-3.8.yaml +++ b/ci/environment-3.8.yaml @@ -30,3 +30,7 @@ dependencies: - pip - pip: - pytest-azurepipelines + - tensorflow + - scikeras + - skorch + - torch From b766c60a0678fb5678b751e86d97ccf27cbc3728 Mon Sep 17 00:00:00 2001 From: Scott Date: Wed, 15 Jul 2020 11:44:52 -0500 Subject: [PATCH 03/41] Remove wrapper impl --- dask_ml/wrappers.py | 138 ------------------------- tests/model_selection/test_wrappers.py | 105 +++++++++++++++++++ tests/test_wrappers.py | 125 ---------------------- 3 files changed, 105 insertions(+), 263 deletions(-) create mode 100644 tests/model_selection/test_wrappers.py delete mode 100644 tests/test_wrappers.py diff --git a/dask_ml/wrappers.py b/dask_ml/wrappers.py index bc89898ed..19718ff64 100644 --- a/dask_ml/wrappers.py +++ b/dask_ml/wrappers.py @@ -551,141 +551,3 @@ def _predict_proba(part, estimator): def _transform(part, estimator): return estimator.transform(part) - - -try: - from scikeras.wrappers import KerasRegressor, KerasClassifier -except: - - class _KerasError: - def __init__(self, *args, **kwargs): - raise ModuleNotFoundError( - "SciKeras not installed. Please install Tensorflow, Keras " - "and SciKeras. 
This code will work:\n\n" - " $ pip install tensorflow keras scikeras\n" - ) - - class KerasClassifier(_KerasError): - pass - - class KerasRegressor(_KerasError): - pass - - -try: - from skorch import NeuralNetClassifier, NeuralNetRegressor -except: - - class _PyTorchError: - def __init__(self, *args, **kwargs): - raise ModuleNotFoundError( - "Skorch not installed. Please install PyTorch and Skorch:" - " * PyTorch installation instructions: https://pytorch.org/ \n" - " * Skorch installation: `pip install skorch`" - ) - - class NeuralNetClassifier(_PyTorchError): - pass - - class NeuralNetRegressor(_PyTorchError): - pass - - -class PyTorchClassifier(NeuralNetClassifier): - """ - A wrapper for PyTorch modules that's most suited for model selection. - - This class is a wrapper around `Skorch`_, which brings a Scikit-learn API - to PyTorch. - - .. _Skorch: https://skorch.readthedocs.io - - Examples - -------- - >>> import torch.optim as optim - >>> import torch.nn as nn - >>> from dask_ml.wrappers import PyTorchRegressor - >>> import torch - >>> - >>> class ShallowNet(nn.Module): - ... def __init__(self, n_features=5): - ... super().__init__() - ... self.layer1 = nn.Linear(n_features, 1) - ... def forward(self, x): - ... return torch.sign(self.layer1(x)) - ... - >>> model = PyTorchRegressor( - ... module=ShallowNet, - ... module__n_features=200, - ... optimizer=optim.SGD, - ... optimizer__lr=0.1, - ... batch_size=64, - ... ) - ... 
- >>> from sklearn.datasets import make_classification - >>> X, y = make_classification(n_features=200, n_samples=400, n_classes=2) - >>> X = X.astype("float32") - >>> y = y.astype("float32").reshape(-1, 1) - >>> model.partial_fit(X, y) - - """ - def __init__( - self, warm_start=False, train_split=None, max_epochs=1, callbacks=None, **kwargs - ): - super().__init__( - warm_start=warm_start, - train_split=train_split, - max_epochs=1, - callbacks=callbacks or [], - **kwargs, - ) - - -class PyTorchRegressor(NeuralNetRegressor): - """ - A wrapper for PyTorch modules that's most suited for model selection. - - This class is a wrapper around `Skorch`_, which brings a Scikit-learn API - to PyTorch. - - .. _Skorch: https://skorch.readthedocs.io - - Examples - -------- - >>> import torch.optim as optim - >>> import torch.nn as nn - >>> from dask_ml.wrappers import PyTorchRegressor - >>> - >>> class ShallowNet(nn.Module): - ... def __init__(self, n_features=5): - ... super().__init__() - ... self.layer1 = nn.Linear(n_features, 1) - ... def forward(self, x): - ... return F.relu(self.layer1(x)) - ... - >>> model = PyTorchRegressor( - ... module=ShallowNet, - ... module__n_features=200, - ... optimizer=optim.SGD, - ... optimizer__lr=0.1, - ... batch_size=64, - ... 
) - >>> from sklearn.datasets import make_regression - >>> X, y = make_regression(n_features=200, n_samples=400) - >>> X, y = X.astype("float32"), y.astype("float32") - >>> model.partial_fit(X, y) - - """ - - def __init__(self, warm_start=False, train_split=None, max_epochs=1, **kwargs): - super().__init__( - warm_start=warm_start, - train_split=train_split, - max_epochs=max_epochs, - **kwargs, - ) - - def initialize(self, *args, **kwargs): - r = super().initialize(*args, **kwargs) - self.callbacks_ = [] - return r diff --git a/tests/model_selection/test_wrappers.py b/tests/model_selection/test_wrappers.py new file mode 100644 index 000000000..01f99fbdb --- /dev/null +++ b/tests/model_selection/test_wrappers.py @@ -0,0 +1,105 @@ +import pickle +from typing import Tuple + +import numpy as np +import pytest +import torch.optim as optim +import torch.nn as nn +import torch.nn.functional as F +import tensorflow as tf +from sklearn.datasets import make_classification, make_regression +from distributed.utils_test import gen_cluster +from scipy.stats import loguniform, uniform +from tensorflow.keras.datasets import mnist as keras_mnist +from tensorflow.keras.layers import Dense, Activation, Dropout +from tensorflow.keras.models import Sequential +from tensorflow.keras.utils import to_categorical +from skorch import NeuralNetClassifier +from sklearn.exceptions import DataConversionWarning + +from dask_ml.model_selection import IncrementalSearchCV +from sklearn.model_selection import RandomizedSearchCV +from sklearn.base import clone + +from scikeras.wrappers import KerasClassifier, KerasRegressor +from skorch import NeuralNetClassifier, NeuralNetRegressor + + +def mnist() -> Tuple[np.ndarray, np.ndarray]: + (X_train, y_train), _ = keras_mnist.load_data() + X_train = X_train[:100] + y_train = y_train[:100] + X_train = X_train.reshape(X_train.shape[0], 784) + X_train = X_train.astype("float32") + X_train /= 255 + Y_train = to_categorical(y_train, 10) + return X_train, 
y_train + + +def _keras_build_fn(lr=0.01): + layers = [ + Dense(512, input_shape=(784,), activation="relu"), + Dense(10, input_shape=(512,), activation="softmax"), + ] + model = Sequential(layers) + + opt = tf.keras.optimizers.SGD(learning_rate=lr) + model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"]) + return model + + +def test_keras(): + X, y = mnist() + assert X.ndim == 2 and X.shape[-1] == 784 + assert y.ndim == 1 and len(X) == len(y) + + model = KerasClassifier(build_fn=_keras_build_fn, lr=0.1) + params = {"lr": loguniform(1e-3, 1e-1)} + + search = IncrementalSearchCV(model, params, max_iter=5) + with pytest.warns(DataConversionWarning): + yield search.fit(X, y) + assert search.best_score_ >= 0 + + +class ShallowNet(nn.Module): + def __init__(self, n_features=5): + super().__init__() + self.layer1 = nn.Linear(n_features, 1) + + def forward(self, x): + return F.relu(self.layer1(x)) + + +@gen_cluster(client=True) +def test_pytorch(c, s, a, b): + n_features = 10 + defaults = { + "callbacks": False, + "warm_start": False, + "train_split": None, + "max_epochs": 1, + } + model = NeuralNetRegressor( + module=ShallowNet, + module__n_features=n_features, + criterion=nn.MSELoss, + optimizer=optim.SGD, + optimizer__lr=0.1, + batch_size=64, + **defaults, + ) + + model2 = clone(model) + assert model.callbacks == False + assert model.warm_start == False + assert model.train_split is None + assert model.max_epochs == 1 + + params = {"optimizer__lr": loguniform(1e-3, 1e0)} + X, y = make_regression(n_samples=100, n_features=n_features) + X = X.astype("float32") + y = y.astype("float32").reshape(-1, 1) + search = IncrementalSearchCV(model2, params, max_iter=5, decay_rate=None) + yield search.fit(X, y) + assert search.best_score_ >= 0 diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py deleted file mode 100644 index ea7dc6f42..000000000 --- a/tests/test_wrappers.py +++ /dev/null @@ -1,125 +0,0 @@ -import pickle -from typing import Tuple 
- -import numpy as np -import pytest -import torch.optim as optim -import torch.nn as nn -import torch.nn.functional as F -import tensorflow as tf -from sklearn.datasets import make_classification, make_regression -from distributed.utils_test import gen_cluster -from scipy.stats import loguniform, uniform -from tensorflow.keras.datasets import mnist as keras_mnist -from tensorflow.keras.layers import Dense, Activation, Dropout -from tensorflow.keras.models import Sequential -from skorch import NeuralNetClassifier - -from dask_ml.wrappers import ( - KerasClassifier, - KerasRegressor, - PyTorchClassifier, - PyTorchRegressor, -) -from dask_ml.model_selection import IncrementalSearchCV -from sklearn.model_selection import RandomizedSearchCV - - -def mnist() -> Tuple[np.ndarray, np.ndarray]: - (X_train, y_train), _ = keras_mnist.load_data() - X_train = X_train[:100] - y_train = y_train[:100] - X_train = X_train.reshape(X_train.shape[0], 784) - X_train = X_train.astype("float32") - X_train /= 255 - assert all(isinstance(x, np.ndarray) for x in [X_train, y_train]) - return X_train, y_train - - -def _keras_build_fn(optimizer="rmsprop", lr=0.01, kernel_initializer="glorot_uniform"): - model = Sequential() - model.add(Dense(512, input_shape=(784,))) - model.add(Activation("relu")) - model.add(Dropout(0.2)) - model.add(Dense(512, kernel_initializer=kernel_initializer)) - model.add(Activation("relu")) - model.add(Dropout(0.2)) - model.add(Dense(10, kernel_initializer=kernel_initializer)) - model.add(Activation("softmax")) # This special "softmax" a - - opt = optimizer - if optimizer == "SGD": - opt = tf.keras.optimizers.SGD(learning_rate=lr) - model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"]) - return model - - -def test_keras(): - X, y = mnist() - assert X.shape[-1] == 784 - assert y.ndim == 1 - assert len(X) == len(y) - model = KerasClassifier(build_fn=_keras_build_fn) - model.partial_fit(X, y) - - -class ShallowNet(nn.Module): - def 
__init__(self, n_features=5): - super().__init__() - self.layer1 = nn.Linear(n_features, 1) - - def forward(self, x): - return F.relu(self.layer1(x)) - - -@gen_cluster(client=True) -def test_pytorch(c, s, a, b): - n_features = 10 - clf = PyTorchRegressor( - module=ShallowNet, - module__n_features=n_features, - criterion=nn.MSELoss, - optimizer=optim.SGD, - optimizer__lr=0.1, - batch_size=64, - ) - from sklearn.base import clone - clf2 = clone(clf) - assert clf.callbacks == None - assert clf.warm_start == False - assert clf.train_split is None - assert clf.max_epochs == 1 - - params = {"optimizer__lr": loguniform(1e-3, 1e0)} - X, y = make_regression(n_samples=100, n_features=n_features) - X = X.astype("float32") - y = y.astype("float32").reshape(-1, 1) - search = IncrementalSearchCV(clf, params, max_iter=5, decay_rate=None) - yield search.fit(X, y) - assert search.best_score_ >= 0 - -def test_pytorch_doc(): - import torch.optim as optim - import torch.nn as nn - from dask_ml.wrappers import PyTorchRegressor - import torch - - class ShallowNet(nn.Module): - def __init__(self, n_features=5): - super().__init__() - self.layer1 = nn.Linear(n_features, 1) - def forward(self, x): - return torch.sign(self.layer1(x)) - - model = PyTorchRegressor( - module=ShallowNet, - module__n_features=200, - optimizer=optim.SGD, - optimizer__lr=0.1, - batch_size=64, - ) - from sklearn.datasets import make_classification - X, y = make_classification(n_features=200, n_samples=400, n_classes=2) - X = X.astype("float32") - y = y.astype("float32").reshape(-1, 1) - model.partial_fit(X, y) From 69a78863e82305964f0b2916721186ade93f30fe Mon Sep 17 00:00:00 2001 From: Scott Date: Wed, 15 Jul 2020 11:45:40 -0500 Subject: [PATCH 04/41] delete unused note for now --- docs/source/hyper-parameter-search.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/hyper-parameter-search.rst b/docs/source/hyper-parameter-search.rst index 474265562..15143721e 100644 --- 
a/docs/source/hyper-parameter-search.rst +++ b/docs/source/hyper-parameter-search.rst @@ -403,7 +403,7 @@ generalized to any of the above estimators. .. note:: - These estimators require that the model implement ``partial_fit`` + These estimators require that the model implement ``partial_fit``. By default, these class will call ``partial_fit`` on each chunk of the data. These classes can stop training any models if their score stops increasing From 1f54dad0234bf815d24168ff57b671c059c982ab Mon Sep 17 00:00:00 2001 From: Scott Date: Wed, 15 Jul 2020 11:47:57 -0500 Subject: [PATCH 05/41] Add doc framework for integration --- docs/source/index.rst | 12 ++++++++++-- docs/source/keras.rst | 2 ++ docs/source/lightgbm.rst | 2 ++ docs/source/pytorch.rst | 2 ++ 4 files changed, 16 insertions(+), 2 deletions(-) create mode 100644 docs/source/keras.rst create mode 100644 docs/source/lightgbm.rst create mode 100644 docs/source/pytorch.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index d7def230f..f3ac64663 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -120,13 +120,21 @@ Scikit-Learn should feel at home with Dask-ML. hyper-parameter-search.rst compose.rst glm.rst - joblib.rst meta-estimators.rst incremental.rst clustering.rst - xgboost.rst modules/api.rst +.. toctree:: + :maxdepth: 2 + :hidden: + :caption: Integration + + keras.rst + pytorch.rst + xgboost.rst + joblib.rst + .. 
toctree:: :maxdepth: 2 :hidden: diff --git a/docs/source/keras.rst b/docs/source/keras.rst new file mode 100644 index 000000000..231ad3e97 --- /dev/null +++ b/docs/source/keras.rst @@ -0,0 +1,2 @@ +Keras +===== diff --git a/docs/source/lightgbm.rst b/docs/source/lightgbm.rst new file mode 100644 index 000000000..22cd5381f --- /dev/null +++ b/docs/source/lightgbm.rst @@ -0,0 +1,2 @@ +LightGBM +======== diff --git a/docs/source/pytorch.rst b/docs/source/pytorch.rst new file mode 100644 index 000000000..d8d854f9b --- /dev/null +++ b/docs/source/pytorch.rst @@ -0,0 +1,2 @@ +PyTorch +======= From 36209a2e04020f7f3f0bca95a28fc79d8a9bdc8a Mon Sep 17 00:00:00 2001 From: Scott Date: Wed, 15 Jul 2020 11:57:59 -0500 Subject: [PATCH 06/41] Combine XGBoost and LightGBM --- docs/source/lightgbm.rst | 2 -- docs/source/xgboost.rst | 25 ++++++++++++++++++------- 2 files changed, 18 insertions(+), 9 deletions(-) delete mode 100644 docs/source/lightgbm.rst diff --git a/docs/source/lightgbm.rst b/docs/source/lightgbm.rst deleted file mode 100644 index 22cd5381f..000000000 --- a/docs/source/lightgbm.rst +++ /dev/null @@ -1,2 +0,0 @@ -LightGBM -======== diff --git a/docs/source/xgboost.rst b/docs/source/xgboost.rst index c5674262e..18d5647fc 100644 --- a/docs/source/xgboost.rst +++ b/docs/source/xgboost.rst @@ -1,19 +1,29 @@ -XGBoost -======= +XGBoost & LightGBM +================== .. currentmodule:: dask_ml.xgboost +XGBoost_ is a powerful and popular library for gradient boosted trees. For +larger datasets or faster training XGBoost also provides a distributed +computing solution. LightGBM_ is another library similar to XGBoost; it also +natively supports distributed training for decision trees. + +Dask-ML can set up distributed XGBoost or LightGBM for you and hand off data +from distributed dask.dataframes. This automates much of the hassle of +preprocessing and setup while still letting XGBoost/LightGBM do what they do +well. + +Below, we'll refer to an example with XGBoost. 
Here are the relevant XGBoost +classes/functions: + .. autosummary:: train predict XGBClassifier XGBRegressor -XGBoost_ is a powerful and popular library for gradient boosted trees. For -larger datasets or faster training XGBoost also provides a distributed -computing solution. Dask-ML can set up distributed XGBoost for you and hand -off data from distributed dask.dataframes. This automates much of the hassle -of preprocessing and setup while still letting XGBoost do what it does well. +The LightGBM implementation and documentation can be found at +https://github.com/dask/dask-lightgbm. Example ------- @@ -63,3 +73,4 @@ relevant GitHub issue here: `dmlc/xgboost #2032 `" for an example usage. .. _XGBoost: https://xgboost.readthedocs.io/ +.. _LightGBM: https://lightgbm.readthedocs.io/ From 0c27c667cb852a7a0c18497a6ad4866cbf84b5a4 Mon Sep 17 00:00:00 2001 From: Scott Date: Wed, 15 Jul 2020 15:47:38 -0500 Subject: [PATCH 07/41] Skip if package not installed --- tests/model_selection/test_wrappers.py | 55 +++++++++++++++++++------- 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/tests/model_selection/test_wrappers.py b/tests/model_selection/test_wrappers.py index 01f99fbdb..71799fee5 100644 --- a/tests/model_selection/test_wrappers.py +++ b/tests/model_selection/test_wrappers.py @@ -3,26 +3,32 @@ import numpy as np import pytest -import torch.optim as optim -import torch.nn as nn -import torch.nn.functional as F -import tensorflow as tf -from sklearn.datasets import make_classification, make_regression from distributed.utils_test import gen_cluster from scipy.stats import loguniform, uniform -from tensorflow.keras.datasets import mnist as keras_mnist -from tensorflow.keras.layers import Dense, Activation, Dropout -from tensorflow.keras.models import Sequential -from tensorflow.keras.utils import to_categorical -from skorch import NeuralNetClassifier +from sklearn.model_selection import RandomizedSearchCV +from sklearn.datasets import make_classification, 
make_regression +from sklearn.base import clone from sklearn.exceptions import DataConversionWarning from dask_ml.model_selection import IncrementalSearchCV -from sklearn.model_selection import RandomizedSearchCV -from sklearn.base import clone -from scikeras.wrappers import KerasClassifier, KerasRegressor -from skorch import NeuralNetClassifier, NeuralNetRegressor +try: + import tensorflow as tf + from tensorflow.keras.datasets import mnist as keras_mnist + from tensorflow.keras.layers import Dense, Activation, Dropout + from tensorflow.keras.models import Sequential + from tensorflow.keras.utils import to_categorical + from scikeras.wrappers import KerasClassifier, KerasRegressor +except: + pass + +try: + import torch.optim as optim + import torch.nn as nn + import torch.nn.functional as F + from skorch import NeuralNetClassifier, NeuralNetRegressor +except: + pass def mnist() -> Tuple[np.ndarray, np.ndarray]: @@ -48,14 +54,30 @@ def _keras_build_fn(lr=0.01): return model -def test_keras(): +@gen_cluster(client=True) +def test_keras(c, s, a, b): +# def test_keras(): + pytest.importorskip("tensorflow") + pytest.importorskip("scikeras") + X, y = mnist() assert X.ndim == 2 and X.shape[-1] == 784 assert y.ndim == 1 and len(X) == len(y) + assert isinstance(X, np.ndarray) and isinstance(y, np.ndarray) model = KerasClassifier(build_fn=_keras_build_fn, lr=0.1) params = {"lr": loguniform(1e-3, 1e-1)} + with pytest.warns(DataConversionWarning): + m = model.partial_fit(X, y) + assert m is model + model2 = pickle.loads(pickle.dumps(model)) + + search = RandomizedSearchCV(model, params) + with pytest.warns(DataConversionWarning): + search.fit(X, y, epochs=2) + assert search.best_score_ >= 0 + search = IncrementalSearchCV(model, params, max_iter=5) with pytest.warns(DataConversionWarning): yield search.fit(X, y) @@ -73,6 +95,9 @@ def forward(self, x): @gen_cluster(client=True) def test_pytorch(c, s, a, b): + pytest.importorskip("torch") + pytest.importorskip("skorch") + 
n_features = 10 defaults = { "callbacks": False, From 33449f73c6e5dfb4f6021d5cec02608a46ae64e1 Mon Sep 17 00:00:00 2001 From: Scott Date: Wed, 15 Jul 2020 15:47:46 -0500 Subject: [PATCH 08/41] Start to fill out docs --- docs/source/keras.rst | 43 ++++++++++++++++++++++++++++ docs/source/pytorch.rst | 62 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+) diff --git a/docs/source/keras.rst b/docs/source/keras.rst index 231ad3e97..d05923b19 100644 --- a/docs/source/keras.rst +++ b/docs/source/keras.rst @@ -1,2 +1,45 @@ Keras ===== + +The package SciKeras_ brings a Scikit-learn API to Keras. Install directions +are at https://github.com/adriangb/scikeras/blob/master/README.md#installation. + +Example usage +------------- + +First, let's start by defining normal function to create our model. This is the +normal way to create a `Keras Sequential model`_ + +.. _Keras Sequential model: https://keras.io/api/models/sequential/ + +.. code-block:: python + + import tensorflow as tf + from tensorflow.keras.layers import Dense, Activation, Dropout + from tensorflow.keras.models import Sequential + + def _keras_build_fn(lr=0.01): + layers = [Dense(512, input_shape=(784,), activation="relu"), + Dense(10, input_shape=(512,), activation="softmax")] + model = Sequential(layers) + + opt = tf.keras.optimizers.SGD(learning_rate=lr) + model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"]) + return model + +Now, we can use the SciKeras to create a Scikit-learn compatible model: + +.. code-block:: python + + from scikeras.wrappers import KerasClassifier, KerasRegressor + model = KerasClassifier(build_fn=_keras_build_fn, lr=0.1) + +This model will work with all of Dask-ML: it expects NumPy arrays as inputs and +obeys the Scikit-learn API. For example, the following examples have been +implemented: + +* Using Keras with the relevant model selection in Dask-ML, :class:`~dask_ml.model_selection.HyperbandSearchCV` + (TODO: link). 
+* Using Keras with Dask-ML's :class:`~dask_ml.wrappers.Incremental` (TODO link) + +.. _SciKeras: https://github.com/adriangb/scikeras diff --git a/docs/source/pytorch.rst b/docs/source/pytorch.rst index d8d854f9b..6392af93e 100644 --- a/docs/source/pytorch.rst +++ b/docs/source/pytorch.rst @@ -1,2 +1,64 @@ PyTorch ======= + +Skorch_ brings a Scikit-learn API to PyTorch_. We encourage looking at the +Skorch documentation for complete details. + +Example usage +------------- + +First, let's create a normal PyTorch model: + +.. code-block:: python + + + import torch.nn as nn + import torch.nn.functional as F + + class ShallowNet(nn.Module): + def __init__(self, n_features=5): + super().__init__() + self.layer1 = nn.Linear(n_features, 1) + + def forward(self, x): + return F.relu(self.layer1(x)) + +With this, it's easy to use Skorch: + +.. code-block:: python + + from skorch import NeuralNetRegressor + import torch.optim as optim + + niceties = { + "callbacks": False, + "warm_start": False, + "train_split": None, + "max_epochs": 1, + } + + model = NeuralNetRegressor( + module=ShallowNet, + module__n_features=5, + criterion=nn.MSELoss, + optimizer=optim.SGD, + optimizer__lr=0.1, + optimizer__momentum=0.9, + batch_size=64, + **niceties, + ) + +Each parameter that the PyTorch `nn.Module` takes is prefixed with `module__`, +and same for the optimizer (`optim.SGD` takes a `lr` and `momentum` +parameters). + +Now, this model can be used with Dask-ML. The following examples have been +implemented: + +* Using PyTorch with the relevant model selection in Dask-ML, + :class:`~dask_ml.model_selection.HyperbandSearchCV` (TODO: link). +* Using PyTorch with Dask-ML's :class:`~dask_ml.wrappers.Incremental` (TODO: + link). + +.. _Skorch: https://skorch.readthedocs.io/en/stable/ +.. 
_PyTorch: https://pytorch.org From ea6e95cebfad5e3b64822a6cfb8b266cbf9ad7ef Mon Sep 17 00:00:00 2001 From: Scott Date: Wed, 15 Jul 2020 16:06:24 -0500 Subject: [PATCH 09/41] Don't depend on dask-examples --- docs/source/keras.rst | 10 +++++----- docs/source/pytorch.rst | 14 +++++++------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/source/keras.rst b/docs/source/keras.rst index d05923b19..160a27e4f 100644 --- a/docs/source/keras.rst +++ b/docs/source/keras.rst @@ -35,11 +35,11 @@ Now, we can use the SciKeras to create a Scikit-learn compatible model: model = KerasClassifier(build_fn=_keras_build_fn, lr=0.1) This model will work with all of Dask-ML: it expects NumPy arrays as inputs and -obeys the Scikit-learn API. For example, the following examples have been -implemented: +obeys the Scikit-learn API. For example, it's possible to use Dask-ML to do the +following: -* Using Keras with the relevant model selection in Dask-ML, :class:`~dask_ml.model_selection.HyperbandSearchCV` - (TODO: link). -* Using Keras with Dask-ML's :class:`~dask_ml.wrappers.Incremental` (TODO link) +* Use Keras with Dask-ML's model selection, including + :class:`~dask_ml.model_selection.HyperbandSearchCV`. +* Use Keras with Dask-ML's :class:`~dask_ml.wrappers.Incremental`. .. _SciKeras: https://github.com/adriangb/scikeras diff --git a/docs/source/pytorch.rst b/docs/source/pytorch.rst index 6392af93e..bc42ea165 100644 --- a/docs/source/pytorch.rst +++ b/docs/source/pytorch.rst @@ -50,15 +50,15 @@ With this, it's easy to use Skorch: Each parameter that the PyTorch `nn.Module` takes is prefixed with `module__`, and same for the optimizer (`optim.SGD` takes a `lr` and `momentum` -parameters). +parameters). The ``niceties`` make sure Skorch uses all the data for training +and doesn't print excessive amounts of logs. -Now, this model can be used with Dask-ML. The following examples have been -implemented: +Now, this model can be used with Dask-ML. 
For example, it's possible to do the +following: -* Using PyTorch with the relevant model selection in Dask-ML, - :class:`~dask_ml.model_selection.HyperbandSearchCV` (TODO: link). -* Using PyTorch with Dask-ML's :class:`~dask_ml.wrappers.Incremental` (TODO: - link). +* Use PyTorch with the Dask-ML's model selection, including + :class:`~dask_ml.model_selection.HyperbandSearchCV`. +* Use PyTorch with Dask-ML's :class:`~dask_ml.wrappers.Incremental`. .. _Skorch: https://skorch.readthedocs.io/en/stable/ .. _PyTorch: https://pytorch.org From 3dfd9144b7cd4fd833e2e6c9b3b669e67fd8c860 Mon Sep 17 00:00:00 2001 From: Scott Date: Wed, 15 Jul 2020 16:37:55 -0500 Subject: [PATCH 10/41] Build on master --- ci/posix.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ci/posix.yaml b/ci/posix.yaml index da009c614..d0613d0c4 100644 --- a/ci/posix.yaml +++ b/ci/posix.yaml @@ -20,6 +20,7 @@ jobs: sklearnDev: envFile: 'ci/environment-3.7.yaml' SKLEARN_DEV: "yes" + WRAPPERS: "no" steps: - bash: echo "##vso[task.prependpath]$CONDA/bin" @@ -33,6 +34,13 @@ jobs: - bash: conda env create --quiet --file=$(envFile) --name=dask-ml-test && conda list -n dask-ml-test displayName: "install" + - bash: | + conda install pytorch torchvision -c pytorch + pip install skorch + pip install tensorflow scikeras keras + displayName: "install Tensorflow/PyTorch" + condition: eq(variables['Build.SourceBranch'], 'refs/heads/master') + - script: | source activate dask-ml-test conda uninstall -y --force scikit-learn From c3aa74f8640398da06ee54f354598c5016634067 Mon Sep 17 00:00:00 2001 From: Scott Date: Wed, 15 Jul 2020 16:49:02 -0500 Subject: [PATCH 11/41] Add note to tests --- tests/model_selection/test_wrappers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/model_selection/test_wrappers.py b/tests/model_selection/test_wrappers.py index 71799fee5..2fb75166c 100644 --- a/tests/model_selection/test_wrappers.py +++ b/tests/model_selection/test_wrappers.py @@ -68,6 +68,9 @@ def 
test_keras(c, s, a, b): model = KerasClassifier(build_fn=_keras_build_fn, lr=0.1) params = {"lr": loguniform(1e-3, 1e-1)} + # SciKeras reformats some of the shapes; I think this warning comes from + # that but am not sure + # https://github.com/adriangb/scikeras/issues/19#issuecomment-658549923 with pytest.warns(DataConversionWarning): m = model.partial_fit(X, y) assert m is model From 77a0e26a72985f742106ae203f35fa29481e81ac Mon Sep 17 00:00:00 2001 From: Scott Date: Wed, 15 Jul 2020 16:49:34 -0500 Subject: [PATCH 12/41] REVERT: run on this PR too --- ci/posix.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/posix.yaml b/ci/posix.yaml index d0613d0c4..acabcfd0f 100644 --- a/ci/posix.yaml +++ b/ci/posix.yaml @@ -39,7 +39,7 @@ jobs: pip install skorch pip install tensorflow scikeras keras displayName: "install Tensorflow/PyTorch" - condition: eq(variables['Build.SourceBranch'], 'refs/heads/master') + # condition: eq(variables['Build.SourceBranch'], 'refs/heads/master') - script: | source activate dask-ml-test From aa53c21b3cc9687e9e42ddf7748532655f0c8ac7 Mon Sep 17 00:00:00 2001 From: Scott Date: Wed, 15 Jul 2020 16:55:24 -0500 Subject: [PATCH 13/41] Update note --- tests/model_selection/test_wrappers.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/model_selection/test_wrappers.py b/tests/model_selection/test_wrappers.py index 2fb75166c..a9b232841 100644 --- a/tests/model_selection/test_wrappers.py +++ b/tests/model_selection/test_wrappers.py @@ -68,9 +68,10 @@ def test_keras(c, s, a, b): model = KerasClassifier(build_fn=_keras_build_fn, lr=0.1) params = {"lr": loguniform(1e-3, 1e-1)} - # SciKeras reformats some of the shapes; I think this warning comes from - # that but am not sure - # https://github.com/adriangb/scikeras/issues/19#issuecomment-658549923 + # Keras and Scikit-learn don't agree on shapes all the time. + # SciKeras does it's best to manage that. See [1] and [2]. 
+ # [1]:https://github.com/adriangb/scikeras/issues/20 + # [2]:https://github.com/dask/dask-ml/pull/699#discussion_r455385057 with pytest.warns(DataConversionWarning): m = model.partial_fit(X, y) assert m is model From 102a5aa934de3761087b11c3c506f7f10e2840b1 Mon Sep 17 00:00:00 2001 From: Scott Date: Wed, 15 Jul 2020 17:53:37 -0500 Subject: [PATCH 14/41] typo --- tests/model_selection/test_wrappers.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/tests/model_selection/test_wrappers.py b/tests/model_selection/test_wrappers.py index a9b232841..fce1baef9 100644 --- a/tests/model_selection/test_wrappers.py +++ b/tests/model_selection/test_wrappers.py @@ -88,19 +88,17 @@ def test_keras(c, s, a, b): assert search.best_score_ >= 0 -class ShallowNet(nn.Module): - def __init__(self, n_features=5): - super().__init__() - self.layer1 = nn.Linear(n_features, 1) - - def forward(self, x): - return F.relu(self.layer1(x)) - - @gen_cluster(client=True) def test_pytorch(c, s, a, b): pytest.importorskip("torch") pytest.importorskip("skorch") + class ShallowNet(nn.Module): + def __init__(self, n_features=5): + super().__init__() + self.layer1 = nn.Linear(n_features, 1) + + def forward(self, x): + return F.relu(self.layer1(x)) n_features = 10 defaults = { From c071c9e30a0bd5cf0178487a91e959416da2373f Mon Sep 17 00:00:00 2001 From: Scott Date: Wed, 15 Jul 2020 18:04:44 -0500 Subject: [PATCH 15/41] isort --- tests/model_selection/test_wrappers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/model_selection/test_wrappers.py b/tests/model_selection/test_wrappers.py index fce1baef9..94a2a8d07 100644 --- a/tests/model_selection/test_wrappers.py +++ b/tests/model_selection/test_wrappers.py @@ -5,10 +5,10 @@ import pytest from distributed.utils_test import gen_cluster from scipy.stats import loguniform, uniform -from sklearn.model_selection import RandomizedSearchCV -from sklearn.datasets import make_classification, 
make_regression from sklearn.base import clone +from sklearn.datasets import make_classification, make_regression from sklearn.exceptions import DataConversionWarning +from sklearn.model_selection import RandomizedSearchCV from dask_ml.model_selection import IncrementalSearchCV From 27860994f4e73394e87075f3859321ae982ae4df Mon Sep 17 00:00:00 2001 From: Scott Date: Wed, 15 Jul 2020 18:30:46 -0500 Subject: [PATCH 16/41] Temporarily install from source --- ci/posix.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ci/posix.yaml b/ci/posix.yaml index acabcfd0f..86e4addf2 100644 --- a/ci/posix.yaml +++ b/ci/posix.yaml @@ -38,8 +38,11 @@ jobs: conda install pytorch torchvision -c pytorch pip install skorch pip install tensorflow scikeras keras + pip install -U git+https://github.com/adriangb/scikeras.git displayName: "install Tensorflow/PyTorch" # condition: eq(variables['Build.SourceBranch'], 'refs/heads/master') + # Installing from git is temporary: see + # https://github.com/adriangb/scikeras/pull/17#issuecomment-659064357 - script: | source activate dask-ml-test From 419ef976a178b7540675dfac51691f9922545747 Mon Sep 17 00:00:00 2001 From: Scott Date: Thu, 16 Jul 2020 13:15:14 -0500 Subject: [PATCH 17/41] remove print statement; resolve warning --- tests/model_selection/test_wrappers.py | 42 ++++++++++++-------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/tests/model_selection/test_wrappers.py b/tests/model_selection/test_wrappers.py index 94a2a8d07..573c78884 100644 --- a/tests/model_selection/test_wrappers.py +++ b/tests/model_selection/test_wrappers.py @@ -27,7 +27,10 @@ import torch.nn as nn import torch.nn.functional as F from skorch import NeuralNetClassifier, NeuralNetRegressor + + PYTORCH = True except: + PYTORCH = False pass @@ -47,7 +50,12 @@ def _keras_build_fn(lr=0.01): Dense(512, input_shape=(784,), activation="relu"), Dense(10, input_shape=(512,), activation="softmax"), ] - model = Sequential(layers) + + # See 
https://github.com/adriangb/scikeras/issues/24 + try: + model = Sequential(layers) + except TypeError: + model = Sequential(layers) opt = tf.keras.optimizers.SGD(learning_rate=lr) model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"]) @@ -56,7 +64,6 @@ def _keras_build_fn(lr=0.01): @gen_cluster(client=True) def test_keras(c, s, a, b): -# def test_keras(): pytest.importorskip("tensorflow") pytest.importorskip("scikeras") @@ -65,33 +72,16 @@ def test_keras(c, s, a, b): assert y.ndim == 1 and len(X) == len(y) assert isinstance(X, np.ndarray) and isinstance(y, np.ndarray) - model = KerasClassifier(build_fn=_keras_build_fn, lr=0.1) + model = KerasClassifier(build_fn=_keras_build_fn, epochs=1, lr=0.1) params = {"lr": loguniform(1e-3, 1e-1)} - # Keras and Scikit-learn don't agree on shapes all the time. - # SciKeras does it's best to manage that. See [1] and [2]. - # [1]:https://github.com/adriangb/scikeras/issues/20 - # [2]:https://github.com/dask/dask-ml/pull/699#discussion_r455385057 - with pytest.warns(DataConversionWarning): - m = model.partial_fit(X, y) - assert m is model - model2 = pickle.loads(pickle.dumps(model)) - - search = RandomizedSearchCV(model, params) - with pytest.warns(DataConversionWarning): - search.fit(X, y, epochs=2) + search = IncrementalSearchCV(model, params, max_iter=2, decay_rate=None) + yield search.fit(X, y, epochs=1) assert search.best_score_ >= 0 - search = IncrementalSearchCV(model, params, max_iter=5) - with pytest.warns(DataConversionWarning): - yield search.fit(X, y) - assert search.best_score_ >= 0 +if PYTORCH: -@gen_cluster(client=True) -def test_pytorch(c, s, a, b): - pytest.importorskip("torch") - pytest.importorskip("skorch") class ShallowNet(nn.Module): def __init__(self, n_features=5): super().__init__() @@ -100,6 +90,12 @@ def __init__(self, n_features=5): def forward(self, x): return F.relu(self.layer1(x)) + +@gen_cluster(client=True) +def test_pytorch(c, s, a, b): + pytest.importorskip("torch") + 
pytest.importorskip("skorch") + n_features = 10 defaults = { "callbacks": False, From 12ec08b75c801cadbfcfdfbe13852b64f4936209 Mon Sep 17 00:00:00 2001 From: Scott Date: Thu, 16 Jul 2020 13:47:09 -0500 Subject: [PATCH 18/41] MAINT: allow models to be scattered --- dask_ml/model_selection/_incremental.py | 9 ++++++--- tests/model_selection/test_incremental.py | 15 +++++++++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/dask_ml/model_selection/_incremental.py b/dask_ml/model_selection/_incremental.py index f9cf33286..f5203de71 100644 --- a/dask_ml/model_selection/_incremental.py +++ b/dask_ml/model_selection/_incremental.py @@ -506,7 +506,7 @@ def __init__( self.prefix = prefix super(BaseIncrementalSearchCV, self).__init__(estimator, scoring=scoring) - def _validate_parameters(self, X, y): + async def _validate_parameters(self, X, y): if (self.max_iter is not None) and self.max_iter < 1: raise ValueError( "Received max_iter={}. max_iter < 1 is not supported".format( @@ -522,7 +522,10 @@ def _validate_parameters(self, X, y): kwargs = dict(accept_unknown_chunks=False, accept_dask_dataframe=False) X = self._check_array(X, **kwargs) y = self._check_array(y, ensure_2d=False, **kwargs) - scorer = check_scoring(self.estimator, scoring=self.scoring) + estimator = self.estimator + if isinstance(estimator, Future): + estimator = await estimator.result() + scorer = check_scoring(estimator, scoring=self.scoring) return X, y, scorer @property @@ -634,7 +637,7 @@ async def _fit(self, X, y, **fit_params): else: context = dummy_context() - X, y, scorer = self._validate_parameters(X, y) + X, y, scorer = await self._validate_parameters(X, y) X_train, X_test, y_train, y_test = self._get_train_test_split(X, y) with context: diff --git a/tests/model_selection/test_incremental.py b/tests/model_selection/test_incremental.py index 31da5b12c..dfe41cb9d 100644 --- a/tests/model_selection/test_incremental.py +++ b/tests/model_selection/test_incremental.py @@ -857,3 
+857,18 @@ def test_warns_scores_per_fit(c, s, a, b): search = IncrementalSearchCV(model, params, scores_per_fit=2) with pytest.warns(UserWarning, match="deprecated since Dask-ML v1.4.0"): yield search.fit(X, y) + + +@gen_cluster(client=True) +async def test_model_future(c, s, a, b): + X, y = make_classification(n_samples=100, n_features=5, chunks=10) + + params = {"value": np.random.RandomState(42).rand(1000)} + model = ConstantFunction() + model_future = await c.scatter(model) + + search = IncrementalSearchCV(model_future, params, max_iter=10) + + await search.fit(X, y, classes=[0, 1]) + assert search.history_ + assert search.best_score_ > 0 From 76ab03b9a865f0aababfc1de85eb0beecfc87d00 Mon Sep 17 00:00:00 2001 From: Scott Sievert Date: Thu, 16 Jul 2020 13:59:21 -0500 Subject: [PATCH 19/41] Update docs/source/keras.rst Co-authored-by: Tom Augspurger --- docs/source/keras.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/keras.rst b/docs/source/keras.rst index 160a27e4f..b49910fbd 100644 --- a/docs/source/keras.rst +++ b/docs/source/keras.rst @@ -18,7 +18,7 @@ normal way to create a `Keras Sequential model`_ from tensorflow.keras.layers import Dense, Activation, Dropout from tensorflow.keras.models import Sequential - def _keras_build_fn(lr=0.01): + def build_model(lr=0.01): layers = [Dense(512, input_shape=(784,), activation="relu"), Dense(10, input_shape=(512,), activation="softmax")] model = Sequential(layers) From 4118de024da36d0f96eea5bf5d9ec984b735bde0 Mon Sep 17 00:00:00 2001 From: Scott Sievert Date: Thu, 16 Jul 2020 13:59:29 -0500 Subject: [PATCH 20/41] Update docs/source/keras.rst Co-authored-by: Tom Augspurger --- docs/source/keras.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/keras.rst b/docs/source/keras.rst index b49910fbd..c4ca26a05 100644 --- a/docs/source/keras.rst +++ b/docs/source/keras.rst @@ -32,7 +32,7 @@ Now, we can use the SciKeras to create a Scikit-learn compatible model: 
.. code-block:: python from scikeras.wrappers import KerasClassifier, KerasRegressor - model = KerasClassifier(build_fn=_keras_build_fn, lr=0.1) + model = KerasClassifier(build_fn=build_model, lr=0.1) This model will work with all of Dask-ML: it expects NumPy arrays as inputs and obeys the Scikit-learn API. For example, it's possible to use Dask-ML to do the From 770c28bf470e248954733b9e726aad96cf8a66ed Mon Sep 17 00:00:00 2001 From: Scott Date: Thu, 16 Jul 2020 13:59:46 -0500 Subject: [PATCH 21/41] DOC: title --- docs/source/keras.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/keras.rst b/docs/source/keras.rst index 160a27e4f..f56f1161a 100644 --- a/docs/source/keras.rst +++ b/docs/source/keras.rst @@ -1,5 +1,5 @@ -Keras -===== +Keras and Tensorflow +==================== The package SciKeras_ brings a Scikit-learn API to Keras. Install directions are at https://github.com/adriangb/scikeras/blob/master/README.md#installation. From 2605a8c80d4231340dff1eea4c6638d74e83ce07 Mon Sep 17 00:00:00 2001 From: Scott Date: Thu, 16 Jul 2020 14:12:35 -0500 Subject: [PATCH 22/41] remove ci --- ci/environment-3.8.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ci/environment-3.8.yaml b/ci/environment-3.8.yaml index b499b5837..c2c260925 100644 --- a/ci/environment-3.8.yaml +++ b/ci/environment-3.8.yaml @@ -30,7 +30,3 @@ dependencies: - pip - pip: - pytest-azurepipelines - - tensorflow - - scikeras - - skorch - - torch From 1ea5217b619a3432ba69beb39926fb63b69a7b90 Mon Sep 17 00:00:00 2001 From: Scott Date: Thu, 16 Jul 2020 14:58:21 -0500 Subject: [PATCH 23/41] await in hyperband too --- dask_ml/model_selection/_hyperband.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dask_ml/model_selection/_hyperband.py b/dask_ml/model_selection/_hyperband.py index e14407e9c..d480ef2c6 100644 --- a/dask_ml/model_selection/_hyperband.py +++ b/dask_ml/model_selection/_hyperband.py @@ -388,7 +388,7 @@ def _get_SHAs(self, 
brackets): return SHAs async def _fit(self, X, y, **fit_params): - X, y, scorer = self._validate_parameters(X, y) + X, y, scorer = await self._validate_parameters(X, y) brackets = _get_hyperband_params(self.max_iter, eta=self.aggressiveness) SHAs = self._get_SHAs(brackets) From da3ba8d9c8679ad777768da3e9a0a1533f8d63d0 Mon Sep 17 00:00:00 2001 From: Scott Date: Mon, 20 Jul 2020 14:25:26 -0500 Subject: [PATCH 24/41] Rename to test_{keras, pytorch}.py --- .../{test_wrappers.py => test_keras.py} | 76 +++---------------- tests/model_selection/test_pytorch.py | 59 ++++++++++++++ 2 files changed, 69 insertions(+), 66 deletions(-) rename tests/model_selection/{test_wrappers.py => test_keras.py} (50%) create mode 100644 tests/model_selection/test_pytorch.py diff --git a/tests/model_selection/test_wrappers.py b/tests/model_selection/test_keras.py similarity index 50% rename from tests/model_selection/test_wrappers.py rename to tests/model_selection/test_keras.py index 573c78884..fbafae933 100644 --- a/tests/model_selection/test_wrappers.py +++ b/tests/model_selection/test_keras.py @@ -12,26 +12,16 @@ from dask_ml.model_selection import IncrementalSearchCV -try: - import tensorflow as tf - from tensorflow.keras.datasets import mnist as keras_mnist - from tensorflow.keras.layers import Dense, Activation, Dropout - from tensorflow.keras.models import Sequential - from tensorflow.keras.utils import to_categorical - from scikeras.wrappers import KerasClassifier, KerasRegressor -except: - pass - -try: - import torch.optim as optim - import torch.nn as nn - import torch.nn.functional as F - from skorch import NeuralNetClassifier, NeuralNetRegressor - - PYTORCH = True -except: - PYTORCH = False - pass +import pytest +pytest.importorskip("tensorflow") +pytest.importorskip("scikeras") + +import tensorflow as tf +from tensorflow.keras.datasets import mnist as keras_mnist +from tensorflow.keras.layers import Dense, Activation, Dropout +from tensorflow.keras.models import Sequential 
+from tensorflow.keras.utils import to_categorical +from scikeras.wrappers import KerasClassifier, KerasRegressor def mnist() -> Tuple[np.ndarray, np.ndarray]: @@ -80,49 +70,3 @@ def test_keras(c, s, a, b): assert search.best_score_ >= 0 -if PYTORCH: - - class ShallowNet(nn.Module): - def __init__(self, n_features=5): - super().__init__() - self.layer1 = nn.Linear(n_features, 1) - - def forward(self, x): - return F.relu(self.layer1(x)) - - -@gen_cluster(client=True) -def test_pytorch(c, s, a, b): - pytest.importorskip("torch") - pytest.importorskip("skorch") - - n_features = 10 - defaults = { - "callbacks": False, - "warm_start": False, - "train_split": None, - "max_epochs": 1, - } - model = NeuralNetRegressor( - module=ShallowNet, - module__n_features=n_features, - criterion=nn.MSELoss, - optimizer=optim.SGD, - optimizer__lr=0.1, - batch_size=64, - **defaults, - ) - - model2 = clone(model) - assert model.callbacks == False - assert model.warm_start == False - assert model.train_split is None - assert model.max_epochs == 1 - - params = {"optimizer__lr": loguniform(1e-3, 1e0)} - X, y = make_regression(n_samples=100, n_features=n_features) - X = X.astype("float32") - y = y.astype("float32").reshape(-1, 1) - search = IncrementalSearchCV(model2, params, max_iter=5, decay_rate=None) - yield search.fit(X, y) - assert search.best_score_ >= 0 diff --git a/tests/model_selection/test_pytorch.py b/tests/model_selection/test_pytorch.py new file mode 100644 index 000000000..92b4b7b7e --- /dev/null +++ b/tests/model_selection/test_pytorch.py @@ -0,0 +1,59 @@ +import pytest +pytest.importorskip("torch") +pytest.importorskip("skorch") + +from sklearn.base import clone +from distributed.utils_test import gen_cluster +from scipy.stats import loguniform +import torch.optim as optim +import torch.nn as nn +import torch.nn.functional as F +from skorch import NeuralNetClassifier, NeuralNetRegressor +from sklearn.datasets import make_regression +from dask_ml.model_selection import 
IncrementalSearchCV + + +class ShallowNet(nn.Module): + def __init__(self, n_features=5): + super().__init__() + self.layer1 = nn.Linear(n_features, 1) + + def forward(self, x): + return F.relu(self.layer1(x)) + + +@gen_cluster(client=True) +def test_pytorch(c, s, a, b): + pytest.importorskip("torch") + pytest.importorskip("skorch") + + n_features = 10 + defaults = { + "callbacks": False, + "warm_start": False, + "train_split": None, + "max_epochs": 1, + } + model = NeuralNetRegressor( + module=ShallowNet, + module__n_features=n_features, + criterion=nn.MSELoss, + optimizer=optim.SGD, + optimizer__lr=0.1, + batch_size=64, + **defaults, + ) + + model2 = clone(model) + assert model.callbacks == False + assert model.warm_start == False + assert model.train_split is None + assert model.max_epochs == 1 + + params = {"optimizer__lr": loguniform(1e-3, 1e0)} + X, y = make_regression(n_samples=100, n_features=n_features) + X = X.astype("float32") + y = y.astype("float32").reshape(-1, 1) + search = IncrementalSearchCV(model2, params, max_iter=5, decay_rate=None) + yield search.fit(X, y) + assert search.best_score_ >= 0 From d7eccf2512db9ed9f5e4c8a4fbd7f680da386cc9 Mon Sep 17 00:00:00 2001 From: Scott Date: Tue, 21 Jul 2020 08:53:25 -0500 Subject: [PATCH 25/41] tmp --- tests/model_selection/test_keras.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/model_selection/test_keras.py b/tests/model_selection/test_keras.py index fbafae933..784f94a18 100644 --- a/tests/model_selection/test_keras.py +++ b/tests/model_selection/test_keras.py @@ -54,9 +54,6 @@ def _keras_build_fn(lr=0.01): @gen_cluster(client=True) def test_keras(c, s, a, b): - pytest.importorskip("tensorflow") - pytest.importorskip("scikeras") - X, y = mnist() assert X.ndim == 2 and X.shape[-1] == 784 assert y.ndim == 1 and len(X) == len(y) @@ -68,5 +65,3 @@ def test_keras(c, s, a, b): search = IncrementalSearchCV(model, params, max_iter=2, decay_rate=None) yield search.fit(X, y, epochs=1) assert 
search.best_score_ >= 0 - - From 19ec22e9a5da92f59bea2d0aef0ed23c3bf54c79 Mon Sep 17 00:00:00 2001 From: Scott Date: Tue, 21 Jul 2020 18:51:05 -0500 Subject: [PATCH 26/41] Pass check_scoring to submit --- dask_ml/model_selection/_incremental.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/dask_ml/model_selection/_incremental.py b/dask_ml/model_selection/_incremental.py index 13426d095..081cc1b9c 100644 --- a/dask_ml/model_selection/_incremental.py +++ b/dask_ml/model_selection/_incremental.py @@ -530,14 +530,10 @@ async def _validate_parameters(self, X, y): y = self._check_array(y, ensure_2d=False, **kwargs) estimator = self.estimator if isinstance(estimator, Future): - estimator = await estimator.result() - - kwargs = dict(accept_unknown_chunks=True, accept_dask_dataframe=True) - if not isinstance(X, dd.DataFrame): - X = self._check_array(X, **kwargs) - if not isinstance(y, dd.Series): - y = self._check_array(y, ensure_2d=False, **kwargs) - scorer = check_scoring(self.estimator, scoring=self.scoring) + client = default_client() + scorer = await client.submit(check_scoring, estimator, scoring=self.scoring) + else: + scorer = check_scoring(self.estimator, scoring=self.scoring) return X, y, scorer @property From 645353f9c176be1b26fd6e996d6abd8eb41c8707 Mon Sep 17 00:00:00 2001 From: Scott Sievert Date: Fri, 24 Jul 2020 14:47:40 -0500 Subject: [PATCH 27/41] Update ci/posix.yaml Co-authored-by: Tom Augspurger --- ci/posix.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/posix.yaml b/ci/posix.yaml index 86e4addf2..1bde3b991 100644 --- a/ci/posix.yaml +++ b/ci/posix.yaml @@ -35,7 +35,7 @@ jobs: displayName: "install" - bash: | - conda install pytorch torchvision -c pytorch + conda install -y pytorch torchvision -c pytorch pip install skorch pip install tensorflow scikeras keras pip install -U git+https://github.com/adriangb/scikeras.git From 4d30692e4d1d317a944aa1b40a1ac0bad5559c47 Mon Sep 17 00:00:00 2001 From: 
Scott Date: Sat, 25 Jul 2020 19:48:35 -0500 Subject: [PATCH 28/41] remove keras, give joblib edits --- docs/source/index.rst | 5 ++--- docs/source/joblib.rst | 4 ++-- docs/source/keras.rst | 45 ----------------------------------------- docs/source/pytorch.rst | 14 +++++++++---- 4 files changed, 14 insertions(+), 54 deletions(-) delete mode 100644 docs/source/keras.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index f3ac64663..191ab2269 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -130,10 +130,9 @@ Scikit-Learn should feel at home with Dask-ML. :hidden: :caption: Integration - keras.rst - pytorch.rst - xgboost.rst joblib.rst + xgboost.rst + pytorch.rst .. toctree:: :maxdepth: 2 diff --git a/docs/source/joblib.rst b/docs/source/joblib.rst index 0533d0a4c..1f55d50e7 100644 --- a/docs/source/joblib.rst +++ b/docs/source/joblib.rst @@ -1,7 +1,7 @@ .. _joblib: -Joblib -====== +Scikit-Learn & Joblib +===================== Many Scikit-Learn algorithms are written for parallel execution using `Joblib `__, which natively provides diff --git a/docs/source/keras.rst b/docs/source/keras.rst deleted file mode 100644 index 91d6353e6..000000000 --- a/docs/source/keras.rst +++ /dev/null @@ -1,45 +0,0 @@ -Keras and Tensorflow -==================== - -The package SciKeras_ brings a Scikit-learn API to Keras. Install directions -are at https://github.com/adriangb/scikeras/blob/master/README.md#installation. - -Example usage -------------- - -First, let's start by defining normal function to create our model. This is the -normal way to create a `Keras Sequential model`_ - -.. _Keras Sequential model: https://keras.io/api/models/sequential/ - -.. 
code-block:: python - - import tensorflow as tf - from tensorflow.keras.layers import Dense, Activation, Dropout - from tensorflow.keras.models import Sequential - - def build_model(lr=0.01): - layers = [Dense(512, input_shape=(784,), activation="relu"), - Dense(10, input_shape=(512,), activation="softmax")] - model = Sequential(layers) - - opt = tf.keras.optimizers.SGD(learning_rate=lr) - model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"]) - return model - -Now, we can use the SciKeras to create a Scikit-learn compatible model: - -.. code-block:: python - - from scikeras.wrappers import KerasClassifier, KerasRegressor - model = KerasClassifier(build_fn=build_model, lr=0.1) - -This model will work with all of Dask-ML: it expects NumPy arrays as inputs and -obeys the Scikit-learn API. For example, it's possible to use Dask-ML to do the -following: - -* Use Keras with Dask-ML's model selection, including - :class:`~dask_ml.model_selection.HyperbandSearchCV`. -* Use Keras with Dask-ML's :class:`~dask_ml.wrappers.Incremental`. - -.. _SciKeras: https://github.com/adriangb/scikeras diff --git a/docs/source/pytorch.rst b/docs/source/pytorch.rst index bc42ea165..2219097ac 100644 --- a/docs/source/pytorch.rst +++ b/docs/source/pytorch.rst @@ -1,8 +1,14 @@ PyTorch ======= -Skorch_ brings a Scikit-learn API to PyTorch_. We encourage looking at the -Skorch documentation for complete details. +Skorch_ brings a Scikit-learn API to PyTorch_. Skorch allows PyTorch models to +be wrapped in Scikit-learn compatible estimators. So, that means that PyTorch +models wrapped in Skorch can be used with the rest of the Dask-ML API. For +example, using Dask-ML's :class:`~dask_ml.model_selection.HyperbandSearchCV` or +:class:`~dask_ml.model_selection.Incremental` with PyTorch is possible after +wrapping with Skorch. + +We encourage looking at the Skorch documentation for complete details. 
Example usage ------------- @@ -48,8 +54,8 @@ With this, it's easy to use Skorch: **niceties, ) -Each parameter that the PyTorch `nn.Module` takes is prefixed with `module__`, -and same for the optimizer (`optim.SGD` takes a `lr` and `momentum` +Each parameter that the PyTorch ``nn.Module`` takes is prefixed with ``module__``, +and same for the optimizer (``optim.SGD`` takes a ``lr`` and ``momentum`` parameters). The ``niceties`` make sure Skorch uses all the data for training and doesn't print excessive amounts of logs. From c31c6000167529ebd3c2ed498b0c4d89fea991a2 Mon Sep 17 00:00:00 2001 From: Scott Date: Sat, 25 Jul 2020 19:49:59 -0500 Subject: [PATCH 29/41] Remove extra installs --- ci/posix.yaml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/ci/posix.yaml b/ci/posix.yaml index 1bde3b991..e89d22171 100644 --- a/ci/posix.yaml +++ b/ci/posix.yaml @@ -35,14 +35,10 @@ jobs: displayName: "install" - bash: | - conda install -y pytorch torchvision -c pytorch + conda install -y pytorch -c pytorch pip install skorch - pip install tensorflow scikeras keras - pip install -U git+https://github.com/adriangb/scikeras.git - displayName: "install Tensorflow/PyTorch" + displayName: "install PyTorch" # condition: eq(variables['Build.SourceBranch'], 'refs/heads/master') - # Installing from git is temporary: see - # https://github.com/adriangb/scikeras/pull/17#issuecomment-659064357 - script: | source activate dask-ml-test From 4dc1d0a520a53ba8fbaa9b316b509febb651ae59 Mon Sep 17 00:00:00 2001 From: Scott Date: Sun, 26 Jul 2020 17:25:06 -0500 Subject: [PATCH 30/41] lint --- tests/model_selection/test_keras.py | 67 --------------------------- tests/model_selection/test_pytorch.py | 18 +++---- 2 files changed, 10 insertions(+), 75 deletions(-) delete mode 100644 tests/model_selection/test_keras.py diff --git a/tests/model_selection/test_keras.py b/tests/model_selection/test_keras.py deleted file mode 100644 index 784f94a18..000000000 --- 
a/tests/model_selection/test_keras.py +++ /dev/null @@ -1,67 +0,0 @@ -import pickle -from typing import Tuple - -import numpy as np -import pytest -from distributed.utils_test import gen_cluster -from scipy.stats import loguniform, uniform -from sklearn.base import clone -from sklearn.datasets import make_classification, make_regression -from sklearn.exceptions import DataConversionWarning -from sklearn.model_selection import RandomizedSearchCV - -from dask_ml.model_selection import IncrementalSearchCV - -import pytest -pytest.importorskip("tensorflow") -pytest.importorskip("scikeras") - -import tensorflow as tf -from tensorflow.keras.datasets import mnist as keras_mnist -from tensorflow.keras.layers import Dense, Activation, Dropout -from tensorflow.keras.models import Sequential -from tensorflow.keras.utils import to_categorical -from scikeras.wrappers import KerasClassifier, KerasRegressor - - -def mnist() -> Tuple[np.ndarray, np.ndarray]: - (X_train, y_train), _ = keras_mnist.load_data() - X_train = X_train[:100] - y_train = y_train[:100] - X_train = X_train.reshape(X_train.shape[0], 784) - X_train = X_train.astype("float32") - X_train /= 255 - Y_train = to_categorical(y_train, 10) - return X_train, y_train - - -def _keras_build_fn(lr=0.01): - layers = [ - Dense(512, input_shape=(784,), activation="relu"), - Dense(10, input_shape=(512,), activation="softmax"), - ] - - # See https://github.com/adriangb/scikeras/issues/24 - try: - model = Sequential(layers) - except TypeError: - model = Sequential(layers) - - opt = tf.keras.optimizers.SGD(learning_rate=lr) - model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"]) - return model - - -@gen_cluster(client=True) -def test_keras(c, s, a, b): - X, y = mnist() - assert X.ndim == 2 and X.shape[-1] == 784 - assert y.ndim == 1 and len(X) == len(y) - assert isinstance(X, np.ndarray) and isinstance(y, np.ndarray) - - model = KerasClassifier(build_fn=_keras_build_fn, epochs=1, lr=0.1) - params = 
{"lr": loguniform(1e-3, 1e-1)} - - search = IncrementalSearchCV(model, params, max_iter=2, decay_rate=None) - yield search.fit(X, y, epochs=1) - assert search.best_score_ >= 0 diff --git a/tests/model_selection/test_pytorch.py b/tests/model_selection/test_pytorch.py index 92b4b7b7e..66711111a 100644 --- a/tests/model_selection/test_pytorch.py +++ b/tests/model_selection/test_pytorch.py @@ -1,16 +1,18 @@ import pytest -pytest.importorskip("torch") -pytest.importorskip("skorch") - -from sklearn.base import clone -from distributed.utils_test import gen_cluster -from scipy.stats import loguniform -import torch.optim as optim import torch.nn as nn import torch.nn.functional as F -from skorch import NeuralNetClassifier, NeuralNetRegressor +import torch.optim as optim +from distributed.utils_test import gen_cluster +from scipy.stats import loguniform +from sklearn.base import clone from sklearn.datasets import make_regression + from dask_ml.model_selection import IncrementalSearchCV +from skorch import NeuralNetClassifier, NeuralNetRegressor + +pytest.importorskip("torch") +pytest.importorskip("skorch") + class ShallowNet(nn.Module): From bfaf65a36a5ffcd0065ba6e68e9fcdf9cddb052e Mon Sep 17 00:00:00 2001 From: Scott Date: Sun, 26 Jul 2020 17:26:39 -0500 Subject: [PATCH 31/41] skip isort for pytest importskip --- tests/model_selection/test_pytorch.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/model_selection/test_pytorch.py b/tests/model_selection/test_pytorch.py index 66711111a..ee6864c9c 100644 --- a/tests/model_selection/test_pytorch.py +++ b/tests/model_selection/test_pytorch.py @@ -1,4 +1,7 @@ import pytest +pytest.importorskip("torch") # isort:skip +pytest.importorskip("skorch") # isort:skip + import torch.nn as nn import torch.nn.functional as F import torch.optim as optim @@ -10,8 +13,6 @@ from dask_ml.model_selection import IncrementalSearchCV from skorch import NeuralNetClassifier, NeuralNetRegressor -pytest.importorskip("torch") 
-pytest.importorskip("skorch") From b944890a39a2b3789396b0731e2c4e7115d85931 Mon Sep 17 00:00:00 2001 From: Scott Date: Sun, 26 Jul 2020 17:35:59 -0500 Subject: [PATCH 32/41] isort --- tests/model_selection/test_pytorch.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/model_selection/test_pytorch.py b/tests/model_selection/test_pytorch.py index ee6864c9c..7598216e9 100644 --- a/tests/model_selection/test_pytorch.py +++ b/tests/model_selection/test_pytorch.py @@ -1,4 +1,5 @@ import pytest + pytest.importorskip("torch") # isort:skip pytest.importorskip("skorch") # isort:skip @@ -9,11 +10,9 @@ from scipy.stats import loguniform from sklearn.base import clone from sklearn.datasets import make_regression - -from dask_ml.model_selection import IncrementalSearchCV from skorch import NeuralNetClassifier, NeuralNetRegressor - +from dask_ml.model_selection import IncrementalSearchCV class ShallowNet(nn.Module): @@ -27,8 +26,6 @@ def forward(self, x): @gen_cluster(client=True) def test_pytorch(c, s, a, b): - pytest.importorskip("torch") - pytest.importorskip("skorch") n_features = 10 defaults = { From 96489f5f9fac2527fb10f428fe1d75443ed9cce2 Mon Sep 17 00:00:00 2001 From: Scott Date: Sun, 26 Jul 2020 18:01:15 -0500 Subject: [PATCH 33/41] isort skip --- tests/model_selection/test_pytorch.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/model_selection/test_pytorch.py b/tests/model_selection/test_pytorch.py index 7598216e9..288ac6b4f 100644 --- a/tests/model_selection/test_pytorch.py +++ b/tests/model_selection/test_pytorch.py @@ -1,7 +1,8 @@ +# isort: off import pytest - -pytest.importorskip("torch") # isort:skip -pytest.importorskip("skorch") # isort:skip +pytest.importorskip("torch") +pytest.importorskip("skorch") +# isort: on import torch.nn as nn import torch.nn.functional as F From 99f2fd03a50a6ba022571fc6ad521bc66531fd7e Mon Sep 17 00:00:00 2001 From: Scott Date: Sun, 26 Jul 2020 21:35:25 -0500 Subject: 
[PATCH 34/41] clean --- ci/posix.yaml | 1 - dask_ml/model_selection/_incremental.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/ci/posix.yaml b/ci/posix.yaml index e89d22171..9afc48367 100644 --- a/ci/posix.yaml +++ b/ci/posix.yaml @@ -20,7 +20,6 @@ jobs: sklearnDev: envFile: 'ci/environment-3.7.yaml' SKLEARN_DEV: "yes" - WRAPPERS: "no" steps: - bash: echo "##vso[task.prependpath]$CONDA/bin" diff --git a/dask_ml/model_selection/_incremental.py b/dask_ml/model_selection/_incremental.py index 081cc1b9c..ab7c027e4 100644 --- a/dask_ml/model_selection/_incremental.py +++ b/dask_ml/model_selection/_incremental.py @@ -526,7 +526,7 @@ async def _validate_parameters(self, X, y): kwargs = dict(accept_unknown_chunks=True, accept_dask_dataframe=True) if not isinstance(X, dd.DataFrame): X = self._check_array(X, **kwargs) - if not isinstance(y, dd.DataFrame): + if not isinstance(y, (dd.DataFrame, dd.Series)): y = self._check_array(y, ensure_2d=False, **kwargs) estimator = self.estimator if isinstance(estimator, Future): From 580d77e77dbdc5590fc9ae98cd79bafc86c65bd1 Mon Sep 17 00:00:00 2001 From: Scott Date: Sun, 26 Jul 2020 21:36:29 -0500 Subject: [PATCH 35/41] isort --- tests/model_selection/test_pytorch.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/model_selection/test_pytorch.py b/tests/model_selection/test_pytorch.py index 288ac6b4f..90dcc5ae3 100644 --- a/tests/model_selection/test_pytorch.py +++ b/tests/model_selection/test_pytorch.py @@ -1,5 +1,6 @@ # isort: off import pytest + pytest.importorskip("torch") pytest.importorskip("skorch") # isort: on From 5b6e20c277443d3b765068aa0f45fbd788d3e838 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 27 Jul 2020 08:49:54 -0500 Subject: [PATCH 36/41] lint --- tests/model_selection/test_pytorch.py | 35 +++++++++++++-------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/tests/model_selection/test_pytorch.py b/tests/model_selection/test_pytorch.py index 
90dcc5ae3..387129ca2 100644 --- a/tests/model_selection/test_pytorch.py +++ b/tests/model_selection/test_pytorch.py @@ -1,29 +1,28 @@ -# isort: off import pytest - -pytest.importorskip("torch") -pytest.importorskip("skorch") -# isort: on - -import torch.nn as nn -import torch.nn.functional as F -import torch.optim as optim from distributed.utils_test import gen_cluster from scipy.stats import loguniform from sklearn.base import clone from sklearn.datasets import make_regression -from skorch import NeuralNetClassifier, NeuralNetRegressor from dask_ml.model_selection import IncrementalSearchCV +try: + import torch.nn as nn + import torch.nn.functional as F + import torch.optim as optim + from skorch import NeuralNetRegressor +except ImportError: + pytestmark = pytest.mark.skip(reason="Missing pytorch or skorch.") + +else: -class ShallowNet(nn.Module): - def __init__(self, n_features=5): - super().__init__() - self.layer1 = nn.Linear(n_features, 1) + class ShallowNet(nn.Module): + def __init__(self, n_features=5): + super().__init__() + self.layer1 = nn.Linear(n_features, 1) - def forward(self, x): - return F.relu(self.layer1(x)) + def forward(self, x): + return F.relu(self.layer1(x)) @gen_cluster(client=True) @@ -47,8 +46,8 @@ def test_pytorch(c, s, a, b): ) model2 = clone(model) - assert model.callbacks == False - assert model.warm_start == False + assert model.callbacks is False + assert model.warm_start is False assert model.train_split is None assert model.max_epochs == 1 From 9478728f84c8896ccee80bd16df8bcefaccfd237 Mon Sep 17 00:00:00 2001 From: Scott Date: Tue, 28 Jul 2020 11:47:21 -0500 Subject: [PATCH 37/41] try install deps in right env --- ci/posix.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ci/posix.yaml b/ci/posix.yaml index 9afc48367..f3c9fe657 100644 --- a/ci/posix.yaml +++ b/ci/posix.yaml @@ -34,8 +34,9 @@ jobs: displayName: "install" - bash: | - conda install -y pytorch -c pytorch - pip install skorch + conda install 
-y pytorch -c pytorch -n dask-ml-test + source activate dask-ml-test + pip install skorch --no-deps displayName: "install PyTorch" # condition: eq(variables['Build.SourceBranch'], 'refs/heads/master') From 79b88910826bbee912d8d7a7779354bdb4927805 Mon Sep 17 00:00:00 2001 From: Scott Date: Tue, 28 Jul 2020 19:02:40 -0500 Subject: [PATCH 38/41] no cuda on ci --- ci/posix.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/posix.yaml b/ci/posix.yaml index f3c9fe657..e6e5ca9ff 100644 --- a/ci/posix.yaml +++ b/ci/posix.yaml @@ -34,7 +34,7 @@ jobs: displayName: "install" - bash: | - conda install -y pytorch -c pytorch -n dask-ml-test + conda install -y pytorch cpuonly -c pytorch -n dask-ml-test source activate dask-ml-test pip install skorch --no-deps displayName: "install PyTorch" From ea2b4f5d55dd62d84d563299e73b0f857a921a65 Mon Sep 17 00:00:00 2001 From: Scott Date: Tue, 28 Jul 2020 19:04:53 -0500 Subject: [PATCH 39/41] remove --no-deps for skorch we have all the deps already --- ci/posix.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/posix.yaml b/ci/posix.yaml index e6e5ca9ff..5ce9d58d1 100644 --- a/ci/posix.yaml +++ b/ci/posix.yaml @@ -36,7 +36,7 @@ jobs: - bash: | conda install -y pytorch cpuonly -c pytorch -n dask-ml-test source activate dask-ml-test - pip install skorch --no-deps + pip install skorch displayName: "install PyTorch" # condition: eq(variables['Build.SourceBranch'], 'refs/heads/master') From 262815ddb0c2c4fbdcfc2b81fc870e73c64b9008 Mon Sep 17 00:00:00 2001 From: Scott Date: Tue, 28 Jul 2020 19:05:58 -0500 Subject: [PATCH 40/41] quiet --- ci/posix.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/posix.yaml b/ci/posix.yaml index 5ce9d58d1..f305f7a5c 100644 --- a/ci/posix.yaml +++ b/ci/posix.yaml @@ -34,7 +34,7 @@ jobs: displayName: "install" - bash: | - conda install -y pytorch cpuonly -c pytorch -n dask-ml-test + conda install -y -q pytorch cpuonly -c pytorch -n dask-ml-test 
source activate dask-ml-test pip install skorch displayName: "install PyTorch" From 58b105ce47307d03cff8a1139ea844a73a5c2741 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 28 Jul 2020 20:27:09 -0500 Subject: [PATCH 41/41] Update ci/posix.yaml --- ci/posix.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/posix.yaml b/ci/posix.yaml index f305f7a5c..10e136e78 100644 --- a/ci/posix.yaml +++ b/ci/posix.yaml @@ -38,7 +38,7 @@ jobs: source activate dask-ml-test pip install skorch displayName: "install PyTorch" - # condition: eq(variables['Build.SourceBranch'], 'refs/heads/master') + condition: eq(variables['Build.SourceBranch'], 'refs/heads/master') - script: | source activate dask-ml-test