This repository has been archived by the owner on Jun 11, 2022. It is now read-only.
-
-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
00bf7b3
commit 1adcb4a
Showing
18 changed files
with
452 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
""" | ||
.. _plot_logistic_regression_example.py: | ||
Logistic Regression Example | ||
=========================== | ||
Comparison of scaling. | ||
""" | ||
from dask_ml.datasets import make_classification | ||
import pandas as pd | ||
|
||
from timeit import default_timer as tic | ||
import sklearn.linear_model | ||
import dask_ml.linear_model | ||
import seaborn as sns | ||
|
||
# Sample sizes to benchmark; each size contributes one timing per estimator.
Ns = [2500, 5000, 7500, 10000]

# Rows of (method label, number of samples, elapsed seconds).
timings = []

for n in Ns:
    # NOTE(review): ``chunks=`` suggests X and y are chunked dask arrays. If
    # they are lazy, both timed fits below also pay for generating the data;
    # confirm that this is the comparison the example intends to show.
    X, y = make_classification(n_samples=n, n_features=1_000, random_state=n,
                               chunks=n // 20)

    # Time the scikit-learn fit on the full dataset.
    t1 = tic()
    sklearn.linear_model.LogisticRegression().fit(X, y)
    timings.append(('Scikit-Learn', n, tic() - t1))

    # Time the dask-ml fit on the same data.
    t1 = tic()
    dask_ml.linear_model.LogisticRegression().fit(X, y)
    timings.append(('dask-ml', n, tic() - t1))


df = pd.DataFrame(timings, columns=['method', 'Number of Samples', 'Fit Time'])
# ``factorplot`` was renamed to ``catplot`` in seaborn 0.9 and later removed;
# ``kind='point'`` keeps the point plot that ``factorplot`` drew by default.
sns.catplot(x='Number of Samples', y='Fit Time', hue='method',
            kind='point', data=df, aspect=1.5)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
""" | ||
.. _plot_spectral_clustering_example.py: | ||
Spectral Clustering Example | ||
=========================== | ||
This example shows how dask-ml's ``SpectralClustering`` scales with the | ||
number of samples, compared to scikit-learn's implementation. The dask | ||
version uses an approximation to the affinity matrix, which avoids an | ||
expensive computation at the cost of some approximation error. | ||
""" | ||
from sklearn.datasets import make_circles | ||
from sklearn.utils import shuffle | ||
import pandas as pd | ||
|
||
from timeit import default_timer as tic | ||
import sklearn.cluster | ||
import dask_ml.cluster | ||
import seaborn as sns | ||
|
||
# Sample sizes to benchmark; each size contributes one timing per estimator.
Ns = [2500, 5000, 7500, 10000]

# Rows of (method label, number of samples, elapsed seconds).
timings = []
for n in Ns:
    # A fresh dataset is generated for every size, so no dataset needs to be
    # built ahead of the loop.
    # NOTE(review): an earlier, unused dataset in this script used noise=0.05;
    # confirm that noise=0.5 is the value intended here.
    X, y = make_circles(n_samples=n, random_state=n, noise=0.5, factor=0.5)

    # Time scikit-learn's SpectralClustering fit.
    t1 = tic()
    sklearn.cluster.SpectralClustering(n_clusters=2).fit(X)
    timings.append(('Scikit-Learn (exact)', n, tic() - t1))

    # Time dask-ml's SpectralClustering fit with n_components=100.
    t1 = tic()
    dask_ml.cluster.SpectralClustering(n_clusters=2, n_components=100).fit(X)
    timings.append(('dask-ml (approximate)', n, tic() - t1))


df = pd.DataFrame(timings, columns=['method', 'Number of Samples', 'Fit Time'])
# ``factorplot`` was renamed to ``catplot`` in seaborn 0.9 and later removed;
# ``kind='point'`` keeps the point plot that ``factorplot`` drew by default.
sns.catplot(x='Number of Samples', y='Fit Time', hue='method',
            kind='point', data=df, aspect=1.5)
Binary file not shown.
Binary file not shown.
Binary file added
BIN
+22 KB
docs/source/auto_examples/images/sphx_glr_plot_logistic_regression_001.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+26.5 KB
docs/source/auto_examples/images/sphx_glr_plot_spectral_clustering_001.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+13.4 KB
docs/source/auto_examples/images/thumb/sphx_glr_plot_logistic_regression_thumb.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+13.5 KB
docs/source/auto_examples/images/thumb/sphx_glr_plot_spectral_clustering_thumb.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"%matplotlib inline" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"\n\nLogistic Regression Example\n===========================\n\nComparison of scaling.\n\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"from dask_ml.datasets import make_classification\nimport pandas as pd\n\nfrom timeit import default_timer as tic\nimport sklearn.linear_model\nimport dask_ml.linear_model\nimport seaborn as sns\n\nNs = [2500, 5000, 7500, 10000]\n\ntimings = []\n\nfor n in Ns:\n X, y = make_classification(n_samples=n, n_features=1_000, random_state=n,\n chunks=n // 20)\n t1 = tic()\n sklearn.linear_model.LogisticRegression().fit(X, y)\n timings.append(('Scikit-Learn', n, tic() - t1))\n t1 = tic()\n dask_ml.linear_model.LogisticRegression().fit(X, y)\n timings.append(('dask-ml', n, tic() - t1))\n\n\ndf = pd.DataFrame(timings, columns=['method', 'Number of Samples', 'Fit Time'])\nsns.factorplot(x='Number of Samples', y='Fit Time', hue='method',\n data=df, aspect=1.5)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.5" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 0 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
""" | ||
.. _plot_logistic_regression_example.py: | ||
Logistic Regression Example | ||
=========================== | ||
Comparison of scaling. | ||
""" | ||
from dask_ml.datasets import make_classification | ||
import pandas as pd | ||
|
||
from timeit import default_timer as tic | ||
import sklearn.linear_model | ||
import dask_ml.linear_model | ||
import seaborn as sns | ||
|
||
# Sample sizes to benchmark; each size contributes one timing per estimator.
Ns = [2500, 5000, 7500, 10000]

# Rows of (method label, number of samples, elapsed seconds).
timings = []

for n in Ns:
    # NOTE(review): ``chunks=`` suggests X and y are chunked dask arrays. If
    # they are lazy, both timed fits below also pay for generating the data;
    # confirm that this is the comparison the example intends to show.
    X, y = make_classification(n_samples=n, n_features=1_000, random_state=n,
                               chunks=n // 20)

    # Time the scikit-learn fit on the full dataset.
    t1 = tic()
    sklearn.linear_model.LogisticRegression().fit(X, y)
    timings.append(('Scikit-Learn', n, tic() - t1))

    # Time the dask-ml fit on the same data.
    t1 = tic()
    dask_ml.linear_model.LogisticRegression().fit(X, y)
    timings.append(('dask-ml', n, tic() - t1))


df = pd.DataFrame(timings, columns=['method', 'Number of Samples', 'Fit Time'])
# ``factorplot`` was renamed to ``catplot`` in seaborn 0.9 and later removed;
# ``kind='point'`` keeps the point plot that ``factorplot`` drew by default.
sns.catplot(x='Number of Samples', y='Fit Time', hue='method',
            kind='point', data=df, aspect=1.5)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
34aaf9e462bc041dcf9c87ceacb96d0f |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
|
||
|
||
.. _sphx_glr_auto_examples_plot_logistic_regression.py: | ||
|
||
|
||
.. _plot_logistic_regression_example.py: | ||
|
||
Logistic Regression Example | ||
=========================== | ||
|
||
Comparison of scaling. | ||
|
||
|
||
|
||
|
||
.. image:: /auto_examples/images/sphx_glr_plot_logistic_regression_001.png | ||
:align: center | ||
|
||
|
||
|
||
|
||
|
||
.. code-block:: python | ||
from dask_ml.datasets import make_classification | ||
import pandas as pd | ||
from timeit import default_timer as tic | ||
import sklearn.linear_model | ||
import dask_ml.linear_model | ||
import seaborn as sns | ||
Ns = [2500, 5000, 7500, 10000] | ||
timings = [] | ||
for n in Ns: | ||
X, y = make_classification(n_samples=n, n_features=1_000, random_state=n, | ||
chunks=n // 20) | ||
t1 = tic() | ||
sklearn.linear_model.LogisticRegression().fit(X, y) | ||
timings.append(('Scikit-Learn', n, tic() - t1)) | ||
t1 = tic() | ||
dask_ml.linear_model.LogisticRegression().fit(X, y) | ||
timings.append(('dask-ml', n, tic() - t1)) | ||
df = pd.DataFrame(timings, columns=['method', 'Number of Samples', 'Fit Time']) | ||
sns.factorplot(x='Number of Samples', y='Fit Time', hue='method', | ||
data=df, aspect=1.5) | ||
**Total running time of the script:** ( 5 minutes 0.900 seconds) | ||
|
||
|
||
|
||
.. only :: html | ||
.. container:: sphx-glr-footer | ||
.. container:: sphx-glr-download | ||
:download:`Download Python source code: plot_logistic_regression.py <plot_logistic_regression.py>` | ||
.. container:: sphx-glr-download | ||
:download:`Download Jupyter notebook: plot_logistic_regression.ipynb <plot_logistic_regression.ipynb>` | ||
.. only:: html | ||
|
||
.. rst-class:: sphx-glr-signature | ||
|
||
`Gallery generated by Sphinx-Gallery <https://sphinx-gallery.readthedocs.io>`_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"%matplotlib inline" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"\n\nSpectral Clustering Example\n===========================\n\nThis example shows how dask-ml's ``SpectralClustering`` scales with the\nnumber of samples, compared to scikit-learn's implementation. The dask\nversion uses an approximation to the affinity matrix, which avoids an\nexpensive computation at the cost of some approximation error.\n\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"from sklearn.datasets import make_circles\nfrom sklearn.utils import shuffle\nimport pandas as pd\n\nfrom timeit import default_timer as tic\nimport sklearn.cluster\nimport dask_ml.cluster\nimport seaborn as sns\n\nNs = [2500, 5000, 7500, 10000]\nX, y = make_circles(n_samples=10_000, noise=0.05, random_state=0, factor=0.5)\nX, y = shuffle(X, y)\n\ntimings = []\nfor n in Ns:\n X, y = make_circles(n_samples=n, random_state=n, noise=0.5, factor=0.5)\n t1 = tic()\n sklearn.cluster.SpectralClustering(n_clusters=2).fit(X)\n timings.append(('Scikit-Learn (exact)', n, tic() - t1))\n t1 = tic()\n dask_ml.cluster.SpectralClustering(n_clusters=2, n_components=100).fit(X)\n timings.append(('dask-ml (approximate)', n, tic() - t1))\n\n\ndf = pd.DataFrame(timings, columns=['method', 'Number of Samples', 'Fit Time'])\nsns.factorplot(x='Number of Samples', y='Fit Time', hue='method',\n data=df, aspect=1.5)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.5" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 0 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
""" | ||
.. _plot_spectral_clustering_example.py: | ||
Spectral Clustering Example | ||
=========================== | ||
This example shows how dask-ml's ``SpectralClustering`` scales with the | ||
number of samples, compared to scikit-learn's implementation. The dask | ||
version uses an approximation to the affinity matrix, which avoids an | ||
expensive computation at the cost of some approximation error. | ||
""" | ||
from sklearn.datasets import make_circles | ||
from sklearn.utils import shuffle | ||
import pandas as pd | ||
|
||
from timeit import default_timer as tic | ||
import sklearn.cluster | ||
import dask_ml.cluster | ||
import seaborn as sns | ||
|
||
# Sample sizes to benchmark; each size contributes one timing per estimator.
Ns = [2500, 5000, 7500, 10000]

# Rows of (method label, number of samples, elapsed seconds).
timings = []
for n in Ns:
    # A fresh dataset is generated for every size, so no dataset needs to be
    # built ahead of the loop.
    # NOTE(review): an earlier, unused dataset in this script used noise=0.05;
    # confirm that noise=0.5 is the value intended here.
    X, y = make_circles(n_samples=n, random_state=n, noise=0.5, factor=0.5)

    # Time scikit-learn's SpectralClustering fit.
    t1 = tic()
    sklearn.cluster.SpectralClustering(n_clusters=2).fit(X)
    timings.append(('Scikit-Learn (exact)', n, tic() - t1))

    # Time dask-ml's SpectralClustering fit with n_components=100.
    t1 = tic()
    dask_ml.cluster.SpectralClustering(n_clusters=2, n_components=100).fit(X)
    timings.append(('dask-ml (approximate)', n, tic() - t1))


df = pd.DataFrame(timings, columns=['method', 'Number of Samples', 'Fit Time'])
# ``factorplot`` was renamed to ``catplot`` in seaborn 0.9 and later removed;
# ``kind='point'`` keeps the point plot that ``factorplot`` drew by default.
sns.catplot(x='Number of Samples', y='Fit Time', hue='method',
            kind='point', data=df, aspect=1.5)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
649f44189a9a45d720be774f4d86979d |
Oops, something went wrong.