Skip to content
This repository has been archived by the owner on Jun 11, 2022. It is now read-only.

Commit

Permalink
Added more
Browse files Browse the repository at this point in the history
  • Loading branch information
TomAugspurger committed May 17, 2018
1 parent 00bf7b3 commit 1adcb4a
Show file tree
Hide file tree
Showing 18 changed files with 452 additions and 0 deletions.
34 changes: 34 additions & 0 deletions dask_ml_benchmarks/plot_logistic_regression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""
.. _plot_logistic_regression_example.py:
Logistic Regression Example
===========================
Comparison of scaling.
"""
from dask_ml.datasets import make_classification
import pandas as pd

from timeit import default_timer as tic
import sklearn.linear_model
import dask_ml.linear_model
import seaborn as sns

# Sample sizes to benchmark; a separate synthetic dataset is built per size.
Ns = [2500, 5000, 7500, 10000]

# Accumulates one (method, n_samples, elapsed_seconds) record per fit.
timings = []

for n in Ns:
    # ``random_state=n`` seeds each dataset by its size; ``chunks=n // 20``
    # asks dask-ml's generator for blocks of n / 20 samples.
    X, y = make_classification(
        n_samples=n,
        n_features=1_000,
        random_state=n,
        chunks=n // 20,
    )

    estimators = (
        ('Scikit-Learn', sklearn.linear_model.LogisticRegression),
        ('dask-ml', dask_ml.linear_model.LogisticRegression),
    )
    for label, estimator_cls in estimators:
        # The timed region covers estimator construction and ``fit``.
        started = tic()
        estimator_cls().fit(X, y)
        timings.append((label, n, tic() - started))


column_names = ['method', 'Number of Samples', 'Fit Time']
df = pd.DataFrame(timings, columns=column_names)

# NOTE(review): newer seaborn releases rename ``factorplot`` to
# ``catplot(kind='point')`` -- confirm against the seaborn version in use.
sns.factorplot(
    x='Number of Samples',
    y='Fit Time',
    hue='method',
    data=df,
    aspect=1.5,
)
38 changes: 38 additions & 0 deletions dask_ml_benchmarks/plot_spectral_clustering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""
.. _plot_spectral_clustering_example.py:
Spectral Clustering Example
===========================
This example shows how dask-ml's ``SpectralClustering`` scales with the
number of samples, compared to scikit-learn's implementation. The dask
version uses an approximation to the affinity matrix, which avoids an
expensive computation at the cost of some approximation error.
"""
from sklearn.datasets import make_circles
from sklearn.utils import shuffle
import pandas as pd

from timeit import default_timer as tic
import sklearn.cluster
import dask_ml.cluster
import seaborn as sns

# Sample sizes to benchmark. Each size gets its own two-circles dataset,
# generated inside the loop; nothing is built up front.
Ns = [2500, 5000, 7500, 10000]

# Accumulates one (method, n_samples, elapsed_seconds) record per fit.
timings = []
for n in Ns:
    # ``random_state=n`` seeds each dataset by its size.
    # NOTE(review): noise=0.5 is large next to factor=0.5, so the two circles
    # presumably overlap heavily -- confirm this is intended for a
    # timing-only comparison.
    X, y = make_circles(n_samples=n, random_state=n, noise=0.5, factor=0.5)

    # The timed region covers estimator construction and ``fit``.
    t1 = tic()
    sklearn.cluster.SpectralClustering(n_clusters=2).fit(X)
    timings.append(('Scikit-Learn (exact)', n, tic() - t1))

    t1 = tic()
    dask_ml.cluster.SpectralClustering(n_clusters=2, n_components=100).fit(X)
    timings.append(('dask-ml (approximate)', n, tic() - t1))


df = pd.DataFrame(timings, columns=['method', 'Number of Samples', 'Fit Time'])

# NOTE(review): newer seaborn releases rename ``factorplot`` to
# ``catplot(kind='point')`` -- confirm against the seaborn version in use.
sns.factorplot(x='Number of Samples', y='Fit Time', hue='method',
               data=df, aspect=1.5)
Binary file modified docs/source/auto_examples/auto_examples_jupyter.zip
Binary file not shown.
Binary file modified docs/source/auto_examples/auto_examples_python.zip
Binary file not shown.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
40 changes: 40 additions & 0 deletions docs/source/auto_examples/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,46 @@



.. raw:: html

<div class="sphx-glr-thumbcontainer" tooltip="Comparison of scaling. ">

.. only:: html

.. figure:: /auto_examples/images/thumb/sphx_glr_plot_logistic_regression_thumb.png

:ref:`sphx_glr_auto_examples_plot_logistic_regression.py`

.. raw:: html

</div>


.. toctree::
:hidden:

/auto_examples/plot_logistic_regression

.. raw:: html

<div class="sphx-glr-thumbcontainer" tooltip="This example shows how dask-ml&#x27;s ``SpectralClustering`` scales with the number of samples, comp...">

.. only:: html

.. figure:: /auto_examples/images/thumb/sphx_glr_plot_spectral_clustering_thumb.png

:ref:`sphx_glr_auto_examples_plot_spectral_clustering.py`

.. raw:: html

</div>


.. toctree::
:hidden:

/auto_examples/plot_spectral_clustering

.. raw:: html

<div class="sphx-glr-thumbcontainer" tooltip="This example demonstrates :class:`dask_ml.wrappers.ParallelPostFit`. A :class:`sklearn.svm.SVC`...">
Expand Down
54 changes: 54 additions & 0 deletions docs/source/auto_examples/plot_logistic_regression.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%matplotlib inline"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n\nLogistic Regression Example\n===========================\n\nComparison of scaling.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from dask_ml.datasets import make_classification\nimport pandas as pd\n\nfrom timeit import default_timer as tic\nimport sklearn.linear_model\nimport dask_ml.linear_model\nimport seaborn as sns\n\nNs = [2500, 5000, 7500, 10000]\n\ntimings = []\n\nfor n in Ns:\n X, y = make_classification(n_samples=n, n_features=1_000, random_state=n,\n chunks=n // 20)\n t1 = tic()\n sklearn.linear_model.LogisticRegression().fit(X, y)\n timings.append(('Scikit-Learn', n, tic() - t1))\n t1 = tic()\n dask_ml.linear_model.LogisticRegression().fit(X, y)\n timings.append(('dask-ml', n, tic() - t1))\n\n\ndf = pd.DataFrame(timings, columns=['method', 'Number of Samples', 'Fit Time'])\nsns.factorplot(x='Number of Samples', y='Fit Time', hue='method',\n data=df, aspect=1.5)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
34 changes: 34 additions & 0 deletions docs/source/auto_examples/plot_logistic_regression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""
.. _plot_logistic_regression_example.py:
Logistic Regression Example
===========================
Comparison of scaling.
"""
from dask_ml.datasets import make_classification
import pandas as pd

from timeit import default_timer as tic
import sklearn.linear_model
import dask_ml.linear_model
import seaborn as sns

# Sample sizes to benchmark; a separate synthetic dataset is built per size.
Ns = [2500, 5000, 7500, 10000]

# Accumulates one (method, n_samples, elapsed_seconds) record per fit.
timings = []

for n in Ns:
    # ``random_state=n`` seeds each dataset by its size; ``chunks=n // 20``
    # asks dask-ml's generator for blocks of n / 20 samples.
    X, y = make_classification(
        n_samples=n,
        n_features=1_000,
        random_state=n,
        chunks=n // 20,
    )

    estimators = (
        ('Scikit-Learn', sklearn.linear_model.LogisticRegression),
        ('dask-ml', dask_ml.linear_model.LogisticRegression),
    )
    for label, estimator_cls in estimators:
        # The timed region covers estimator construction and ``fit``.
        started = tic()
        estimator_cls().fit(X, y)
        timings.append((label, n, tic() - started))


column_names = ['method', 'Number of Samples', 'Fit Time']
df = pd.DataFrame(timings, columns=column_names)

# NOTE(review): newer seaborn releases rename ``factorplot`` to
# ``catplot(kind='point')`` -- confirm against the seaborn version in use.
sns.factorplot(
    x='Number of Samples',
    y='Fit Time',
    hue='method',
    data=df,
    aspect=1.5,
)
1 change: 1 addition & 0 deletions docs/source/auto_examples/plot_logistic_regression.py.md5
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
34aaf9e462bc041dcf9c87ceacb96d0f
76 changes: 76 additions & 0 deletions docs/source/auto_examples/plot_logistic_regression.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@


.. _sphx_glr_auto_examples_plot_logistic_regression.py:


.. _plot_logistic_regression_example.py:

Logistic Regression Example
===========================

Comparison of scaling.




.. image:: /auto_examples/images/sphx_glr_plot_logistic_regression_001.png
:align: center





.. code-block:: python
from dask_ml.datasets import make_classification
import pandas as pd
from timeit import default_timer as tic
import sklearn.linear_model
import dask_ml.linear_model
import seaborn as sns
Ns = [2500, 5000, 7500, 10000]
timings = []
for n in Ns:
X, y = make_classification(n_samples=n, n_features=1_000, random_state=n,
chunks=n // 20)
t1 = tic()
sklearn.linear_model.LogisticRegression().fit(X, y)
timings.append(('Scikit-Learn', n, tic() - t1))
t1 = tic()
dask_ml.linear_model.LogisticRegression().fit(X, y)
timings.append(('dask-ml', n, tic() - t1))
df = pd.DataFrame(timings, columns=['method', 'Number of Samples', 'Fit Time'])
sns.factorplot(x='Number of Samples', y='Fit Time', hue='method',
data=df, aspect=1.5)
**Total running time of the script:** ( 5 minutes 0.900 seconds)



.. only :: html
.. container:: sphx-glr-footer
.. container:: sphx-glr-download
:download:`Download Python source code: plot_logistic_regression.py <plot_logistic_regression.py>`
.. container:: sphx-glr-download
:download:`Download Jupyter notebook: plot_logistic_regression.ipynb <plot_logistic_regression.ipynb>`
.. only:: html

.. rst-class:: sphx-glr-signature

`Gallery generated by Sphinx-Gallery <https://sphinx-gallery.readthedocs.io>`_
54 changes: 54 additions & 0 deletions docs/source/auto_examples/plot_spectral_clustering.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%matplotlib inline"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n\nSpectral Clustering Example\n===========================\n\nThis example shows how dask-ml's ``SpectralClustering`` scales with the\nnumber of samples, compared to scikit-learn's implementation. The dask\nversion uses an approximation to the affinity matrix, which avoids an\nexpensive computation at the cost of some approximation error.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.datasets import make_circles\nfrom sklearn.utils import shuffle\nimport pandas as pd\n\nfrom timeit import default_timer as tic\nimport sklearn.cluster\nimport dask_ml.cluster\nimport seaborn as sns\n\nNs = [2500, 5000, 7500, 10000]\nX, y = make_circles(n_samples=10_000, noise=0.05, random_state=0, factor=0.5)\nX, y = shuffle(X, y)\n\ntimings = []\nfor n in Ns:\n X, y = make_circles(n_samples=n, random_state=n, noise=0.5, factor=0.5)\n t1 = tic()\n sklearn.cluster.SpectralClustering(n_clusters=2).fit(X)\n timings.append(('Scikit-Learn (exact)', n, tic() - t1))\n t1 = tic()\n dask_ml.cluster.SpectralClustering(n_clusters=2, n_components=100).fit(X)\n timings.append(('dask-ml (approximate)', n, tic() - t1))\n\n\ndf = pd.DataFrame(timings, columns=['method', 'Number of Samples', 'Fit Time'])\nsns.factorplot(x='Number of Samples', y='Fit Time', hue='method',\n data=df, aspect=1.5)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
38 changes: 38 additions & 0 deletions docs/source/auto_examples/plot_spectral_clustering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""
.. _plot_spectral_clustering_example.py:
Spectral Clustering Example
===========================
This example shows how dask-ml's ``SpectralClustering`` scales with the
number of samples, compared to scikit-learn's implementation. The dask
version uses an approximation to the affinity matrix, which avoids an
expensive computation at the cost of some approximation error.
"""
from sklearn.datasets import make_circles
from sklearn.utils import shuffle
import pandas as pd

from timeit import default_timer as tic
import sklearn.cluster
import dask_ml.cluster
import seaborn as sns

# Sample sizes to benchmark. Each size gets its own two-circles dataset,
# generated inside the loop; nothing is built up front.
Ns = [2500, 5000, 7500, 10000]

# Accumulates one (method, n_samples, elapsed_seconds) record per fit.
timings = []
for n in Ns:
    # ``random_state=n`` seeds each dataset by its size.
    # NOTE(review): noise=0.5 is large next to factor=0.5, so the two circles
    # presumably overlap heavily -- confirm this is intended for a
    # timing-only comparison.
    X, y = make_circles(n_samples=n, random_state=n, noise=0.5, factor=0.5)

    # The timed region covers estimator construction and ``fit``.
    t1 = tic()
    sklearn.cluster.SpectralClustering(n_clusters=2).fit(X)
    timings.append(('Scikit-Learn (exact)', n, tic() - t1))

    t1 = tic()
    dask_ml.cluster.SpectralClustering(n_clusters=2, n_components=100).fit(X)
    timings.append(('dask-ml (approximate)', n, tic() - t1))


df = pd.DataFrame(timings, columns=['method', 'Number of Samples', 'Fit Time'])

# NOTE(review): newer seaborn releases rename ``factorplot`` to
# ``catplot(kind='point')`` -- confirm against the seaborn version in use.
sns.factorplot(x='Number of Samples', y='Fit Time', hue='method',
               data=df, aspect=1.5)
1 change: 1 addition & 0 deletions docs/source/auto_examples/plot_spectral_clustering.py.md5
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
649f44189a9a45d720be774f4d86979d

0 comments on commit 1adcb4a

Please sign in to comment.