intel
diff --git a/‎CHANGELOG.md‎
Lines changed: 10 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎dffml/util/asynctestcase.py‎
Lines changed: 66 additions & 1 deletion b/‎dffml/util/asynctestcase.py‎
Lines changed: 66 additions & 1 deletion
diff --git a/‎docs/plugins/dffml_model.rst‎
Lines changed: 108 additions & 3 deletions b/‎docs/plugins/dffml_model.rst‎
Lines changed: 108 additions & 3 deletions
@@ -6,6 +6,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 ### Added
+- scikit models
+  - Clusterers
+    - KMeans
+    - Birch
+    - MiniBatchKMeans
+    - AffinityPropagation
+    - MeanShift
+    - SpectralClustering
+    - AgglomerativeClustering
+    - OPTICS
 - `allowempty` added to source config parameters.
 - Quickstart document to show how to use models from Python.
 - The latest release of the documentation now includes a link to the
 
@@ -2,6 +2,17 @@
 # Copyright (c) 2019 Intel Corporation
 """
 Adds support for test cases which need to be run in an event loop.
+
+Also contains a class integration tests can derive from. The integration
+tests can declare which of the plugins (that are a part of the main repo) they
+require to run. The test will be skipped if the plugin isn't installed in
+development mode.
+
+To install all plugins in development mode:
+$ dffml service dev install
+
+Add the -user flag to install to ~/.local
+
 """
 import os
 import random
@@ -12,7 +23,22 @@
 import unittest
 import tempfile
 import contextlib
-from typing import Optional
+
+import re
+import io
+import json
+from typing import Dict, Any, Optional
+
+from dffml.repo import Repo
+from dffml.base import config
+from dffml.df.types import Definition, Operation, DataFlow, Input
+from dffml.df.base import op
+from dffml.cli.cli import CLI
+from dffml.model.model import Model
+from dffml.service.dev import Develop
+from dffml.util.packaging import is_develop
+from dffml.util.entrypoint import load
+from dffml.config.config import BaseConfigLoader
 
 
 class AsyncTestCase(unittest.TestCase):
@@ -97,3 +123,42 @@ def mktempfile(
         if text:
             pathlib.Path(filename).write_text(inspect.cleandoc(text) + "\n")
         return filename
+
+
+def relative_path(*args):
+    """
+    Returns a pathlib.Path object with the path relative to this file.
+    """
+    target = pathlib.Path(__file__).parents[0] / args[0]
+    for path in list(args)[1:]:
+        target /= path
+    return target
+
+
+@contextlib.contextmanager
+def relative_chdir(*args):
+    """
+    Change directory to a location relative to the location of this file.
+    """
+    target = relative_path(*args)
+    orig_dir = os.getcwd()
+    try:
+        os.chdir(target)
+        yield target
+    finally:
+        os.chdir(orig_dir)
+
+
+class IntegrationCLITestCase(AsyncExitStackTestCase):
+    REQUIRED_PLUGINS = []
+
+    async def setUp(self):
+        await super().setUp()
+        self.required_plugins(*self.REQUIRED_PLUGINS)
+        self.stdout = io.StringIO()
+
+    def required_plugins(self, *args):
+        if not all(map(is_develop, args)):
+            self.skipTest(
+                f"Required plugins: {', '.join(args)} must be installed in development mode"
+            )
@@ -471,6 +471,22 @@ Predicting with trained model:
 |                +-------------------------------+----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
 |                | MultinomialNB                 | scikitmnb      | `scikitmnb <https://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.MultinomialNB.html#sklearn.naive_bayes.MultinomialNB/>`_                                                    |
 +----------------+-------------------------------+----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| Clustering     | KMeans                        | scikitkmeans   | `scikitkmeans <https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html#sklearn.cluster.KMeans/>`_                                                                       |
+|                +-------------------------------+----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|                | Birch                         | scikitbirch    | `scikitbirch <https://scikit-learn.org/stable/modules/generated/sklearn.cluster.Birch.html#sklearn.cluster.Birch/>`_                                                                          |
+|                +-------------------------------+----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|                | MiniBatchKMeans               | scikitmbkmeans | `scikitmbkmeans <https://scikit-learn.org/stable/modules/generated/sklearn.cluster.MiniBatchKMeans.html#sklearn.cluster.MiniBatchKMeans/>`_                                                   |
+|                +-------------------------------+----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|                | AffinityPropagation           | scikitap       | `scikitap <https://scikit-learn.org/stable/modules/generated/sklearn.cluster.AffinityPropagation.html#sklearn.cluster.AffinityPropagation/>`_                                                 |
+|                +-------------------------------+----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|                | MeanShift                     | scikitms       | `scikitms <https://scikit-learn.org/stable/modules/generated/sklearn.cluster.MeanShift.html#sklearn.cluster.MeanShift/>`_                                                                     |
+|                +-------------------------------+----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|                | SpectralClustering            | scikitsc       | `scikitsc <https://scikit-learn.org/stable/modules/generated/sklearn.cluster.SpectralClustering.html#sklearn.cluster.SpectralClustering/>`_                                                   |
+|                +-------------------------------+----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|                | AgglomerativeClustering       | scikitac       | `scikitac <https://scikit-learn.org/stable/modules/generated/sklearn.cluster.AgglomerativeClustering.html#sklearn.cluster.AgglomerativeClustering/>`_                                         |
+|                +-------------------------------+----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|                | OPTICS                        | scikitoptics   | `scikitoptics <https://scikit-learn.org/stable/modules/generated/sklearn.cluster.OPTICS.html#sklearn.cluster.OPTICS/>`_                                                                       |
++----------------+-------------------------------+----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
 
 
 **Usage Example:**
@@ -512,14 +528,14 @@ Let us take a simple example:
     $ dffml train \
         -model scikitlr \
         -model-features Years:int:1 Expertise:int:1 Trust:float:1 \
-        -model-predict Salary \
+        -model-predict Salary:float:1 \
         -sources f=csv \
         -source-filename train.csv \
         -log debug
     $ dffml accuracy \
         -model scikitlr \
         -model-features Years:int:1 Expertise:int:1 Trust:float:1 \
-        -model-predict Salary \
+        -model-predict Salary:float:1 \
         -sources f=csv \
         -source-filename test.csv \
         -log debug
@@ -528,7 +544,7 @@ Let us take a simple example:
       dffml predict all \
         -model scikitlr \
         -model-features Years:int:1 Expertise:int:1 Trust:float:1 \
-        -model-predict Salary \
+        -model-predict Salary:float:1 \
         -sources f=csv \
         -source-filename /dev/stdin \
         -log debug
@@ -549,3 +565,92 @@ Let us take a simple example:
         }
     ]
 
+
+The example below uses the KMeans clustering model on a small, randomly generated dataset.
+
+.. code-block:: console
+
+    $ cat > train.csv << EOF
+   Col1,          Col2,        Col3,         Col4
+   5.05776417,   8.55128116,   6.15193196,  -8.67349666
+   3.48864265,  -7.25952218,  -4.89216256,   4.69308946
+   -8.16207603,  5.16792984,  -2.66971993,   0.2401882
+   6.09809669,   8.36434181,   6.70940915,  -7.91491768
+   -9.39122566,  5.39133807,  -2.29760281,  -1.69672981
+   0.48311336,   8.19998973,   7.78641979,   7.8843821
+   2.22409135,  -7.73598586,  -4.02660224,   2.82101794
+   2.8137247 ,   8.36064298,   7.66196849,   3.12704676
+   EOF
+    $ cat > test.csv << EOF
+   Col1,             Col2,          Col3,         Col4,    cluster
+   -10.16770144,   2.73057215,  -1.49351481,   2.43005691,    6
+   3.59705381,  -4.76520663,  -3.34916068,   5.72391486,     1
+   4.01612313,  -4.641852  ,  -4.77333308,   5.87551683,     0
+   EOF
+    $ dffml train \
+        -model scikitkmeans \
+        -model-features Col1:float:1 Col2:float:1 Col3:float:1 Col4:float:1 \
+        -sources f=csv \
+        -source-filename train.csv \
+        -source-readonly \
+        -log debug
+    $ dffml accuracy \
+        -model scikitkmeans \
+        -model-features Col1:float:1 Col2:float:1 Col3:float:1 Col4:float:1 \
+        -model-tcluster cluster:int:1 \
+        -sources f=csv \
+        -source-filename test.csv \
+        -source-readonly \
+        -log debug
+    0.6365141682948129
+    $ echo -e 'Col1,Col2,Col3,Col4\n6.09809669,8.36434181,6.70940915,-7.91491768\n' | \
+      dffml predict all \
+        -model scikitkmeans \
+        -model-features Col1:float:1 Col2:float:1 Col3:float:1 Col4:float:1 \
+        -sources f=csv \
+        -source-filename /dev/stdin \
+        -source-readonly \
+        -log debug
+    [
+    {
+        "extra": {},
+        "features": {
+            "Col1": 6.09809669,
+            "Col2": 8.36434181,
+            "Col3": 6.70940915,
+            "Col4": -7.91491768
+        },
+        "last_updated": "2020-01-12T22:51:15Z",
+        "prediction": {
+            "confidence": 0.6365141682948129,
+            "value": 2
+        },
+        "src_url": "0"
+    }
+    ]
+
+**NOTE**: `Transductive <https://scikit-learn.org/stable/glossary.html#term-transductive/>`_ clusterers (scikitsc, scikitac, scikitoptics) cannot handle unseen data.
+Ensure that `predict` and `accuracy` for these algorithms use the training data.
+
+**Args**
+
+- predict: Feature
+
+  - Label or the value to be predicted
+  - Only used by classification and regression models
+
+- tcluster: Feature
+
+  - True cluster, only used by clustering models
+  - Passed with `accuracy` to return `mutual_info_score`
+  - If not passed, `accuracy` returns `silhouette_score`
+
+- features: List of features
+
+  - Features to train on
+
+- directory: String
+
+  - default: /home/user/.cache/dffml/scikit-{Entrypoint}
+  - Directory where state should be saved
+