high_level: Accept lists for data arguments

Signed-off-by: Hashim Chaudry <hashimchaudry23@gmail.com>
intel · Jan 26, 2022 · 714d325 · 714d325
1 parent 3d09e28
commit 714d325
Show file tree

Hide file tree

Showing 4 changed files with 80 additions and 5 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -51,6 +51,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   same, it still accepts a name or a path.
 - Renamed `accuracy()` to `score()`.
 - Renamed `Optimizer` to `Tuner`.
+- High-level functions now accept lists for data arguments.
 ### Fixed
 - Record object key properties are now always strings
 - High level functions (`train()`, etc.) now work on existing open contexts

diff --git a/dffml/high_level/ml.py b/dffml/high_level/ml.py
@@ -1,15 +1,15 @@
 import contextlib
-from typing import Union, Dict, Any
+from typing import Union, Dict, Any, List
 
 from ..record import Record
 from ..source.source import BaseSource
 from ..feature import Feature, Features
 from ..model import Model, ModelContext
-from ..util.internal import records_to_sources
+from ..util.internal import records_to_sources, list_records_to_dict
 from ..accuracy.accuracy import AccuracyScorer, AccuracyContext
 
 
-async def train(model, *args: Union[BaseSource, Record, Dict[str, Any]]):
+async def train(model, *args: Union[BaseSource, Record, Dict[str, Any], List]):
     """
     Train a machine learning model.
 
@@ -51,6 +51,23 @@ async def train(model, *args: Union[BaseSource, Record, Dict[str, Any]]):
     >>>
     >>> asyncio.run(main())
     """
+    if (
+        hasattr(model.config, "features")
+        and any(isinstance(arg, list) for arg in args)
+        and hasattr(model.config, "predict")
+    ):
+        if isinstance(model.config.predict, Features):
+            predict_feature = [
+                feature.name for feature in model.config.predict
+            ]
+        else:
+            predict_feature = [model.config.predict.name]
+        args = list_records_to_dict(
+            [feature.name for feature in model.config.features]
+            + predict_feature,
+            *args,
+            model=model,
+        )
     async with contextlib.AsyncExitStack() as astack:
         # Open sources
         sctx = await astack.enter_async_context(records_to_sources(*args))
@@ -68,7 +85,7 @@ async def score(
     model,
     accuracy_scorer: Union[AccuracyScorer, AccuracyContext],
     features: Union[Feature, Features],
-    *args: Union[BaseSource, Record, Dict[str, Any]],
+    *args: Union[BaseSource, Record, Dict[str, Any], List],
 ) -> float:
     """
     Assess the accuracy of a machine learning model.
@@ -138,6 +155,21 @@ async def score(
         )
     if isinstance(features, Feature):
         features = Features(features)
+    if any(isinstance(arg, list) for arg in args) and hasattr(
+        model.config, "predict"
+    ):
+        if isinstance(model.config.predict, Features):
+            predict_feature = [
+                feature.name for feature in model.config.predict
+            ]
+        else:
+            predict_feature = [model.config.predict.name]
+        args = list_records_to_dict(
+            [feature.name for feature in model.config.features]
+            + predict_feature,
+            *args,
+            model=model,
+        )
 
     async with contextlib.AsyncExitStack() as astack:
         # Open sources
@@ -164,7 +196,7 @@ async def score(
 
 async def predict(
     model,
-    *args: Union[BaseSource, Record, Dict[str, Any]],
+    *args: Union[BaseSource, Record, Dict[str, Any], List],
     update: bool = False,
     keep_record: bool = False,
 ):
@@ -228,6 +260,21 @@ async def predict(
     {'Years': 6, 'Salary': 70}
     {'Years': 7, 'Salary': 80}
     """
+    if any(isinstance(arg, list) for arg in args) and hasattr(
+        model.config, "predict"
+    ):
+        if isinstance(model.config.predict, Features):
+            predict_feature = [
+                feature.name for feature in model.config.predict
+            ]
+        else:
+            predict_feature = [model.config.predict.name]
+        args = list_records_to_dict(
+            [feature.name for feature in model.config.features]
+            + predict_feature,
+            *args,
+            model=model,
+        )
     async with contextlib.AsyncExitStack() as astack:
         # Open sources
         sctx = await astack.enter_async_context(records_to_sources(*args))

diff --git a/dffml/util/internal.py b/dffml/util/internal.py
@@ -14,6 +14,13 @@
 from ..source.memory import MemorySource, MemorySourceConfig
 
 
+class CannotConvertToRecord(Exception):
+    """
+    Raised when list data needs to be converted into records but no model
+    was supplied (see ``list_records_to_dict``).
+    """
+
+
 @contextlib.asynccontextmanager
 async def records_to_sources(*args):
     """
@@ -55,3 +62,13 @@ async def records_to_sources(*args):
             for already_open_sctx in sctxs:
                 sctx.append(already_open_sctx)
             yield sctx
+
+
+def list_records_to_dict(features, *args, model=None):
+    """
+    Convert every list found in ``args`` into a dict keyed by ``features``.
+
+    Each list is paired positionally with the names in ``features`` using
+    ``zip``, so pairing stops at the shorter of the two. Arguments that are
+    not lists are passed through untouched.
+
+    Returns a new list holding the converted arguments in their original
+    order. Raises ``CannotConvertToRecord`` when ``model`` is falsy.
+    """
+    # Guard clause: conversion is refused when no model is given.
+    if not model:
+        raise CannotConvertToRecord("Model does not exist!")
+    return [
+        dict(zip(features, value)) if isinstance(value, list) else value
+        for value in args
+    ]
diff --git a/tests/test_high_level.py b/tests/test_high_level.py
@@ -135,6 +135,16 @@ async def test_predict(self):
         self.assertEqual(round(predictions[0][2]["Salary"]["value"]), 70)
         self.assertEqual(round(predictions[1][2]["Salary"]["value"]), 80)
 
+        # Test input data as list
+        await train(model, *self.train_data)
+        await score(model, scorer, Feature("Salary", int, 1), *self.test_data)
+        predictions = [
+            prediction
+            async for prediction in predict(model, *self.predict_data)
+        ]
+        self.assertEqual(round(predictions[0][2]["Salary"]["value"]), 70)
+        self.assertEqual(round(predictions[1][2]["Salary"]["value"]), 80)
+
 
 class TestDataFlow(TestOrchestrator):
     @contextlib.asynccontextmanager