high_level: ml: Add predict_features parameter to 'accuracy()'
Signed-off-by: mHash1m <hashimchaudry23@gmail.com>
mhash1m authored and pdxjohnny committed Aug 17, 2021
1 parent 1b0327c commit a520483
Showing 97 changed files with 371 additions and 213 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -25,6 +25,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Usecase example notebook for "Saving and loading models"
- Usecase example notebook for "Transfer Learning"
- Usecase example notebook for "Ensemble by stacking"
- Support for Multi-Output models.
- Usecase example notebook for "Working with Multi-Output models"
### Changed
- Calls to hashlib now go through helper functions
- Build docs using `dffml service dev docs`
@@ -33,6 +35,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `high_level` split into `ml`, `dataflow` & `source` submodules
- Config objects now support mutability/immutability at the property scope.
See `docs/arch/0003-Config-Property-Mutable-vs-Immutable` for details.
- high_level `accuracy()` now takes the predict feature(s) as a parameter.
### Fixed
- Record object key properties are now always strings

3 changes: 2 additions & 1 deletion dffml/accuracy/accuracy.py
@@ -1,5 +1,6 @@
import abc

from ..feature import Feature
from ..model import ModelContext
from ..source.source import SourcesContext
from ..util.entrypoint import base_entry_point
@@ -25,7 +26,7 @@ def __init__(self, parent: "Accuracy") -> None:

@abc.abstractmethod
async def score(
self, mctx: ModelContext, sources: SourcesContext,
self, mctx: ModelContext, sources: SourcesContext, *args: Feature
) -> float:
"""
Abstract method to get the score
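Note for scorer implementors: the feature(s) to score now arrive as positional arguments rather than being read from the model config. A minimal sketch of a context written against the new signature (the CountMatchesAccuracyContext name and its exact-match logic are hypothetical, and the accompanying config and scorer classes are omitted):

    from dffml.feature import Feature
    from dffml.model import ModelContext
    from dffml.source.source import SourcesContext
    from dffml.accuracy.accuracy import AccuracyContext


    class CountMatchesAccuracyContext(AccuracyContext):
        """
        Hypothetical scorer context: fraction of records whose prediction
        exactly matches the first feature passed in.
        """

        async def score(
            self, mctx: ModelContext, sources: SourcesContext, *features: Feature
        ) -> float:
            feature = features[0]
            total = 0
            matches = 0
            async for record in mctx.predict(sources):
                if str(record.feature(feature.name)) == str(
                    record.prediction(feature.name).value
                ):
                    matches += 1
                total += 1
            return matches / total if total else 0.0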
14 changes: 6 additions & 8 deletions dffml/accuracy/clf.py
@@ -1,6 +1,6 @@
from ..base import config
from ..record import Record
from ..feature import Feature
from ..feature import Feature, Features
from ..model import ModelContext
from ..util.entrypoint import entrypoint
from ..source.source import SourcesContext
@@ -21,16 +21,14 @@ class ClassificationAccuracyContext(AccuracyContext):
Classification Accuracy
"""

async def score(self, mctx: ModelContext, sources: SourcesContext):
if len([mctx.parent.config.predict]) != 1:
raise InvalidNumberOfFeaturesError(
f"{self.__class__.__qualname__} can only assess accuracy of one feature. features: {features}"
)
async def score(
self, mctx: ModelContext, sources: SourcesContext, feature: Feature,
):
total = 0
right_predictions = 0
async for record in mctx.predict(sources):
if str(record.feature(mctx.parent.config.predict.name)) == str(
record.prediction(mctx.parent.config.predict.name).value
if str(record.feature(feature.name)) == str(
record.prediction(feature.name).value
):
right_predictions += 1
total += 1
14 changes: 5 additions & 9 deletions dffml/accuracy/mse.py
@@ -21,18 +21,14 @@ class MeanSquaredErrorAccuracyContext(AccuracyContext):
Mean Squared Error
"""

async def score(self, mctx: ModelContext, sources: SourcesContext):
if len([mctx.parent.config.predict]) != 1:
raise InvalidNumberOfFeaturesError(
f"{self.__class__.__qualname__} can only assess accuracy of one feature. features: {features}"
)
async def score(
self, mctx: ModelContext, sources: SourcesContext, feature: Feature,
):
y = []
y_predict = []
async for record in mctx.predict(sources):
y.append(record.feature(mctx.parent.config.predict.name))
y_predict.append(
record.prediction(mctx.parent.config.predict.name).value
)
y.append(record.feature(feature.name))
y_predict.append(record.prediction(feature.name).value)
accuracy = sum(
list(map(lambda x, y: abs(x - y) ** 2, y, y_predict))
) / len(y)
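For reference, the scoring logic above boils down to a plain mean-squared-error computation; a standalone illustration with made-up values:

    # Made-up actual and predicted values for a single feature
    y = [50, 60, 70]
    y_predict = [52, 59, 71]
    # Same computation as in score() above: mean of squared differences
    mse = sum(abs(a - b) ** 2 for a, b in zip(y, y_predict)) / len(y)
    print(mse)  # (4 + 1 + 1) / 3 = 2.0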
7 changes: 6 additions & 1 deletion dffml/cli/ml.py
@@ -1,4 +1,5 @@
import inspect
from typing import Union

from ..model.model import Model
from ..source.source import Sources, SubsetSources
@@ -15,6 +16,7 @@
)
from ..base import config, field
from ..accuracy import AccuracyScorer
from ..feature import Feature, Features


@config
@@ -28,6 +30,7 @@ class AccuracyCMDConfig:
scorer: AccuracyScorer = field(
"Method to use to score accuracy", required=True
)
features: Features = field("Predict Feature(s)", default=Features())
sources: Sources = FIELD_SOURCES


@@ -64,7 +67,9 @@ async def run(self):
# at this point rather than an instance.
if inspect.isclass(self.scorer):
self.scorer = self.scorer.withconfig(self.extra_config)
return await accuracy(self.model, self.scorer, self.sources)
return await accuracy(
self.model, self.scorer, self.features, self.sources
)


@config
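The new `-features` flag takes the same `name:dtype:length` specs used by the other feature flags. As a rough sketch (assuming the usual top-level `dffml` exports), the CLI value maps to the config like this:

    from dffml import Feature, Features

    # CLI:  -features ans:int:1
    # corresponds roughly to this config value, which run() then forwards
    # to the high-level accuracy() call.
    features = Features(Feature("ans", int, 1))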
17 changes: 15 additions & 2 deletions dffml/high_level/ml.py
@@ -3,6 +3,7 @@

from ..model import Model
from ..record import Record
from ..feature import Feature, Features
from ..source.source import BaseSource
from ..accuracy.accuracy import AccuracyScorer, AccuracyContext
from ..util.internal import records_to_sources
@@ -64,6 +65,7 @@ async def train(model, *args: Union[BaseSource, Record, Dict[str, Any]]):
async def accuracy(
model,
accuracy_scorer: Union[AccuracyScorer, AccuracyContext],
features: Union[Feature, Features],
*args: Union[BaseSource, Record, Dict[str, Any]],
) -> float:
"""
@@ -116,6 +118,7 @@ async def accuracy(
... await accuracy(
... model,
... MeanSquaredErrorAccuracy(),
... Feature("Salary", int, 1),
... {"Years": 4, "Salary": 50},
... {"Years": 5, "Salary": 60},
... ),
@@ -124,6 +127,16 @@ async def accuracy(
>>> asyncio.run(main())
Accuracy: 0.0
"""
# TODO Use this to ensure that we're always passing features before records
# We can remove it eventually once we know we've updated everywhere
# appropriately
if not isinstance(features, (Feature, Features)):
raise TypeError(
f"features was {type(features)}: {features!r}. Should have been Feature or Features"
)
if isinstance(features, Feature):
features = Features(features)

async with contextlib.AsyncExitStack() as astack:
# Open sources
sctx = await astack.enter_async_context(records_to_sources(*args))
@@ -139,8 +152,8 @@
# TODO Replace this with static type checking and maybe dynamic
# through something like pydantic. See issue #36
raise TypeError(f"{accuracy_scorer} is not an AccuracyScorer")
# Run accuracy method
return float(await actx.score(mctx, sctx))

return float(await actx.score(mctx, sctx, *features))


async def predict(
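Callers of the high-level API now pass the feature(s) to score between the scorer and the data sources. A minimal usage sketch, assuming a trained `model` instance, a `test.csv` file, and the usual top-level `dffml` exports (including `MeanSquaredErrorAccuracy`):

    from dffml import Feature, Features, accuracy
    from dffml.accuracy import MeanSquaredErrorAccuracy


    async def main(model):
        # Single predict feature: a bare Feature is wrapped into Features internally
        score = await accuracy(
            model, MeanSquaredErrorAccuracy(), Feature("Salary", int, 1), "test.csv"
        )
        print("Accuracy:", score)
        # For multi-output models a Features collection can be passed instead,
        # provided the chosen scorer accepts more than one feature:
        #   await accuracy(model, scorer, Features(Feature("a", int, 1),
        #                  Feature("b", int, 1)), "test.csv")

    # Run with asyncio.run(main(model)) once a trained model is available.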
1 change: 1 addition & 0 deletions dffml/model/slr.py
@@ -96,6 +96,7 @@ class SLRModel(SimpleModel):
-model-features f1:float:1 \
-model-predict ans:int:1 \
-model-location tempdir \
-features ans:int:1 \
-sources f=csv \
-source-filename dataset.csv \
-scorer mse \
1 change: 1 addition & 0 deletions dffml/skel/model/README.md
@@ -34,6 +34,7 @@ dffml accuracy \
-model-location tempdir \
-sources csv=iris_training.csv \
-classifications 0 1 2 \
-features label:int:1 \
-model-features \
SepalLength:float:1 \
SepalWidth:float:1 \
1 change: 1 addition & 0 deletions dffml/skel/model/REPLACE_IMPORT_PACKAGE_NAME/myslr.py
@@ -120,6 +120,7 @@ class MySLRModel(SimpleModel):
-model-features x:float:1 \
-model-predict y:int:1 \
-model-location tempdir \
-features y:int:1 \
-sources f=csv \
-source-filename test.csv
1.0
2 changes: 1 addition & 1 deletion dffml/skel/model/examples/example_myslr.py
@@ -15,7 +15,7 @@

# Assess accuracy (alternate way of specifying data source)
scorer = MeanSquaredErrorAccuracy()
print("Accuracy:", accuracy(model, scorer, "test.csv"))
print("Accuracy:", accuracy(model, scorer, Feature("y", int, 1), "test.csv"))

# Make prediction
for i, features, prediction in predict(model, "predict.csv"):
5 changes: 4 additions & 1 deletion dffml/skel/model/tests/test_model.py
@@ -60,7 +60,10 @@ async def test_00_train(self):
async def test_01_accuracy(self):
# Use the test data to assess the model's accuracy
res = await accuracy(
self.model, self.scorer, *[{"X": x, "Y": y} for x, y in TEST_DATA]
self.model,
self.scorer,
Feature("Y", float, 1),
*[{"X": x, "Y": y} for x, y in TEST_DATA],
)
# Ensure the mean squared error is low (below 0.1)
self.assertTrue(0.0 <= res < 0.1)
24 changes: 0 additions & 24 deletions dffml/util/python.py
@@ -175,27 +175,3 @@ def within_method(obj: object, method_name: str, max_depth: int = -1) -> bool:
):
return True
return False


def no_inplace_append(list_a: list, arg_b: Union[str, list]) -> list:
"""
Append method that acts as inplace = False. Takes a list and another argument
list or string. Creates a duplicate of the first list and appends the second argument
to the duplicate. Returns the duplicate list.
Parameters
----------
list_a : list
The list to append the argument to.
arg_b : str, list
The argument to append to the list.
Returns
------
list_dup : list
Duplicate list of list_a with arg_b appended.
"""

lista_dup = list_a.copy()
lista_dup.append(arg_b)
return lista_dup
4 changes: 2 additions & 2 deletions docs/tutorials/accuracy/mse.rst
@@ -49,7 +49,7 @@ have access to the model's config and the sources records.

.. literalinclude:: /../dffml/accuracy/mse.py
:test:
:lines: 19-39
:lines: 19-35

Scorer
------
@@ -61,4 +61,4 @@ scorer in the cli.

.. literalinclude:: /../dffml/accuracy/mse.py
:test:
:lines: 42-45
:lines: 38-41
10 changes: 5 additions & 5 deletions docs/tutorials/models/docs.rst
@@ -137,7 +137,7 @@ in the console should be highlighted. There is no proper way to highlight a

.. literalinclude:: /../dffml/skel/model/REPLACE_IMPORT_PACKAGE_NAME/myslr.py
:language: rst
:lines: 99-163
:lines: 99-164
:linenos:
:lineno-start: 99

@@ -154,9 +154,9 @@ format Python files. It can't format examples within rST within a docstring.

.. literalinclude:: /../dffml/skel/model/REPLACE_IMPORT_PACKAGE_NAME/myslr.py
:language: rst
:lines: 164-171
:lines: 165-172
:linenos:
:lineno-start: 164
:lineno-start: 165

By specifying the ``:filepath:`` we copied the contents of the Python example
to the test environment's directory. The last thing we need to do is run the
@@ -167,9 +167,9 @@ another ``"""``.

.. literalinclude:: /../dffml/skel/model/REPLACE_IMPORT_PACKAGE_NAME/myslr.py
:language: rst
:lines: 173-178
:lines: 174-179
:linenos:
:lineno-start: 172
:lineno-start: 173

.. _model_tutorial_docs_testing_examples:

8 changes: 4 additions & 4 deletions docs/tutorials/models/slr.rst
@@ -101,15 +101,15 @@ We must set the ``CONFIG`` attribute to the respective ``Config`` class.

.. literalinclude:: /../dffml/skel/model/REPLACE_IMPORT_PACKAGE_NAME/myslr.py
:test:
:lines: 180-181
:lines: 181-182

We can override the ``__init__()`` method to do validation on the ``features``
config property. Simple linear regression only supports one input feature, so we
will raise a ``ValueError`` if the user supplies more than one feature.

.. literalinclude:: /../dffml/skel/model/REPLACE_IMPORT_PACKAGE_NAME/myslr.py
:test:
:lines: 183-187
:lines: 184-188

Train
-----
@@ -125,7 +125,7 @@ is saved and loaded from a JSON file on disk.

.. literalinclude:: /../dffml/skel/model/REPLACE_IMPORT_PACKAGE_NAME/myslr.py
:test:
:lines: 188-202
:lines: 190-204

Predict
-------
@@ -140,7 +140,7 @@ confidence in our prediction.

.. literalinclude:: /../dffml/skel/model/REPLACE_IMPORT_PACKAGE_NAME/myslr.py
:test:
:lines: 205-224
:lines: 206-225

Python Usage
------------
1 change: 1 addition & 0 deletions examples/MNIST/accuracy.sh
@@ -8,6 +8,7 @@ dffml accuracy \
-model-location tempdir \
-model-classifications $(seq 0 9) \
-model-features image:int:$((28 * 28)) \
-features label:int:1 \
-sources images=df label=idx1 \
-source-images-dataflow normalize.yaml \
-source-images-features image:int:$((28 * 28)) \
5 changes: 4 additions & 1 deletion examples/accuracy/mse/mse.py
@@ -15,4 +15,7 @@
mse_accuracy = MeanSquaredErrorAccuracy()

# Assess accuracy (alternate way of specifying data source)
print("Accuracy:", accuracy(model, mse_accuracy, "dataset.csv"))
print(
"Accuracy:",
accuracy(model, mse_accuracy, Feature("ans", int, 1), "dataset.csv"),
)
1 change: 1 addition & 0 deletions examples/flower17/pytorch-alexnet/accuracy.sh
@@ -14,6 +14,7 @@ dffml accuracy \
-model-normalize_std 0.229 0.224 0.225 \
-model-features image:int:$((500*500)) \
-model-predict label:str:1 \
-features label:str:1 \
-sources f=dir \
-source-foldername flower_dataset/test \
-source-feature image \
1 change: 1 addition & 0 deletions examples/flower17/sklearn-opencv/accuracy.sh
@@ -7,6 +7,7 @@ dffml accuracy \
Haralick:int:13 \
-model-predict label:str:1 \
-model-location tempdir \
-features label:str:1 \
-sources images=df \
-source-images-source dir \
-source-images-source-foldername flower_dataset/test \
1 change: 1 addition & 0 deletions examples/model/slr/accuracy.sh
@@ -3,6 +3,7 @@ dffml accuracy \
-model-features f1:float:1 \
-model-predict ans:int:1 \
-model-location tempdir \
-features ans:int:1 \
-sources f=csv \
-source-filename dataset.csv \
-scorer mse \
4 changes: 3 additions & 1 deletion examples/model/slr/slr.py
@@ -13,7 +13,9 @@

# Assess accuracy (alternate way of specifying data source)
scorer = MeanSquaredErrorAccuracy()
print("Accuracy:", accuracy(model, scorer, "dataset.csv"))
print(
"Accuracy:", accuracy(model, scorer, Feature("ans", int, 1), "dataset.csv")
)

# Make prediction
for i, features, prediction in predict(model, {"f1": 0.8, "ans": 0}):
1 change: 1 addition & 0 deletions examples/nlp/accuracy.sh
@@ -7,6 +7,7 @@ dffml accuracy \
-model-classifications 0 1 \
-model-location tempdir \
-model-features embedding:float:[1,10,96] \
-features sentiment:int:1 \
-sources text=df \
-source-text-dataflow nlp_ops_dataflow.json \
-source-text-features sentence:str:1 \
1 change: 1 addition & 0 deletions examples/nlp/sklearn/accuracy.sh
@@ -3,6 +3,7 @@ dffml accuracy \
-model-features extract_array_from_matrix.outputs.result:float:1 \
-model-predict sentiment:int:1 \
-model-location tempdir \
-features sentiment:int:1 \
-sources text=df \
-source-text-dataflow nlp_ops_dataflow.json \
-source-text-features sentence:str:1 \
