In [None]:
#export
from lab.imports import *
import lab.object_store.noop as noop_object_store
import math

In [None]:
#default_exp develop.treatments

# Develop Treatments

Offers a few tools to ensure a treatment is in working order. Treatment is a generic term for a model. A treatment can also be used for preprocessing, creating pipelines, analyzing data, and setting up baseline models.

The scikit-learn interface for a model works for the lab. A valid treatment works like a class that can be trained and used.

Remember, a treatment is a reference to how to build a model. The library, dependencies, and environment needed are just referenced in a treatment. So, the regular workflow is: build a model in a notebook, export the code into a library, and ensure the treatment is stored in the object store. After that, you can develop operations for the runtime or just reference the treatment in the system you have running.

In [None]:
#export
PASSING_STATUS = 'passed'
FAILING_STATUS = 'failed'

def _create_step(name, message=None, status=PASSING_STATUS, **kw):
    """Create a step. A step is just a dictionary with some conventions.
    Default to passed and no message."""
    d = {**kw, **dict(name=name, status=status)}
    if not message is None: d['message'] = message
    return d

_pass_step = _create_step # Default is pass

def _fail_step(name, exception=None, **kw):
    """Create a failing step using an exception.
    It's common to pass a traceback to this function, but it's not
    necessary, or even used in the current state of anything."""
    return _create_step(name, status=FAILING_STATUS, message=str(exception), **kw)


In [None]:
# A step created with the defaults is a passing step.
passing = _pass_step('foo')
assert passing['status'] == PASSING_STATUS
assert passing['name'] == 'foo'
assert passing == _create_step('foo')

# Raise and catch a real exception so there is a traceback to attach.
exception = None
trace = None
try:
    raise AssertionError("Exception message")
except Exception as e:
    exception = e
    trace = sys.exc_info()[2]

# The failing step keeps the extra keyword and stringifies the exception.
failing = _fail_step('foo', exception=exception, traceback=trace)
assert failing['status'] == FAILING_STATUS
assert failing['name'] == 'foo'
assert failing['message'] == 'Exception message'
assert failing['traceback'] == trace

In [None]:
class FormatValidation:
    """Convert a list of steps into a more-reasonable string explanation
    of what took place.

    Each step is a dictionary that may carry `name`, `status` and `message`
    keys. Passing `valid=...` as a keyword overrides the verdict that would
    otherwise be derived from the step statuses."""

    @classmethod
    def call(cls, steps, **kw):
        """Build a formatter for `steps` and return its message."""
        return cls(steps, **kw)()

    def __init__(self, steps, **kw):
        self.steps = steps
        self.kw = kw

    @property
    def all_passing(self):
        """True when every step has the passing status (also for no steps)."""
        if hasattr(self, '_all_passing'): return self._all_passing
        self._all_passing = all(v == PASSING_STATUS for v in self._get_key('status'))
        return self._all_passing

    @property
    def valid(self):
        """The verdict: the `valid` keyword if given, else `all_passing`."""
        if hasattr(self, '_valid'): return self._valid
        self._valid = self.kw.get('valid', self.all_passing)
        return self._valid

    @property
    def passed(self):
        """The steps whose status is the passing status."""
        if hasattr(self, '_passed'): return self._passed
        self._passed = self._filter_steps(key='status', value=PASSING_STATUS)
        return self._passed

    @property
    def passing_message(self):
        """Explanation used when the validation is considered successful."""
        if hasattr(self, '_passing_message'): return self._passing_message
        messages = ["Validation was successful."]
        # Steps are free-form dictionaries, so a name can be missing or not
        # a string; str.join raises TypeError on anything but strings.
        passed_names = [
            'unknown' if name is None else str(name)
            for name in self._get_key('name', steps=self.passed)
        ]
        if not passed_names:
            messages.append("No successful steps were reported.")
        else:
            step_names = ', '.join(passed_names)
            messages.append(f"Steps: {step_names}")
        self._passing_message = "\n".join(messages)
        return self._passing_message

    @property
    def failing_message(self):
        """Explanation used when the validation is considered failed.

        Every step is explained, not only the failing ones."""
        if hasattr(self, '_failing_message'): return self._failing_message
        messages = ["Validation failed."]
        messages.extend(self._explain_step(step) for step in self.steps)

        failed_steps = self._filter_steps(value=FAILING_STATUS)
        if not failed_steps: messages.append("No failing steps were reported.")
        self._failing_message = "\n".join(messages)
        return self._failing_message

    @property
    def message(self):
        """The passing or failing message, chosen by `valid`."""
        if hasattr(self, '_message'): return self._message
        if self.valid:
            self._message = self.passing_message
        else:
            self._message = self.failing_message
        return self._message

    def _filter_steps(self, key='status', value=None, **kw):
        """Utility to filter a list of dictionaries by a key."""
        return [d for d in self.steps if d.get(key) == value]

    def _get_key(self, key, steps=None, **kw):
        """Utility to get a value from a list of dictionaries by a key.

        Uses `self.steps` when no list is given; missing keys yield None."""
        if steps is None: steps = self.steps
        return [e.get(key) for e in steps]

    def _explain_success(self, step, default_name='unknown'):
        """Format a single successful step."""
        name = step.get('name', default_name)
        return f"Success with: {name}"

    def _explain_failure(self,
                         step,
                         default_name='unknown',
                         default_reason='reason unknown'):
        """Format a single failing step."""
        name = step.get('name', default_name)
        reason = step.get('message', default_reason)
        return f"Failing step: {name}. Reason: {reason}."

    def _explain_malformed(self, step, default_name='unknown'):
        """Format a step whose status is neither passing nor failing."""
        name = step.get('name', default_name)
        reason = step.get('message')
        message = f"Possible problem with {name}. The status was not set, but assumed to be a problem."
        if reason is not None: message += f' Message: {reason}'
        return message

    def _explain_step(self, step):
        """Pick the explanation that matches the step's status."""
        status = step.get('status')
        if status == PASSING_STATUS: return self._explain_success(step)
        if status == FAILING_STATUS: return self._explain_failure(step)
        return self._explain_malformed(step)

    def __call__(self):
        return self.message
    
def format_validation(steps, **kw):
    """Explain a list of steps as text; shorthand for `FormatValidation.call`."""
    explanation = FormatValidation.call(steps, **kw)
    return explanation

In [None]:
# A mix of passing, failing and malformed steps (missing status or name).
steps = [
    {'name': 'a', 'status': 'passed'},
    {'name': 'b', 'status': 'passed'},
    {'name': 'c', 'status': 'failed', 'message': 'Explain problems with c'},
    {'name': 'd', 'status': 'passed'},
    {'name': 'e'},
    {'name': 'f', 'status': 'failed'},
    {'status': 'failed', 'message': "Something went wrong in a malformed step"},
]

# With failures present, the failing message is the one reported.
subject = FormatValidation(steps)
assert not subject.valid
assert subject.message == subject.failing_message
assert subject() == subject.message
for fragment in ("Validation failed", "Success with: a", "Failing step", "Possible problem"):
    assert fragment in subject()

# The `valid` keyword overrides the verdict; no steps at all counts as valid.
assert "Validation was successful" in FormatValidation.call(steps, valid=True)
assert "No successful steps were reported" in FormatValidation.call([])
assert FormatValidation([]).valid
assert "No successful steps were reported" in FormatValidation.call([{'status': FAILING_STATUS}], valid=True)
assert "Validation was successful" in format_validation([])

These tools should probably be general utilities, moved somewhere else, but I'll get a validated treatment stored first.

In [None]:
#export

class Validate:
    """Validate a Treatment.

    Keywords drive both the validation and the model construction:
    `steps`, `name` and `package` are read by the validation itself, every
    other keyword is handed to the treatment when the model is built."""

    # (step name, attribute evaluated on this object) pairs. A step passes
    # when the attribute is truthy and fails when it is falsy or raises.
    DEFAULT_STEPS = [
        ('setup model', 'fn'),
        ('configure model', 'model'),
        ('duck type', 'valid_duck_type')
    ]

    @classmethod
    def call(cls, **kw):
        """Build a validator from the keywords and run it."""
        return cls(**kw)()

    def __init__(self, **kw):
        self.kw = kw

    @property
    def steps(self):
        """The steps to run: the `steps` keyword, else DEFAULT_STEPS."""
        if hasattr(self, '_steps'): return self._steps
        self._steps = self.kw.get('steps', self.DEFAULT_STEPS)
        return self._steps

    @property
    def name(self):
        """Get the name of the treatment."""
        if hasattr(self, '_name'): return self._name
        self._name = self.kw.get('name')
        return self._name

    @property
    def package(self):
        """Reveal the string of a package, if provided."""
        if hasattr(self, '_package'): return self._package
        self._package = self.kw.get('package')
        # Return the cached value directly rather than re-entering the property.
        return self._package

    @property
    def loaded_package(self):
        """Reveal the imported package, if provided.

        Returns None when no package was given or when it cannot be
        imported."""
        if hasattr(self, '_loaded_package'): return self._loaded_package
        if self.package is None: return None
        import importlib
        try:
            # import_module returns the module that was named, including for
            # dotted names; __import__('a.b') returns the top-level 'a'.
            self._loaded_package = importlib.import_module(self.package)
        except Exception:
            # Best effort: importing can fail for any reason the package's
            # own code raises, and a missing package just means "no fn".
            self._loaded_package = None
        return self._loaded_package

    @property
    def fn(self):
        """Find the callable behind the treatment, or None.

        Candidates are tried in order and a later match replaces an earlier
        one: the `fn` keyword looked up in this module's globals, `name`
        looked up in this module's globals, then `name` as an attribute of
        the loaded package."""
        if hasattr(self, '_fn'): return self._fn
        g = globals()
        if 'fn' in self.kw and self.kw['fn'] in g:
            self._fn = g[self.kw['fn']]
        if self.name:
            if self.name in g:
                self._fn = g[self.name]
            if self.loaded_package and hasattr(self.loaded_package, self.name):
                self._fn = getattr(self.loaded_package, self.name)
        if not hasattr(self, '_fn'): self._fn = None
        return self._fn

    @property
    def unused_keywords(self):
        """Sloppy way to use keywords for both validation and model building."""
        if hasattr(self, '_unused_keywords'): return self._unused_keywords
        known = ['steps', 'name', 'package'] # keywords used in validation, not model building
        # NOTE(review): 'fn' is read by the `fn` property but is not listed
        # here, so it is also forwarded to the model -- confirm that is wanted.
        self._unused_keywords = {k: v for k, v in self.kw.items() if k not in known}
        return self._unused_keywords

    @property
    def model(self):
        """Construct a model from the unused keywords.

        Calls `fn` with no arguments and then calls the result with the
        unused keywords. Raises whatever either call raises, including a
        TypeError when no `fn` was found."""
        # TODO: Revisit this, it might not be what I want.
        if hasattr(self, '_model'): return self._model
        self._model = self.fn()(**self.unused_keywords)
        return self._model

    @property
    def valid_duck_type(self):
        """TODO: write validations that the model is usable."""
        return True

    def validate(self):
        """Run the steps to validate.

        Returns a `(passing, results)` tuple: `passing` is True when every
        recorded step passed, `results` is the list of step dictionaries."""
        results = []
        for (name, attribute) in self.steps:
            try:
                passing = bool(getattr(self, attribute))
            except Exception as e:
                results.append(_fail_step(name, exception=e, traceback=e.__traceback__))
                continue
            if passing:
                results.append(_pass_step(name))
            else:
                # Give the failure a reason instead of an empty exception.
                reason = ValueError(f"'{attribute}' evaluated to a falsy value")
                results.append(_fail_step(name, exception=reason))
        passing = all(d.get('status') == PASSING_STATUS for d in results)
        return passing, results

    def __call__(self):
        return self.validate()
    
def validate(**kw):
    """Validate a treatment described by keywords; shorthand for `Validate.call`."""
    outcome = Validate.call(**kw)
    return outcome

In [None]:
class _SpecialTreatment:
    
    def model(self, **kw):
        return 'model...that needs to duck type'
    
    def __call__(self, **kw):
        return self.model(**kw)

# A treatment named in the keywords is looked up among the globals.
base = {'name': '_SpecialTreatment'}

subject = Validate(**base)
assert subject.steps == Validate.DEFAULT_STEPS
assert subject.name == base['name']
assert subject.fn == _SpecialTreatment

# A treatment can also come from an importable package.
subject = Validate(name='acos', package='math')
assert subject.package == 'math'
assert subject.name == 'acos'
assert subject.loaded_package == math
assert subject.fn == math.acos

# Keywords the validation does not use are passed on to build the model.
model_keywords = {'tools': 'for the model'}
subject = Validate(name='_SpecialTreatment', **model_keywords)
assert subject.unused_keywords == model_keywords
expected = _SpecialTreatment()(**model_keywords)
assert subject.model == expected
passed, steps = subject()
assert passed

# math.acos cannot be called the way a treatment is, so configuring fails.
passed, steps = validate(name='acos', package='math')
assert not passed
explanation = format_validation(steps)
assert "Failing step: configure model" in explanation

At this point we can validate a treatment, with the exception of duck typing the treatment--will it work with other tools? That's coming.

I don't like the way I'm building the model, sharing keywords, sharing details at high levels. Given some use of treatments, I've got to redesign some interfaces.

Also, it's probably OK that I create classes for the heavy lifting, but I've been hesitant to bring load_once and load_once_property into this library. Keeping it simple is pedantic.

Some things will move out to a utilities module:

* FormatValidation
* _pass_step and _fail_step
* Some of the dictionary utilities in FormatValidation