In [None]:
# default_exp train.messages

# Messages

Using Protocol Buffers to set up the types of objects I need:

* subject
* treatment
* model
* evaluation
* expectation
* invocation

There could be more, but this handles the lifecycle of a data product:

* Define the data or type of data we're working with. This can come across a stream, be found in a file, or resemble academic data sets.
* Learn how to learn from the data, defining a treatment that builds a model, applies a heuristic, or somehow transforms the data.
* Store a model with its metadata, so we know what it is, when it was built, how it was built, etc.
* Store evaluations of data transforms in standard ways, so they can be used in reporting and progress management.
* Convert data into data expectations. This works on input data to a model as well as output data from a model. This can define a distribution, or any rules that make the data valid or alarming.
* Create external interfaces to functions, models, and API calls. This is especially useful with all of my open keyword function signatures.

In [None]:
#hide
pwd = %pwd
if pwd.split('/')[-1] == 'nbs':
    %cd ..

/Users/davidrichards/codes/hydra/lab


In [None]:
from lab.util.test_functions import *
import google.protobuf
import lab.train.messages_pb2 as messages_pb2
import zlib
import binascii

In [None]:
class MessageBuilder:
    """Create Protocol Buffer messages from Python
    data types.

    A builder is bound to one generated message class, selected with the
    `kind` keyword: either a class from `messages_pb2` or the name of one
    (e.g. ``'Model'``). Without `kind`, `DEFAULT_MESSAGE_CLASS` is used.
    Calling the builder (or `build`) with field values returns a new
    message; keys that are not fields of the message class are dropped.
    """

    DEFAULT_MESSAGE_CLASS = messages_pb2.Model
    # NOTE(review): this attribute path looks specific to protobuf's C++
    # backend (`pyext`); confirm it resolves in every target environment.
    MESSAGE_TYPE = google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType

    def __init__(self, **kw):
        """Keep the keyword options; this class only reads `kind`."""
        self.kw = kw

    def _class_from_object(self, o):
        """Resolve `o` to a generated message class.

        `o` may be a message class (returned as-is) or the name of an
        attribute of `messages_pb2`. Returns None when `o` does not
        resolve to an instance of `MESSAGE_TYPE`.
        """
        # TODO: Add recursion, e.g. Model.Evaluation
        if isinstance(o, str):
            if hasattr(messages_pb2, o):
                return self._class_from_object(getattr(messages_pb2, o))
        if isinstance(o, self.MESSAGE_TYPE):
            return o
        return None

    @property
    def _message_class(self):
        """The message class this builder creates (resolved once, then cached)."""
        # The cache attribute uses a single leading underscore on purpose:
        # with two, `self.__x` is name-mangled to `_MessageBuilder__x` while
        # the string passed to hasattr() is not, so the cache would never hit.
        if hasattr(self, '_cached_message_class'):
            return self._cached_message_class
        o = self.kw.get('kind', self.DEFAULT_MESSAGE_CLASS)
        self._cached_message_class = self._class_from_object(o)
        return self._cached_message_class

    @property
    def instance(self):
        """An empty message of the builder's class, created on first access."""
        if hasattr(self, '_instance'):
            return self._instance
        self._instance = self._message_class()
        return self._instance

    @property
    def _fields(self):
        """Dict of field name -> field descriptor for the message class (cached)."""
        # Single-underscore cache name for the same name-mangling reason
        # as in `_message_class`.
        if hasattr(self, '_cached_fields'):
            return self._cached_fields
        self._cached_fields = dict(self._message_class.DESCRIPTOR.fields_by_name)
        return self._cached_fields

    def build(self, data):
        """Build a message from the dict `data`.

        Keys that are not fields of the message class are ignored.
        Returns the new message, or None (after emitting a RuntimeWarning)
        when the message constructor rejects the remaining values.
        """
        # TODO: add recursion and type checking.
        data = {k: v for k, v in data.items() if k in self._fields}
        try:
            return self._message_class(**data)
        except Exception as err:
            # Still best-effort (callers get None), but the reason is now
            # reported and KeyboardInterrupt/SystemExit are no longer swallowed.
            import warnings
            warnings.warn(
                'MessageBuilder could not build a message: %r' % (err,),
                RuntimeWarning,
                stacklevel=2,
            )
            return None

    def __call__(self, **data):
        """Keyword-argument shorthand for `build`."""
        return self.build(data)
    
# Shared builder instances for the Model and Evaluation message kinds.
model_builder, evaluation_builder = (
    MessageBuilder(kind='Model'),
    MessageBuilder(kind='Evaluation'),
)

In [None]:
# With no `kind`, the builder uses its default message class.
assert MessageBuilder()._message_class == MessageBuilder.DEFAULT_MESSAGE_CLASS

# `kind` may be given either as the generated class or as its name.
assert all(
    MessageBuilder(kind=kind)._message_class == getattr(messages_pb2, name)
    for name in ('Model', 'Evaluation')
    for kind in (getattr(messages_pb2, name), name)
)

# Multi-paragraph placeholder text, used below as a long `description` value.
description = """
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Maecenas commodo sem id mi fringilla volutpat. Etiam sed massa sapien. Curabitur elementum, quam id feugiat tristique, tellus mauris pretium sem, sit amet congue elit lectus non dolor. Suspendisse imperdiet lectus at est posuere, vel suscipit arcu rhoncus. Integer ultricies laoreet erat eu laoreet. Pellentesque nec metus scelerisque, dignissim quam ut, suscipit magna. Sed nec purus ut felis eleifend volutpat.

Mauris consectetur metus a finibus pellentesque. Nunc eleifend tempus mollis. Nullam eget interdum enim. Sed nunc lorem, elementum vel pretium in, luctus eu turpis. In hac habitasse platea dictumst. Curabitur tincidunt placerat est, eu imperdiet enim cursus laoreet. Nam placerat varius lacus, at laoreet felis facilisis ut. Vestibulum vitae eros ante. Vestibulum mattis sem ut lacus posuere, at tincidunt augue imperdiet. Nulla congue erat vitae suscipit ultrices. Maecenas gravida egestas magna eget condimentum. Morbi vitae varius magna. Pellentesque ut diam magna. Etiam justo nibh, aliquet eget finibus at, finibus ac quam. Sed nec porta justo, ac aliquet sem. Donec sit amet elit lobortis, volutpat sapien eget, ullamcorper erat.

Nullam eu quam porttitor, rhoncus dolor id, rhoncus nulla. Morbi et elementum nunc. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Sed imperdiet nulla nunc, vel sollicitudin libero dapibus sed. Curabitur ultrices ante at gravida rhoncus. Nunc quis ante convallis, dapibus mi sit amet, efficitur ex. Ut magna est, egestas sit amet scelerisque sit amet, finibus a lectus. Sed vitae dignissim leo. Sed laoreet ante et sem convallis auctor. Morbi sodales ut magna ultrices sodales. Integer luctus blandit nunc. Sed a dapibus massa. Suspendisse rhoncus erat lacus, quis facilisis dui ornare dignissim. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Curabitur purus ante, vulputate at justo in, egestas egestas ante. Duis vitae est rutrum, luctus erat eget, luctus enim.

Aliquam eu neque nisl. Ut egestas, dui nec posuere sagittis, justo massa gravida velit, a pulvinar urna turpis in elit. Integer ex orci, scelerisque non felis quis, ornare aliquet quam. Fusce at metus at risus auctor interdum a eget ante. Aenean congue, risus ac rutrum gravida, leo tellus ullamcorper nisi, quis aliquet justo orci ut ex. Morbi sagittis augue ac dui posuere, quis vestibulum dui finibus. Nam elementum ex non diam sollicitudin, sit amet finibus velit convallis. Vivamus viverra, ipsum quis cursus tempor, neque erat tincidunt elit, a tempor dolor enim quis nibh. Morbi vestibulum massa sed tempus hendrerit. Quisque aliquet metus feugiat, pellentesque nisl id, rutrum quam. Nulla sit amet dapibus ipsum. Morbi ut lorem aliquam, lacinia tortor vitae, luctus mauris. Maecenas vehicula elementum justo gravida luctus. Maecenas suscipit pellentesque nisi ut rutrum. Praesent a convallis quam.

Donec at nisi eros. Vestibulum sed libero vitae nunc posuere aliquet. Aliquam erat volutpat. Sed lorem felis, sagittis et pulvinar id, posuere sed arcu. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec vitae dolor vulputate, iaculis velit fermentum, posuere ante. Aenean erat nisl, feugiat ac interdum sed, fringilla vitae purus. Curabitur pulvinar aliquam dui, eget suscipit augue volutpat id. Nullam auctor mollis mi, vel sodales erat scelerisque vel. Aliquam bibendum purus eu feugiat hendrerit. Pellentesque id ultrices ante. Nullam non mauris a nibh finibus commodo. Fusce vel purus vitae tortor vestibulum scelerisque sed eu erat. In hac habitasse platea dictumst. Ut feugiat magna nunc, eget tempor sapien commodo nec. Donec tincidunt augue at leo scelerisque euismod.
"""

# 'foo' is not a field of Model, so the builder is expected to drop it.
data = {'name': 'foo', 'description': description, 'foo': 42}

# Both entry points (build(dict) and call-with-keywords) populate the fields.
built_description = model_builder.build(data).description
assert built_description == description
built_name = model_builder(**data).name
assert built_name == data['name']

# A repeated nested message can be supplied as a list of plain dicts.
data = {
    'name': 'bar',
    'evaluations': [{'name': 'accuracy', 'values': [1, 2, 3, 4]}],
}
model = model_builder(**data)
assert len(model.evaluations) == 1


In [None]:
def serialize(message):
    """Return the result of `message.SerializeToString()`."""
    encoded = message.SerializeToString()
    return encoded

def deserialize(s, kind):
    """Create an empty `kind()` object, parse `s` into it, and return it.

    `kind` is called with no arguments and the result must offer
    `ParseFromString` (as protobuf message classes do).
    """
    parsed = kind()
    parsed.ParseFromString(s)
    return parsed

# Level applied when the caller gives none.
DEFAULT_COMPRESSION = zlib.Z_BEST_COMPRESSION

# Readable names for the zlib level constants.
COMPRESSION_LEVELS = {
    'best_speed': zlib.Z_BEST_SPEED,
    'best_compression': zlib.Z_BEST_COMPRESSION,
    'default': zlib.Z_DEFAULT_COMPRESSION,
}

def compress_message(message, level=DEFAULT_COMPRESSION):
    """Serialize `message` and zlib-compress the result.

    `level` is either a zlib level or one of the names in
    `COMPRESSION_LEVELS`; a name that is not in that mapping is
    replaced by `DEFAULT_COMPRESSION`.
    """
    if isinstance(level, str):
        # Unknown names quietly resolve to the default level.
        level = COMPRESSION_LEVELS.get(level, DEFAULT_COMPRESSION)
    return zlib.compress(serialize(message), level)

def decompress_message(compressed, kind):
    """Inverse of `compress_message`: zlib-decompress `compressed`, then
    parse the bytes into a new `kind` message via `deserialize`."""
    return deserialize(zlib.decompress(compressed), kind)

In [None]:
# Build the example message used by the cells below from the current `data` dict.
message = model_builder(**data)

In [None]:
# Display the message's text representation.
message

name: "bar"
evaluations {
  name: "accuracy"
  values: 1.0
  values: 2.0
  values: 3.0
  values: 4.0
}

In [None]:
# Serialized bytes of the message, before any compression.
serialize(message)

b'\n\x03bar"\x1e\n\x08accuracy\x15\x00\x00\x80?\x15\x00\x00\x00@\x15\x00\x00@@\x15\x00\x00\x80@'

In [None]:
# Compressed form using the default level (DEFAULT_COMPRESSION).
compress_message(message)

b'x\xda\xe3bNJ,R\x92\xe3\xe2HLN.-JL\xae\x14e`h\xb0\x07\x12\x0c\x0e@\xc2\x01D48\x00\x00\x9d\x1b\x07s'

In [None]:
# An unrecognised level name falls back to DEFAULT_COMPRESSION, so the bytes
# are the same as when no level is given.
compress_message(message, level='nonsense')

b'x\xda\xe3bNJ,R\x92\xe3\xe2HLN.-JL\xae\x14e`h\xb0\x07\x12\x0c\x0e@\xc2\x01D48\x00\x00\x9d\x1b\x07s'

In [None]:
# Round trip: compress the message, then decompress it back into a Model.
x = compress_message(message)
decompress_message(x, messages_pb2.Model)

name: "bar"
evaluations {
  name: "accuracy"
  values: 1.0
  values: 2.0
  values: 3.0
  values: 4.0
}

In [None]:
# No `version` is supplied here; the value shown presumably comes from a
# default declared in the .proto definition — TODO confirm.
model_builder(description='Foo').version

'0.0.1'

In [None]:
# Decompress the round-tripped bytes back into a Model and read its version.
# (The call previously used the misspelled name `decompres_message`, which
# raises NameError on a fresh kernel.)
built = decompress_message(x, messages_pb2.Model)
built.version

'0.0.1'

In [None]:
# Mapping of field name -> field descriptor for the builder's message class.
model_builder._fields

{'name': <google.protobuf.pyext._message.FieldDescriptor at 0x11d7e9580>,
 'description': <google.protobuf.pyext._message.FieldDescriptor at 0x11d7e9ee0>,
 'version': <google.protobuf.pyext._message.FieldDescriptor at 0x11d7e91c0>,
 'evaluations': <google.protobuf.pyext._message.FieldDescriptor at 0x11d7e9fa0>}

In [None]:
# Read every declared field off the rebuilt message, keyed by field name.
{field_name: getattr(built, field_name) for field_name in model_builder._fields}

{'name': 'bar',
 'description': '',
 'version': '0.0.1',
 'evaluations': [name: "accuracy"
 values: 1.0
 values: 2.0
 values: 3.0
 values: 4.0
 ]}