In [2]:
import json
import collections
import networkx

In [14]:
def graph_prototype():
    """Create a new, empty work graph record.

    Returns:
        dict: A mapping holding the schema ``'version'`` string and an empty
        ``'elements'`` mapping. New objects are built on every call.
    """
    prototype = dict(version='gmxapi_graph_0_2')
    prototype['elements'] = dict()
    return prototype

def element_prototype():
    """Create a new, blank element record.

    Returns:
        dict: A mapping whose ``'label'``, ``'namespace'`` and ``'operation'``
        entries are ``None``, whose ``'input'``, ``'output'`` and
        ``'interface'`` entries are empty dicts, and whose ``'depends'`` entry
        is an empty list. New containers are built on every call.
    """
    # The identifying fields start out unset.
    record = dict.fromkeys(('label', 'namespace', 'operation'))
    # The container fields start out empty.
    record.update(input={}, output={}, depends=[], interface={})
    return record

In [17]:
# Example graph record containing one element: a labelled 2x2 integer array.
sample1 = dict(
    version='gmxapi_graph_0_2',
    elements=dict(
        integer_XXX=dict(
            label='my_array',
            namespace='gmxapi',
            operation='Integer',
            input=[[0, 1], [2, 3]],
            output=dict(
                meta=dict(
                    type='gmxapi.Integer',
                    shape=[2, 2],
                ),
            ),
        ),
    ),
)

In [None]:
# Example graph record containing one unlabelled 'cli' element.
# NOTE(review): the input and output values match sample1's Integer element;
# presumably placeholders for a real 'cli' record -- confirm.
sample2 = dict(
    version='gmxapi_graph_0_2',
    elements=dict(
        cli_XXX=dict(
            namespace='gmxapi',
            operation='cli',
            input=[[0, 1], [2, 3]],
            output=dict(
                meta=dict(
                    type='gmxapi.Integer',
                    shape=[2, 2],
                ),
            ),
        ),
    ),
)

In [15]:
class WorkGraph(collections.UserDict):
    """Dictionary-like holder for a deserialized work graph.

    This is a convenience for validation or visualization only; it is not
    meant to be a mandatory piece of the object model. Client code is expected
    to reach a work graph through a Context, and a Context may keep whatever
    internal data structure suits it.
    """
    def __init__(self):
        # Populate the new instance from a newly built prototype record.
        empty_graph = graph_prototype()
        super().__init__(empty_graph)

In [16]:
# Mutate one graph, then print the data of a brand-new graph so the two can be
# compared by eye.
graph = WorkGraph()
graph['elements'].update(foo='bar')
print(WorkGraph().data)

In [None]:
# Key types

class ObjectName(collections.UserString):
    """String-like key type for object names.

    The given text is stored as-is; no validation is performed yet.
    """
    def __init__(self, name: str):
        # TODO: validation
        super(ObjectName, self).__init__(name)

class Label(collections.UserString):
    """String-like key type for labels.

    The given text is stored as-is; no validation is performed yet.
    """
    def __init__(self, name: str):
        # TODO: validation
        super(Label, self).__init__(name)

# Value types

class Data:
    """Literal data for work graph input values.

    Literal data objects are dense arrays of uniform element type.

    NOTE(review): placeholder -- the class currently defines no attributes or
    methods.
    """

class Collection:
    """Value that is a nested mapping of keys to value objects.

    Keys are strings subject to the constraints of Label.

    Values are Data, Collection, or Reference objects.

    NOTE(review): placeholder -- the class currently defines no attributes or
    methods.
    """

class Reference:
    """Work record value object that is a reference to another graph entity.

    NOTE(review): placeholder -- the class currently defines no attributes or
    methods.
    """

In [None]:
# Manage the structure and rules of a work record.
import copy
class WorkRecord(collections.UserDict):
    """Mapping that manages the structure and rules of a work record.

    A new record starts from the empty graph prototype. Item reads hand back
    deep copies, so changing a retrieved value does not change the record.
    """
    def __init__(self):
        # graph_prototype must be *called*: passing the function object itself
        # makes UserDict.update() fail because a function is not iterable.
        super().__init__(graph_prototype())

    def __getitem__(self, item):
        """Return a deep copy of the value stored under ``item``.

        Raises:
            KeyError: if ``item`` is not present in the record.
        """
        return copy.deepcopy(super().__getitem__(item))

    def as_json(self, **kwargs):
        """Serialize the record to a JSON string.

        Args:
            **kwargs: forwarded unchanged to ``json.dumps``.

        Returns:
            str: the JSON text for the underlying ``data`` mapping.
        """
        return json.dumps(self.data, **kwargs)
        

In [4]:
# Show the str() form, the repr() form, and the default print form of a new
# record, one per line.
record = WorkRecord()
print(str(record), repr(record), record, sep='\n')

In [33]:
#
import collections
import json

# The behavior of `bytes` is sufficient that a UID class is probably not necessary,
# though we might want to guarantee that a UID is exactly 32 bytes. TBD...

class Fingerprint(object):
    """Identity of a graph node derived from its operation, input and dependencies.

    The three parts are serialized to a compact JSON string with sorted keys,
    and the SHA-256 digest of that string is the node's unique identifier.
    """
    import hashlib as _hashlib

    def __init__(self, *, operation, input, depends=()):
        """Validate and store the identifying parts.

        Args:
            operation: list or tuple of operation name components.
            input: dict, list, tuple or str describing the input values.
            depends: list or tuple describing dependencies.

        Raises:
            ValueError: if any argument has an unsupported type.
        """
        # TODO: replace (list, tuple) with abstraction for valid operation values
        if not isinstance(operation, (list, tuple)):
            raise ValueError('Fingerprint requires a sequence of operation name components.')
        self.operation = tuple(operation)

        # TODO: replace (dict, list, tuple, str) with abstraction for valid input values.
        if isinstance(input, dict):
            # Shallow copy so later changes to the caller's dict are not seen.
            self.input = dict(input)
        elif isinstance(input, (list, tuple)):
            # Literal array data, e.g. the nested lists from ndarray.tolist().
            self.input = list(input)
        elif isinstance(input, str):
            # A str has no .items(); keep it as-is.
            self.input = input
        else:
            raise ValueError('Fingerprint requires a valid input representation.')

        # TODO: replace (list, tuple) with abstraction for valid depends values.
        if not isinstance(depends, (list, tuple)):
            raise ValueError('Fingerprint requires a sequence for dependency specification.')
        self.depends = tuple(depends)

    def compact_json(self):
        """Return the JSON string that is hashed by ``uid``.

        Keys are sorted, separators carry no whitespace, and non-ASCII
        characters are escaped.
        """
        identifiers = {
            'depends': self.depends,
            'input': self.input,
            'operation': self.operation,
        }
        return json.dumps(identifiers, separators=(',', ':'), sort_keys=True, ensure_ascii=True)

    def uid(self) -> bytes:
        """Return the 32-byte SHA-256 digest of ``compact_json()``.

        Raises:
            ValueError: if the hash object reports a digest size other than 32.
        """
        id_bytes = self.compact_json().encode('utf-8')
        id_hash = Fingerprint._hashlib.sha256(id_bytes)
        size = id_hash.digest_size
        if size != 32:
            raise ValueError('Expected digest_size 32, but got {}'.format(size))
        digest = id_hash.digest()
        assert isinstance(digest, bytes)
        assert len(digest) == size
        return digest


class Integer64(object):
    """Literal array of 64-bit integers that can be serialized and fingerprinted."""
    import json as _json
    # TODO: Replace numpy dependency with memoryview manager or core gmxapi
    #  buffer protocol provider.
    # Note that the built-in Python array module only provides 1-dimensional arrays.
    from numpy import array as _array

    # Operation name shared by to_json(), from_json() and fingerprint().
    _operation = ('gmxapi', 'Integer64')

    def __init__(self, data):
        """Store ``data`` as a numpy array with dtype ``int64``."""
        self.data = Integer64._array(data, dtype='int64')

    def to_json(self, **json_args) -> str:
        """Serialize the array as a JSON record.

        Args:
            **json_args: forwarded unchanged to ``json.dumps``.

        Returns:
            str: JSON object with ``operation``, ``input`` and ``depends`` keys.
        """
        record = {
            'operation': Integer64._operation,
            'input': self.data.tolist(),
            'depends': ()
        }
        return Integer64._json.dumps(record, **json_args)

    @classmethod
    def from_json(cls, serialized: str):
        """Rebuild an instance from the output of ``to_json``.

        Raises:
            ValueError: if the record is not an object for this operation or
                has no ``input`` entry.
        """
        record = cls._json.loads(serialized)
        # JSON has no tuples, so the stored operation comes back as a list.
        if not isinstance(record, dict) or record.get('operation') != list(cls._operation):
            raise ValueError('Serialized record is not a gmxapi.Integer64 operation.')
        if 'input' not in record:
            raise ValueError('Serialized record has no input.')
        return cls(record['input'])

    def fingerprint(self):
        """Return the Fingerprint of this literal.

        The input is the nested-list form of the data, and the operation name
        is the same one ``to_json`` writes.
        """
        return Fingerprint(operation=Integer64._operation, input=self.data.tolist())

# TODO: Probably want a generic SerializedOperation named type or abstract handling
#  for the various ways const nodes could be passed.

In [None]:
my_array = Integer64([[1,2], [3,4]])
fingerprint = my_array.fingerprint()
# Fingerprint exposes its serialized form through compact_json(); it has no
# to_json() method.
print(fingerprint.compact_json())

uid = fingerprint.uid()
print(uid.hex())
print(int.from_bytes(uid, byteorder='big'))
print(my_array.to_json())

In [None]:
# Test fingerprinting for Integer64
import hashlib

# Hash a hand-written compact JSON record and print the digest as hex.
expected_json = '{"depends":[],"input":[[1,2],[3,4]],"operation":["gmxapi","Integer64"]}'
expected_hash = hashlib.sha256(bytes(expected_json, 'utf-8')).digest()
print(bytes.hex(expected_hash))

In [None]:
# Use Python's xdrlib module with "network" (big-endian) byte order to
# establish a baseline byte sequence for fingerprinting binary data.
#
# Derive native data through transformations of that baseline.
# 
# Automatically generate reverse transformation and fingerprint so that later
# transformations can be performed at minimal cost.

In [34]:
# Use Python's xdrlib module with "network" (big-endian) byte order to
# establish a baseline byte sequence for fingerprinting binary data.
#
# Derive native data through transformations of that baseline.
# 
# Automatically generate reverse transformation and fingerprint so that later
# transformations can be performed at minimal cost.

In [32]:
# Test fingerprinting for Integer64
import hashlib

# Hash a hand-written compact JSON record and print the digest as hex.
expected_json = '{"depends":[],"input":[[1,2],[3,4]],"operation":["gmxapi","Integer64"]}'
expected_hash = hashlib.sha256(bytes(expected_json, 'utf-8')).digest()
print(bytes.hex(expected_hash))

In [None]:
# Use Python's xdrlib module with "network" (big-endian) byte order to
# establish a baseline byte sequence for fingerprinting binary data.
#
# Derive native data through transformations of that baseline.
# 
# Automatically generate reverse transformation and fingerprint so that later
# transformations can be performed at minimal cost.