In [319]:
import uuid
import msgpack
from abc import ABC, abstractmethod

class NodeFactory():
    """
    Create a copy of a node hosted by a server on the client side.

    All helpers are static methods: they carry no state of their own and
    are always invoked as ``NodeFactory.<name>(server, ...)``.
    ``server`` is any object exposing ``request(dict) -> bytes`` where the
    returned bytes are a msgpack-encoded reply.
    """

    @staticmethod
    def _ask(server, request):
        """
        Send one request dict to the server and decode the msgpack reply.
        """
        return msgpack.unpackb(server.request(request), raw=False)

    @staticmethod
    def get_root(server):
        """
        Get the uid of the root node from the server.
        """
        return NodeFactory._ask(server, {'request': 'send_root'})

    @staticmethod
    def get_edges(server, uid):
        """
        Get the edges of the node with the passed uid from the server.
        """
        return NodeFactory._ask(server, {'request': 'send_edges', 'uid': uid})

    @staticmethod
    def get_info(server, uid):
        """
        Get the info of the node with the passed uid.
        The info tells us the type of the node and allows us to create the same node on the client side.
        """
        return NodeFactory._ask(server, {'request': 'send_info', 'uid': uid})

    @staticmethod
    def get_meta(server, uid):
        """
        Get the metadata of the node with the passed uid.
        """
        return NodeFactory._ask(server, {'request': 'send_meta', 'uid': uid})

    @staticmethod
    def get_data(server, uid, **kwargs):
        """
        Get the data of the node with the passed uid.
        Arbitrary kwargs can also be passed to specify how to read the data.
        For example we can pass an index kwarg to read a specific chunk of the data.
        The send_data method of the node that you are connecting to will need to
        understand the kwargs that you pass.
        """
        return NodeFactory._ask(server,
                                {'request': 'send_data', 'uid': uid, 'kwargs': kwargs})

    @staticmethod
    def produce(server, uid, num_chunks=10):
        """
        Create a copy locally of a node on the server specified by the uid.

        Parameters
        ----------
        server: object
            The server to fetch the node from.
        uid: str
            Identifier of the node on the server.
        num_chunks: int (optional)
            How many data chunks (indices ``0 .. num_chunks - 1``) the local
            copy will request. Defaults to 10, the previously hard-coded value.

        Returns
        -------
        NumpyTestNode
            A client-side node whose ``server`` attribute is set, so that its
            own edges can be resolved later on.
        """
        # I'll make this dynamic later.
        # Can use entrypoints or importlib to discover node classes.
        # NOTE: ``data`` is a generator, so chunks are only requested when the
        # data is iterated, and it can be iterated a single time.
        lazy_chunks = (NodeFactory.get_data(server, uid, index=i)
                       for i in range(num_chunks))
        node = NumpyTestNode(uid=uid,
                             edges=NodeFactory.get_edges(server, uid),
                             meta=NodeFactory.get_meta(server, uid),
                             data=lazy_chunks,
                             server=server)
        return node

In [320]:
class BaseNode(ABC):
    """
    BaseNode

    This is an abstract base class that defines the required methods and attributes for a node in the graph.
    This class is designed to access data locally as well as remotely.
    All of the arguments are optional, the arguments are mainly
    used when creating an instance on the client side.

    Parameters
    ----------
    uid: str (optional)
        Identifier for the node. A uuid4 string is generated when omitted.
    edges: dict (optional)
        Mapping of a label to adjacent node.
        If working locally, the label maps to the actual node instance.
        If working remotely, the label maps to the uid of the adjacent node,
        only when __getitem__ is called is it replaced by an instance of the node.
    meta: dict (optional)
        dict of metadata, you can put any serializable dict here.
    data: object (optional)
        Can be anything, but it should be able to be read in chunks, and should be lazy.
        The chunks must be serializable.
    server: object (optional)
        Only needed on the client side when working remotely.
        It is the required info to connect to the server to get nodes.
        For now it is just an instance of the server simulator GraphServer.
    """

    def __init__(self, uid=None, edges=None, meta=None, data=None, server=None):
        self.uid = uid or str(uuid.uuid4())
        self.edges = edges or {}
        self.info = {'module': 'Base', 'class': 'BaseNode', 'version': '0.0.1'}
        self.meta = meta
        self.data = data
        self.server = server

    def _resolve_server(self):
        """
        Return the server this node should fetch adjacent nodes from.

        The node's own ``server`` attribute wins. When it is missing or None,
        a module-level variable named ``server`` is used instead, which is
        what this method's caller used to read unconditionally.

        Raises
        ------
        RuntimeError
            If neither source provides a server.
        """
        # getattr: subclasses that skip BaseNode.__init__ may not have the attribute.
        server = getattr(self, 'server', None)
        if server is None:
            server = globals().get('server')
        if server is None:
            raise RuntimeError(
                "Node %r has no server to fetch adjacent nodes from." % (self.uid,))
        return server

    def __getitem__(self, edge):
        """
        Access adjacent nodes.
        On the server side edges are the actual adjacent nodes.
        On the client side edges are the uids of the adjacent nodes.
        On the client side the factory accesses the server for the data to construct the node locally.

        Raises
        ------
        KeyError
            If there is no edge with the passed label.
        """
        adjacent = self.edges[edge]
        if not isinstance(adjacent, BaseNode):
            server = self._resolve_server()
            adjacent = NodeFactory.produce(server, adjacent)
            # Hand the server on so the new node can resolve its own edges.
            if getattr(adjacent, 'server', None) is None:
                adjacent.server = server
            # Cache the instance: the next lookup will not hit the server again.
            self.edges[edge] = adjacent
        return adjacent

    @abstractmethod
    def read(self):
        """
        Convert raw data in self.data to a python object.
        """
        pass

    @abstractmethod
    def load_meta(self):
        """
        Load header data from the datasource.
        """
        pass

    @abstractmethod
    def load_data(self):
        """
        This method loads the data from the source to self.data
        self.data is a list of partitions, each partition must be serializable.
        """
        pass

    def send_info(self, **kwargs):
        """
        Sends the info to the client.
        The info identifies the type of the node object to be constructed on the client side.
        """
        return self.info

    def send_edges(self, **kwargs):
        """
        Send the dictionary of edges to the client.
        Only the uid of the edges must be sent.
        Edges that are node instances are replaced by their uid, edges that
        already hold a uid are passed through unchanged.
        """
        return {label: (target.uid if isinstance(target, BaseNode) else target)
                for label, target in self.edges.items()}

    def send_meta(self, **kwargs):
        """
        Sends header data to client.
        The metadata is (re)loaded through load_meta on every call.
        """
        self.load_meta()
        return self.meta

    def send_data(self, index, **kwargs):
        """
        Sends the data partition at the passed index to the client.
        The data is loaded through load_data first if it is not present yet.
        """
        if self.data is None:
            self.load_data()
        return self.data[index]

In [321]:
import warnings

class RootNode(BaseNode):
    """
    RootNode

    Entry point of a graph.
    If a server is specified, this will become a copy of the server's root node:
    its uid, info and edges are fetched from the server, and the server is
    remembered on the instance.
    Without a server it is built like any other local node from the keyword
    arguments accepted by BaseNode.
    """
    def __init__(self, server=None, **kwargs):
        if server:
            self.uid = NodeFactory.get_root(server)
            self.info = NodeFactory.get_info(server, self.uid)
            self.edges = NodeFactory.get_edges(server, self.uid)
            self.meta = None
            self.data = None
            # BaseNode.__init__ is not called on this path, so the server
            # attribute has to be set here for the instance to carry it.
            self.server = server
        else:
            super().__init__(**kwargs)

    def read(self):
        """A RootNode holds no data: only emits a warning."""
        warnings.warn("This is a RootNode, it has no data to read.")

    def load_meta(self):
        """A RootNode holds no metadata: only emits a warning."""
        warnings.warn("This is a RootNode, it has no data to read.")

    def load_data(self):
        """A RootNode holds no data: only emits a warning."""
        warnings.warn("This is a RootNode, it has no data to read.")
        

class EmptyNode(BaseNode):
    """
    EmptyNode

    A structural node: it carries neither data nor metadata.
    Each of the three loading/reading hooks only emits a warning.
    """
    def read(self):
        """Nothing to convert; warn and return None."""
        message = "This is a EmptyNode, it has no data to read."
        warnings.warn(message)

    def load_meta(self):
        """Nothing to load; warn and leave self.meta untouched."""
        message = "This is a EmptyNode, it has no data to read."
        warnings.warn(message)

    def load_data(self):
        """Nothing to load; warn and leave self.data untouched."""
        message = "This is a EmptyNode, it has no data to read."
        warnings.warn(message)

In [322]:
import numpy

class NumpyTestNode(BaseNode):
    """
    Test-only node that serves a fixed block of integers.
    """
    def read(self):
        """
        Return all partitions of self.data concatenated into one numpy array.
        The data is loaded first when it is not present yet.
        """
        if self.data is None:
            self.load_data()
        values = []
        for partition in self.data:
            values.extend(partition)
        return numpy.array(values)

    def load_meta(self):
        """Stand-in for reading header data from a data source."""
        self.meta = dict(field1='testing')

    def load_data(self):
        """Fill self.data with 20 partitions, each the list 0..9."""
        partitions = []
        for _ in range(20):
            partitions.append(list(range(10)))
        self.data = partitions

In [323]:
class GraphServer:
    """
    A graph server

    A server only has one root. (for now)
    Authentication can be handled here by checking if a user can access a specific uid.

    Parameters
    ----------
    graph: node
        The root node of the graph.
    """
    def __init__(self, graph):
        self.root = graph
        self.nodes = self.load_graph(graph)

    def request(self, request):
        """
        Handles the requests for data from the client.

        Parameters
        ----------
        request: dict
            ``request['request']`` names the operation. ``'send_root'`` is
            answered by the server itself; any other name must start with
            ``'send_'`` and is called on the node stored under
            ``request['uid']`` with ``request['kwargs']`` (optional) as
            keyword arguments.

        Returns
        -------
        bytes
            The msgpack-encoded result.

        Raises
        ------
        ValueError
            If the operation name is not a ``send_*`` name.
        KeyError
            If the uid is unknown.
        """
        operation = request['request']
        if operation == 'send_root':
            return msgpack.packb(self.send_root())
        # The name comes from the client: only expose the send_* handlers
        # instead of every attribute of the node.
        if not (isinstance(operation, str) and operation.startswith('send_')):
            raise ValueError("Unsupported request: %r" % (operation,))
        handler = getattr(self.nodes[request['uid']], operation)
        return msgpack.packb(handler(**(request.get('kwargs') or {})))

    def send_root(self):
        """
        Returns the uid of the root node.
        """
        return self.root.uid

    def load_graph(self, root):
        """
        Returns a dictionary of all nodes in the graph.
        This allows the client to access the nodes by uid in constant time.

        The graph is walked depth-first with an explicit stack, so the depth
        of the graph is not bounded by the interpreter's recursion limit.
        Nodes are recorded in the same order a recursive depth-first walk
        would visit them, and each uid is recorded once (cycles are fine).
        """
        found = {}
        pending = [root]
        while pending:
            node = pending.pop()
            if node.uid in found:
                continue
            found[node.uid] = node
            # Reversed so the first neighbour is popped, and explored, first.
            pending.extend(reversed(list(node.edges.values())))
        return found

# Local mode: just build your graph locally, and access it locally

In [324]:
# Make a graph with an empty parent node, and 10 NumpyTestNodes connected to it.
graph1 = EmptyNode()
graph1.edges = {i: NumpyTestNode() for i in range(10)}

# Make a second graph the same way.
graph2 = EmptyNode()
graph2.edges = {i: NumpyTestNode() for i in range(10)}

# Connect the two graphs under a common parent: 'g1' -> graph1, 'g2' -> graph2.
graph3 = EmptyNode()
graph3.edges = {'g1': graph1, 'g2': graph2}

# This is expected to throw a warning, because an EmptyNode has no data.
graph3['g1'].read()

# Read the data as a numpy array.
graph3['g1'][3].read()



array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1,
       2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3,
       4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5,
       6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7,
       8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1,
       2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3,
       4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5,
       6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7,
       8, 9])

# Remote mode: Host the graph from before on a server, access it locally

In [325]:
# Serve the graph: GraphServer indexes every node reachable from graph3 by uid.
# NOTE(review): other cells in this notebook may look up the module-level name
# `server`; check them before renaming this variable.
server = GraphServer(graph3)

# Get the graph from the server.
# RootNode asks the server for the root uid, then for that node's info and edges.
local_graph = RootNode(server)

# Read the data.
# Each [] lookup swaps the uid stored on the edge for a node built by NodeFactory.produce.
# The array can be shorter than in local mode when the factory requests fewer
# chunks than the served node holds.
local_graph['g1'][3].read()



array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1,
       2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3,
       4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5,
       6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7,
       8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
# (empty cell)