In [1]:
import numpy as np

In [2]:
class Operation:
    """Represents a Node in the Computation Graph.

    An operation reads the outputs of its ``input_nodes`` and produces an
    output for zero or more consumers. Subclasses override ``compute``.
    """

    def __init__(self, input_nodes=None):
        """Constructs an Operation with input_nodes as inputs
           which computes outputs to zero or more consumers.

        Args:
            input_nodes: list of nodes whose outputs feed this operation.
                When omitted, every instance gets its own empty list.
        """
        # A literal ``[]`` default is created once and shared by every
        # Operation built without arguments; use None as the sentinel instead.
        self.input_nodes = [] if input_nodes is None else input_nodes
        self.consumers = []

        # Connect this node with its inputs, by adding it as a consumer to its inputs
        for input_node in self.input_nodes:
            input_node.consumers.append(self)

        # Add this operation to the Computation Graph
        # TODO: provide the graph explicitly
        _default_graph.operations.append(self)

    def compute(self):
        """Computes the output of the operation. Depends on the specific operation.

        The base implementation does nothing and returns None.
        """
        pass

In [3]:
class Add(Operation):
    """Operation node that adds the values of its two input nodes."""

    def __init__(self, x, y):
        """Registers x and y as the two input nodes of this operation."""
        operands = [x, y]
        super().__init__(input_nodes=operands)

    def compute(self, x, y):
        """Returns x + y for the already-computed input values."""
        total = x + y
        return total

In [4]:
class Matmul(Operation):
    """Operation node that multiplies its two inputs with ``A.dot(B)``."""

    def __init__(self, A, B):
        """Registers A and B as the left and right input nodes."""
        factors = [A, B]
        super().__init__(input_nodes=factors)

    def compute(self, A, B):
        """Returns the dot product of the computed values A and B."""
        product = A.dot(B)
        return product

In [5]:
class Sigmoid(Operation):
    """Operation node that applies the logistic function to its input."""

    def __init__(self, x):
        """Registers x as the single input node."""
        super().__init__(input_nodes=[x])

    def compute(self, x):
        """Returns 1 / (1 + e^(-x)), evaluated with NumPy."""
        denominator = 1 + np.exp(-x)
        return 1 / denominator

In [6]:
class Softmax(Operation):
    """Operation node that applies a row-wise softmax to its input."""

    def __init__(self, x):
        """Registers x as the single input node."""
        super().__init__(input_nodes=[x])

    def compute(self, x):
        """Returns the softmax of every row of x.

        The input must have at least 2 dimensions (it is reduced along
        axis=1), e.g. a matrix holding one score vector per row.
        """
        shifted = x
        if np.size(x):
            # Subtracting each row's maximum cancels out in the ratio below,
            # but keeps np.exp from overflowing to inf (which would give nan).
            shifted = x - np.max(x, axis=1, keepdims=True)

        exponentials = np.exp(shifted)
        # axis=1 so that for each row we sum its columns; keepdims retains the
        # summed axis so the division broadcasts row by row
        return exponentials / np.sum(exponentials, axis=1, keepdims=True)

In [7]:
class Log(Operation):
    """Operation node that takes the natural logarithm of its input."""

    def __init__(self, x):
        """Registers x as the single input node."""
        source = [x]
        super().__init__(input_nodes=source)

    def compute(self, x):
        """Returns np.log of the computed input value x."""
        logarithm = np.log(x)
        return logarithm

In [8]:
class Multiply(Operation):
    """Operation node for the element-wise product ``A * B`` of its two inputs."""

    def __init__(self, A, B):
        """Registers A and B as the two input nodes."""
        factors = [A, B]
        super().__init__(input_nodes=factors)

    def compute(self, A, B):
        """Returns A * B for the already-computed input values."""
        product = A * B
        return product

In [9]:
class ReduceSum(Operation):
    """Operation node that sums its input tensor A along ``axis``.

    With axis=None the whole tensor is summed.
    """

    def __init__(self, A, axis=None):
        """Registers A as the input node and remembers the reduction axis."""
        super().__init__(input_nodes=[A])
        # Read later by compute(); None means "sum over all elements"
        self.axis = axis

    def compute(self, A):
        """Returns np.sum of the computed value A along the stored axis."""
        reduction_axis = self.axis
        return np.sum(A, axis=reduction_axis)

In [10]:
class Negate(Operation):
    """Operation node that flips the sign of its input."""

    def __init__(self, x):
        """Registers x as the single input node."""
        source = [x]
        super().__init__(input_nodes=source)

    def compute(self, x):
        """Returns -x for the already-computed input value."""
        negated = -x
        return negated

In [11]:
class Placeholder:
    """Input node of the Computation Graph.

    A Placeholder has no inputs of its own and can only be consumed by
    other nodes. It stores no value itself: Session.run reads the value
    for a Placeholder from the feed_dict it is given.
    """

    def __init__(self):
        """Creates the node and registers it with the default graph."""
        # Nodes that use this placeholder as an input append themselves here
        self.consumers = []

        # Register the placeholder in the Computation Graph
        # TODO: provide the graph explicitly
        registry = _default_graph.placeholders
        registry.append(self)

In [12]:
class Variable:
    """Parameter node of the Computation Graph.

    A Variable has no inputs, only consumers. Its current value lives in
    ``self.value``, starts out as ``initial_value`` and may be changed later.
    """

    def __init__(self, initial_value=None):
        """Creates the node, stores its starting value and registers it
           with the default graph."""
        # Nodes that use this variable as an input append themselves here
        self.consumers = []
        self.value = initial_value

        # Register the variable in the Computation Graph
        # TODO: provide the graph explicitly
        registry = _default_graph.variables
        registry.append(self)

In [13]:
class Graph:
    """Represents the actual Computation Graph which has 3 types of Nodes:
       - placeholders
       - variables
       - operations
    """

    def __init__(self, placeholders=None, variables=None, operations=None):
        """Creates a graph, optionally seeded with existing node lists.

        Args:
            placeholders: list that collects the graph's Placeholder nodes.
            variables: list that collects the graph's Variable nodes.
            operations: list that collects the graph's Operation nodes.

        Any list that is omitted is replaced by a new empty list owned by
        this graph only.
        """
        # Literal ``[]`` defaults are created once and would be shared by
        # every Graph(), so nodes of one graph would show up in all others.
        self.placeholders = [] if placeholders is None else placeholders
        self.variables = [] if variables is None else variables
        self.operations = [] if operations is None else operations

    def as_default(self):
        """Installs this graph as the module-level default graph and returns it."""
        global _default_graph
        _default_graph = self
        return _default_graph

In [14]:
class Session:
    """Represents a single execution of the whole Computation graph."""
    # TODO: provide the Graph explicitly

    def run(self, operation, feed_dict=None):
        """Computes and returns the output of ``operation``.

        Performs a post-order traversal of all nodes that ``operation``
        depends on, so that the inputs of every operation are computed
        before the operation itself.

        Args:
            operation: the node whose output is requested.
            feed_dict: maps each Placeholder reachable from ``operation``
                to its value. May be omitted when no Placeholder is involved.

        Returns:
            The computed output of ``operation``.

        Raises:
            KeyError: if a reachable Placeholder is missing from feed_dict.
        """
        if feed_dict is None:
            feed_dict = {}

        outputs = {}

        for node in Session.traverse_post_order(operation):
            if isinstance(node, Placeholder):
                outputs[node] = feed_dict[node]
            elif isinstance(node, Variable):
                outputs[node] = node.value
            elif isinstance(node, Operation):
                computed_inputs = [outputs[input_node] for input_node in node.input_nodes]
                outputs[node] = node.compute(*computed_inputs)
            else:
                # Nodes of an unknown kind have no computed output
                outputs[node] = None

        return outputs[operation]

    @staticmethod
    def traverse_post_order(operation):
        """Returns the nodes ``operation`` depends on, inputs before consumers.

        Every node appears exactly once, even when several operations share
        it as an input, so shared sub-graphs are neither traversed nor
        computed more than once. ``operation`` itself is the last element.
        """
        operations_post_order = []
        visited = set()

        def traverse(node):
            if node in visited:
                return
            visited.add(node)

            # Placeholders and Variables do not have input_nodes
            if isinstance(node, Operation):
                for input_node in node.input_nodes:
                    traverse(input_node)

            operations_post_order.append(node)

        traverse(operation)
        return operations_post_order

In [15]:
# Linear Perceptron: evaluates y = A.dot(x) + b

# Create a Graph and install it as the default graph for the nodes below
graph = Graph().as_default()

# Parameters of the affine map
A = Variable(np.array([[1, 0], [0, -1]]))
b = Variable(np.array([1, 1]))

# Input node; its value is supplied through feed_dict when the graph is run
x = Placeholder()

y = Add(Matmul(A, x), b)

Session().run(y, feed_dict={x: np.array([1, 2])})

array([ 2, -1])

In [16]:
# Sigmoid Perceptron: evaluates sigmoid(w.dot(x) + b)

# Create a Graph and install it as the default graph for the nodes below
graph = Graph().as_default()

# Input node; its value is supplied through feed_dict when the graph is run
x = Placeholder()

# Randomly initialised parameters: two weights and a scalar bias,
# each drawn from a normal distribution with mean 0 and standard deviation 1
w = Variable(initial_value=np.random.normal(0, 1, 2))
b = Variable(initial_value=np.random.normal(0, 1))

perceptron = Sigmoid(Add(Matmul(w, x), b))

Session().run(perceptron, feed_dict={x: np.array([-1, 1])})

0.780807237593824

In [17]:
# Multi-class Perceptron: evaluates softmax(X.dot(W) + b) row by row

# Create a Graph and install it as the default graph for the nodes below
graph = Graph().as_default()

# X is fed a matrix so that a whole batch is computed in a single run
X = Placeholder()

W = Variable(np.array([[1, -1], [1, -1]]))
b = Variable(np.array([0, 0]))

classifier = Softmax(Add(Matmul(X, W), b))

Session().run(classifier, {
    X: np.array([[-3, -3], [-3, -4], [4, 5], [3, 6]]),
})

array([[6.14417460e-06, 9.99993856e-01],
       [8.31528028e-07, 9.99999168e-01],
       [9.99999985e-01, 1.52299795e-08],
       [9.99999985e-01, 1.52299795e-08]])

In [20]:
# Cross-entropy loss: J = -sum(C * log(classifier)), summed over all elements

# C is supplied through feed_dict at run time
# (presumably the one-hot class labels matching the rows of X -- TODO confirm)
C = Placeholder()

J = Negate(ReduceSum(Multiply(C, Log(classifier))))

Se