# Gnarl - An Easy to use Deep Learning Framework for Python

Welcome to Gnarl, a lightweight deep learning framework built in Python using NumPy.

In [1]:
# Import dependencies
import numpy as np

np.seterr(all='raise')

{'divide': 'warn', 'invalid': 'warn', 'over': 'warn', 'under': 'ignore'}

In [2]:
# Utility functions to help with implementations and checks of Node classes
def topological_sort(feed_dict):
    """
    Sort generic nodes in topological order using Kahn's Algorithm.

    `feed_dict`: A dictionary where the key is an `Input` node and
                the value is the respective value feed to that node.

    Side effect: every `Input` node that is visited has its `value`
    set to the matching entry of `feed_dict`.

    Returns a list of sorted nodes.
    """
    from collections import deque

    input_nodes = list(feed_dict)

    # Phase 1: walk the graph breadth-first from the inputs and record, for
    # every reachable node, its set of incoming and outgoing neighbours.
    G = {}
    expanded = set()
    queue = deque(input_nodes)
    while queue:
        n = queue.popleft()
        # A node reachable through several paths is queued several times;
        # expanding it once is enough because the edge sets are idempotent.
        if n in expanded:
            continue
        expanded.add(n)
        G.setdefault(n, {'in': set(), 'out': set()})
        for m in n.outbound_nodes:
            G.setdefault(m, {'in': set(), 'out': set()})
            G[n]['out'].add(m)
            G[m]['in'].add(n)
            queue.append(m)

    # Phase 2: Kahn's algorithm. S holds nodes with no unprocessed inputs.
    L = []
    S = set(input_nodes)
    while S:
        n = S.pop()

        if isinstance(n, Input):
            n.value = feed_dict[n]

        L.append(n)
        for m in n.outbound_nodes:
            # `discard` rather than `remove`: a node that feeds the same
            # consumer twice (e.g. Mul(x, x)) lists it twice in
            # `outbound_nodes`, but the edge exists only once in the sets.
            G[n]['out'].discard(m)
            G[m]['in'].discard(n)
            # if no other incoming edges add to S
            if not G[m]['in']:
                S.add(m)
    return L

def forward_pass(output_node, sorted_nodes):
    """
    Run one forward sweep over the graph and report the result.

    Arguments:

        `output_node`: The node whose value is wanted after the sweep.
        `sorted_nodes`: Nodes in topological order; `forward()` is called
                        on each of them, in the order given.

    Returns `output_node.value` as it stands once every node has run.
    """
    for node in sorted_nodes:
        node.forward()
    result = output_node.value
    return result

def forward_and_backward(graph):
    """Run a full forward sweep followed by a full backward sweep.

    Arguments:

        `graph`: The result of calling `topological_sort`, i.e. a list of
                 nodes in topological order.

    Every node's `forward()` is called in list order, then every node's
    `backward()` is called in the opposite order. Nothing is returned.
    """
    # Forward sweep: inputs first, loss last.
    for node in graph:
        node.forward()

    # Backward sweep: loss first, inputs last.
    for node in reversed(graph):
        node.backward()

def sgd_update(trainables, learning_rate=1e-4):
    """Apply one step of stochastic gradient descent to each trainable.

    Arguments:

        `trainables`: A list of `Input` Nodes representing weights/biases.
                      Each must hold its own gradient under
                      `node.gradients[node]`.
        `learning_rate`: Step size multiplied into the gradient.

    The update uses `-=`, so an array value is modified in place.
    """
    for param in trainables:
        step = learning_rate * param.gradients[param]
        param.value -= step

## Defining a computational graph using Nodes and Edges

First, we define a generic `Node` class. It takes a list of inbound nodes as its argument, initializes itself, and appends itself to the list of outbound nodes of each inbound node.

### Node(object)

In [3]:
class Node(object):
    """Base class for a node in a computational graph.

    Attributes:
        inbound_nodes: Nodes whose values this node consumes.
        outbound_nodes: Nodes that consume this node's value.
        value: Result of the last forward pass (None until computed).
        gradients: Maps a node to the gradient computed for it during the
            last backward pass.
    """

    def __init__(self, inbound_nodes=None):
        """A node in a computational graph.

        Arguments:

            `inbound_nodes`: List of nodes feeding this node. Defaults to
                             a fresh empty list per instance.
        """
        # A literal `[]` default would be one list shared by every node
        # created without arguments.
        self.inbound_nodes = [] if inbound_nodes is None else inbound_nodes
        self.outbound_nodes = []
        self.value = None # Init first value as None
        self.gradients = {}

        # Append this node to all nodes that point to it
        for in_node in self.inbound_nodes:
            in_node.outbound_nodes.append(self)

    def forward(self):
        """Forward propagate input from inbound nodes to outbound nodes.

        Raises NotImplementedError; subclasses must override.
        """
        raise NotImplementedError

    def backward(self):
        """Backward propagate output from outbound nodes to inbound nodes.

        Raises NotImplementedError; subclasses must override.
        """
        raise NotImplementedError

## Necessary Subclasses to Node
We will need to subclass the Node base class so that we have a set of nodes that are able to perform different types of computation required to build a fully connected computational graph.

### Input(Node) 

In [4]:
class Input(Node):
    """Leaf node that holds a value instead of computing one."""

    def __init__(self):
        """An input node. Input nodes don't perform any computations.
        Rather, they represent the values (features, weights, biases)
        that are fed into the neural network.
        """
        Node.__init__(self)

    def forward(self, value=None):
        """Store `value` as this node's value; leave it untouched if None."""
        if value is None:
            return
        self.value = value

    def backward(self):
        """Sum the gradients every consumer computed for this node.

        The total is stored under `self.gradients[self]`; it stays the
        integer 0 when the node has no consumers.
        """
        total = 0
        for consumer in self.outbound_nodes:
            total = total + consumer.gradients[self]
        self.gradients = {self: total}

### Add(Node)

In [5]:
class Add(Node):
    """Node whose value is the sum of the values of all its inbound nodes."""

    def __init__(self, *args):
        """A node that adds its input nodes together.

        Arguments:

            `*args`: The nodes whose values are summed.
        """
        Node.__init__(self, list(args))

    def forward(self):
        """Adds the values of the input nodes and sets the value
        of the node.

        An input whose value cannot be added is reported with `print` and
        skipped, so the remaining inputs still contribute.
        """
        total = 0.
        for in_node in self.inbound_nodes:
            try:
                total = total + in_node.value
            except Exception:
                print('Couldn\'t add value from input node:', in_node.value)
        self.value = total

    def backward(self):
        """Backward propagate from outbound nodes to this node.

        d(sum)/d(input) is 1 for every input, so each inbound node receives
        the full gradient of every consumer.
        NOTE(review): assumes all inputs share the output's shape; inputs
        that were broadcast in `forward` are not reduced back here.
        """
        # Float accumulators so integer-valued inputs can hold float gradients.
        self.gradients = {n: np.zeros_like(n.value, dtype=float)
                          for n in self.inbound_nodes}
        for n in self.outbound_nodes:
            grad_cost = n.gradients[self]
            for in_node in self.inbound_nodes:
                self.gradients[in_node] = self.gradients[in_node] + grad_cost

### Subtract(Node)

In [6]:
class Subtract(Node):
    """Node whose value is `x - y` for its two inbound nodes."""

    def __init__(self, x, y):
        """A node that subtracts inputs.

        Arguments:

            `x`: Node providing the minuend.
            `y`: Node providing the subtrahend.
        """
        Node.__init__(self, [x, y])

    def forward(self):
        """Compute forward propagation of node value.

        The result is built with `-` rather than `-=`, so the first input's
        own value array is never modified. A value that cannot be
        subtracted is reported with `print` and skipped.
        """
        result = self.inbound_nodes[0].value
        for in_node in self.inbound_nodes[1:]:
            try:
                result = result - in_node.value
            except Exception:
                print('Couldn\'t subract value from input node:', in_node.value)
        self.value = result

    def backward(self):
        """Compute backward propagation of node.

        The first input receives +gradient and every subtracted input
        receives -gradient, summed over all consumers.
        NOTE(review): assumes the inputs share the output's shape.
        """
        self.gradients = {n: np.zeros_like(n.value, dtype=float)
                          for n in self.inbound_nodes}
        first = self.inbound_nodes[0]
        for n in self.outbound_nodes:
            grad_cost = n.gradients[self]
            self.gradients[first] = self.gradients[first] + grad_cost
            for in_node in self.inbound_nodes[1:]:
                self.gradients[in_node] = self.gradients[in_node] - grad_cost

### Mul(Node)

In [7]:
class Mul(Node):
    """Node whose value is the dot product `x.dot(y)` of its two inputs."""

    def __init__(self, x, y):
        """A node that multiplies its input nodes together.

        Arguments:

            `x`: Node providing the left operand.
            `y`: Node providing the right operand.
        """
        Node.__init__(self, [x, y])

    def forward(self):
        """Multiplies the value of the input nodes and sets the value
        of the node.

        The product is `x.dot(y)`: an inner product for 1-D arrays and a
        matrix product for 2-D arrays. A failure is reported with `print`
        and leaves `self.value` unchanged.
        """
        x = self.inbound_nodes[0].value
        y = self.inbound_nodes[1].value

        try:
            self.value = x.dot(y)
        except Exception:
            print('Couldn\'t mutiply the value from input node:', x, y)

    def backward(self):
        """Backward propagate the gradient of the dot product.

        For z = x.dot(y), with 2-D operands: dL/dx = g.dot(y.T) and
        dL/dy = x.T.dot(g), where g is the consumer's gradient for z.
        1-D operands are treated as a row (x) or column (y) vector and the
        result is reshaped back. Operands with more than two dimensions are
        not supported.
        """
        self.gradients = {n: np.zeros_like(n.value, dtype=float)
                          for n in self.inbound_nodes}
        x_node, y_node = self.inbound_nodes[0], self.inbound_nodes[1]
        for n in self.outbound_nodes:
            grad_cost = n.gradients[self]

            x = np.asarray(x_node.value, dtype=float)
            y = np.asarray(y_node.value, dtype=float)
            # Promote vectors to matrices so one formula covers every case.
            x2 = x.reshape(1, -1) if x.ndim == 1 else x
            y2 = y.reshape(-1, 1) if y.ndim == 1 else y
            g2 = np.asarray(grad_cost, dtype=float).reshape(x2.shape[0],
                                                            y2.shape[1])

            dx = g2.dot(y2.T).reshape(x.shape)
            dy = x2.T.dot(g2).reshape(y.shape)

            # Sequential accumulation also handles Mul(x, x), where both
            # operands share one dictionary key.
            self.gradients[x_node] = self.gradients[x_node] + dx
            self.gradients[y_node] = self.gradients[y_node] + dy

In [8]:
# Smoke test for Mul: build five inputs, multiply two of them, print results.
ns = [Input() for n in range(0,5)]
i = np.array([1., 2., 3.])
for n in ns:
    # Input.forward stores the array object itself, not a copy.
    n.forward(i)
    # NOTE: `+=` on an ndarray is in place, so every node ends up holding
    # the same array, [6., 7., 8.]; hence the printed 149.0 (36 + 49 + 64).
    i += 1
m = Mul(ns[1], ns[2])
m.forward()
print(m.value)
# m has no outbound nodes, so backward() leaves the gradients at zero.
m.backward()
print(m.gradients[ns[2]])

149.0
[ 0.  0.  0.]


### Divide(Node)

In [9]:
class Divide(Node):
    """Node whose value is the element-wise quotient `x / y`."""

    def __init__(self, x, y):
        """Compute the element-wise division between two input nodes.

        Arguments:

            `x`: Node providing the numerator.
            `y`: Node providing the denominator.
        """
        Node.__init__(self, [x, y])

    def forward(self):
        """Forward propagate the value from dividing one input node with
        the other. The operands are cached on `self.x` / `self.y` for use
        in `backward`."""
        self.x = self.inbound_nodes[0].value
        self.y = self.inbound_nodes[1].value

        self.value = np.divide(self.x, self.y)

    def backward(self):
        """Backward propagate value from outbound node gradients.

        With z = x / y: dL/dx = g / y and dL/dy = -g * x / y**2. The
        contributions of all consumers are summed rather than overwritten.
        """
        # Float accumulators: integer inputs must still hold float gradients.
        self.gradients = {n: np.zeros_like(n.value, dtype=float)
                          for n in self.inbound_nodes}
        numerator, denominator = self.inbound_nodes[0], self.inbound_nodes[1]

        for n in self.outbound_nodes:
            grad_cost = n.gradients[self]
            self.gradients[numerator] = (self.gradients[numerator]
                                         + 1. / self.y * grad_cost)
            self.gradients[denominator] = (self.gradients[denominator]
                                           - self.x / (self.y**2) * grad_cost)

In [10]:
# Smoke test for Divide: element-wise quotient of two 2x2 integer arrays.
x, y = Input(), Input()

x.forward(value=np.array([[1,2], [3,4]]))
y.forward(value=np.array([[2,2], [9, 4]]))

d = Divide(x, y)
d.forward()
print(d.value)
# d has no outbound nodes, so backward() only creates the zero gradients.
d.backward()
print(d.gradients)

[[ 0.5         1.        ]
 [ 0.33333333  1.        ]]
{<__main__.Input object at 0x10a552d30>: array([[0, 0],
       [0, 0]]), <__main__.Input object at 0x10a552da0>: array([[0, 0],
       [0, 0]])}


### Linear(Node)
Represents a linear combination of input nodes and their respective weights inside a hidden layer.

In [11]:
class Linear(Node):
    """Node computing `X.dot(W) + b`, with the bias term optional."""

    def __init__(self, *args):
        """A node that computes the linear combination of a list of input
        nodes features, a list of input weights, and a bias term.

        Arguments:

            `*args`: `(X, W)` or `(X, W, b)` nodes, in that order. A bias
                     passed as `None` is treated as "no bias".
        """
        # Drop a None bias so Node.__init__ never tries to register this
        # node on a non-node.
        Node.__init__(self, [n for n in args if n is not None])

    def forward(self):
        """Compute the linear combination of the inbound nodes."""

        # Create vectors
        X = self.inbound_nodes[0].value
        W = self.inbound_nodes[1].value

        self.value = np.dot(X, W)
        if len(self.inbound_nodes) > 2:
            self.value = self.value + self.inbound_nodes[2].value

    def backward(self):
        """Compute the backward propagation of the node.

        For each consumer gradient g: dX = g.dot(W.T), dW = X.T.dot(g) and,
        when a bias is present, db = g summed over axis 0.
        """
        self.gradients = {n: np.zeros_like(n.value, dtype=float)
                          for n in self.inbound_nodes}
        has_bias = len(self.inbound_nodes) > 2
        for n in self.outbound_nodes:
            # Find current grad
            grad_cost = n.gradients[self]

            # Compute partial grads
            dX = np.dot(grad_cost, self.inbound_nodes[1].value.T)
            dW = np.dot(self.inbound_nodes[0].value.T, grad_cost)

            # Add to current node grads
            self.gradients[self.inbound_nodes[0]] += dX
            self.gradients[self.inbound_nodes[1]] += dW
            if has_bias:
                cumul_b = np.sum(grad_cost, axis=0, keepdims=False)
                self.gradients[self.inbound_nodes[2]] += cumul_b

In [12]:
# Smoke test for Linear with a single example fed as plain Python lists.
inputs, weights, bias = Input(), Input(), Input()

f = Linear(inputs, weights, bias)

feed_dict = {
    inputs: [6, 14, 3],
    weights: [0.5, 0.25, 1.4],
    bias: 2
}

# topological_sort also copies the feed_dict values onto the Input nodes.
graph = topological_sort(feed_dict)
output = forward_pass(f, graph)

# 6*0.5 + 14*0.25 + 3*1.4 + 2 = 12.7
print(output) # should be 12.7 with this example

12.7


In [13]:
# Smoke test for Linear with a 2x2 batch, 2x2 weights and a per-column bias.
X, W, b = Input(), Input(), Input()

f = Linear(X, W, b)

X_ = np.array([[-1., -2.], [-1, -2]])
W_ = np.array([[2., -3], [2., -3]])
b_ = np.array([-3., -5])

feed_dict = {X: X_, W: W_, b: b_}

graph = topological_sort(feed_dict)
output = forward_pass(f, graph)

# Each row: [-1, -2].dot(W_) = [-6, 9]; adding b_ gives [-9, 4].
"""
Output should be:
[[-9., 4.],
[-9., 4.]]
"""
print(output)

[[-9.  4.]
 [-9.  4.]]


## Activation Nodes
The following are some activation functions, each represented as a node in the computational graph of a neural network.

### Sigmoid(Node)
Mainly used in logistic regression and simplistic neural networks.

In [14]:
class Sigmoid(Node):
    """Activation node applying the logistic sigmoid element-wise."""

    def __init__(self, node):
        """Wrap `node` so that its value is passed through the sigmoid."""
        Node.__init__(self, [node])

    def _sigmoid(self, z):
        """Return 1 / (1 + exp(-z)) for an input z."""
        return 1./(1 + np.exp(-z))

    def forward(self):
        """Set this node's value to the sigmoid of the inbound value."""
        source = self.inbound_nodes[0]
        self.value = self._sigmoid(source.value)

    def backward(self):
        """Accumulate the gradient for the inbound node.

        Uses the cached forward value s: the local derivative is
        s * (1 - s), multiplied element-wise by each consumer's gradient.
        """
        self.gradients = {node: np.zeros_like(node.value)
                          for node in self.inbound_nodes}
        source = self.inbound_nodes[0]

        for consumer in self.outbound_nodes:
            upstream = consumer.gradients[self]
            activation = self.value
            local_slope = activation * (1. - activation)
            self.gradients[source] += local_slope * upstream # Ele wise

In [15]:
# Smoke test for Sigmoid stacked on the Linear example above.
X, W, b = Input(), Input(), Input()

f = Linear(X, W, b)
g = Sigmoid(f)

X_ = np.array([[-1., -2.], [-1, -2]])
W_ = np.array([[2., -3], [2., -3]])
b_ = np.array([-3., -5])

feed_dict = {X: X_, W: W_, b: b_}

graph = topological_sort(feed_dict)
output = forward_pass(g, graph)

# The linear output is [-9, 4] per row, so this prints sigmoid(-9), sigmoid(4).
"""
Output should be:
[[  1.23394576e-04   9.82013790e-01]
 [  1.23394576e-04   9.82013790e-01]]
"""
print(output)

[[  1.23394576e-04   9.82013790e-01]
 [  1.23394576e-04   9.82013790e-01]]


### ReLU(Node)
More stable than the sigmoid. Runs risk of dead neurons.

In [16]:
class ReLU(Node):
    """Rectified linear unit whose output is floored at `epsilon`.

    Note that the floor is `epsilon` (default 1e-4), not 0: inputs at or
    below it are emitted as `epsilon`.
    """

    def __init__(self, node, epsilon=1e-4):
        """Computes rectified linear units for the node.

        Arguments:

            `node`: The inbound node to rectify.
            `epsilon`: Lower bound applied to the output.
        """
        Node.__init__(self, [node])
        self.epsilon = epsilon

    def forward(self):
        """Forward propagate node values: element-wise max(epsilon, input)."""
        self.value = np.maximum(self.epsilon, self.inbound_nodes[0].value)

    def backward(self):
        """Backward propagate node gradients.

        The gradient passes through where the output is above `epsilon`
        and is zero elsewhere.
        """
        self.gradients = {n: np.zeros_like(n.value, dtype=float)
                          for n in self.inbound_nodes}
        clipped = self.value <= self.epsilon

        for n in self.outbound_nodes:
            grad_cost = n.gradients[self]
            # Build a masked copy; writing zeros into `grad_cost` itself
            # would modify the consumer's own gradient array.
            passed = np.where(clipped, 0., grad_cost)
            self.gradients[self.inbound_nodes[0]] += passed

In [17]:
# Smoke test for ReLU stacked on a Linear node.
X, W, b = Input(), Input(), Input()

f = Linear(X, W, b)
g = ReLU(f)

X_ = np.array([[-1., 2.], 
               [1, -1]])
W_ = np.array([[2., 1.], 
               [0., 1.]])
b_ = np.array([0., 0.])

feed_dict = {X: X_, W: W_, b: b_}

graph = topological_sort(feed_dict)
output = forward_pass(g, graph)

# The linear output is [[-2, 1], [2, 0]].
# NOTE: ReLU floors at its epsilon (default 1e-4), so the non-positive
# entries print as 1e-04 rather than the 0 stated in the note below.
"""
Output should be:
[[  0   1]
 [  2   0]]
"""
print(output)

[[  1.00000000e-04   1.00000000e+00]
 [  2.00000000e+00   1.00000000e-04]]


### LeakyReLU(Node)
A ReLU variant intended to avoid dead neurons.

In [18]:
class LeakyReLU(Node):
    """Leaky rectified linear unit with a configurable knee and slope.

    For input z: the output is z where z > epsilon, and
    epsilon + leak * (z - epsilon) elsewhere. With `leak=0.` this is
    max(epsilon, z); with `epsilon=0.` it is the usual leaky ReLU.
    """

    def __init__(self, node, epsilon=0., leak=1e-2):
        """Computes leaky rectified linear units for the node.

        Arguments:

            `node`: The inbound node to rectify.
            `epsilon`: Threshold below which the leak slope applies.
            `leak`: Slope used for inputs at or below `epsilon`.
        """
        Node.__init__(self, [node])
        self.epsilon = epsilon
        self.leak = leak

    def forward(self):
        """Forward propagate node values."""
        z = np.asarray(self.inbound_nodes[0].value)
        leaked = self.epsilon + self.leak * (z - self.epsilon)
        self.value = np.where(z > self.epsilon, z, leaked)

    def backward(self):
        """Backward propagate node gradients.

        The local slope is 1 where the input exceeded `epsilon` and `leak`
        elsewhere; it scales each consumer's gradient.
        """
        self.gradients = {n: np.zeros_like(n.value, dtype=float)
                          for n in self.inbound_nodes}
        z = np.asarray(self.inbound_nodes[0].value)
        active = z > self.epsilon

        for n in self.outbound_nodes:
            grad_cost = n.gradients[self]
            # Scale rather than overwrite, and never write into the
            # consumer's own gradient array.
            passed = np.where(active, grad_cost, self.leak * grad_cost)
            self.gradients[self.inbound_nodes[0]] += passed

## Cost Function (Output) Nodes
The following are some commonly used cost functions, which compute the residuals or errors between actual outputs and estimated outputs.

### MSE(Node)
Works best for continuous outputs.

In [19]:
class MSE(Node):
    """Loss node computing the mean squared error between y and y_hat."""

    def __init__(self, y, y_hat):
        """Create the loss node.

        Arguments:

            `y`: Node holding the target values.
            `y_hat`: Node holding the predictions.

        Should only be used as the last node in a network.
        """
        Node.__init__(self, [y, y_hat])

    def forward(self):
        """Compute the mean squared error and cache what `backward` needs.

        Both inputs are flattened to column vectors. `self.m` is the first
        dimension of the target value and `self.error` is `y - y_hat`.
        """
        target_node, prediction_node = self.inbound_nodes
        targets = target_node.value.reshape(-1, 1)
        predictions = prediction_node.value.reshape(-1, 1)

        self.m = target_node.value.shape[0]
        self.error = targets - predictions

        self.value = np.mean(np.square(self.error))

    def backward(self):
        """Store the gradient of the loss with respect to both inputs.

        Uses the `self.m` and `self.error` cached by `forward`:
        +2/m * error for y and -2/m * error for y_hat.
        """
        target_node, prediction_node = self.inbound_nodes
        scale = 2. / self.m
        self.gradients[target_node] = scale * self.error
        self.gradients[prediction_node] = -scale * self.error

In [20]:
# Smoke test for MSE on three samples.
y, y_hat = Input(), Input()
cost = MSE(y, y_hat)

y_ = np.array([1, 2, 3])
y_hat_ = np.array([4.5, 5, 10])

feed_dict = {y: y_, y_hat: y_hat_}
graph = topological_sort(feed_dict)
# forward pass (followed by a backward pass over the same graph)
forward_and_backward(graph)

# Errors are -3.5, -3 and -7: (12.25 + 9 + 49) / 3 = 23.4166...
"""
Expected output

23.4166666667
"""
print(cost.value)

23.4166666667


### LogLoss(Node)
Should only be used for probabilistic output labels between 0 and 1.

In [21]:
class LogLoss(Node):
    """Loss node computing the log loss (binary cross-entropy)."""

    def __init__(self, y, y_hat):
        """Create the loss node.

        The value is the mean over samples of
        (y - 1)*log(1 - y_hat) - y * log(y_hat).
        Should always be last node in a graph, and only useful for
        probabilistic outputs between 0 and 1.

        Arguments:

            `y`: Node holding the labels.
            `y_hat`: Node holding the predicted probabilities.
        """
        Node.__init__(self, [y, y_hat])

    def forward(self):
        """Compute the loss and cache the per-sample terms in `self.error`.

        Both inputs are flattened to column vectors; `self.m` is the first
        dimension of the label value.
        """
        label_node, prob_node = self.inbound_nodes
        y = label_node.value.reshape(-1, 1)
        y_hat = prob_node.value.reshape(-1, 1)

        self.m = label_node.value.shape[0]
        negative_term = (y - 1.) * np.log(1. - y_hat)
        positive_term = y * np.log(y_hat)
        self.error = negative_term - positive_term

        self.value = np.mean(self.error)

    def backward(self):
        """Store the gradient of the loss with respect to both inputs.

        d/dy     = (log(1 - y_hat) - log(y_hat)) / m
        d/dy_hat = (-(y - 1) / (1 - y_hat) - y / y_hat) / m
        """
        label_node, prob_node = self.inbound_nodes
        y = label_node.value.reshape(-1, 1)
        y_hat = prob_node.value.reshape(-1, 1)
        scale = 1. / self.m

        grad_y = np.log(1. - y_hat) - np.log(y_hat)
        grad_y_hat = -(y - 1.) / (1. - y_hat) - y / y_hat

        self.gradients[label_node] = scale * grad_y
        self.gradients[prob_node] = scale * grad_y_hat

In [22]:
# Smoke test for LogLoss on three samples with confident predictions.
y, y_hat = Input(), Input()
cost = LogLoss(y, y_hat)

y_ = np.array([1, 0, 1])
y_hat_ = np.array([0.99, 0.1, 0.98])

feed_dict = {y: y_, y_hat: y_hat_}
graph = topological_sort(feed_dict)

# forward pass (followed by a backward pass over the same graph)
forward_and_backward(graph)

"""
Expected output

0.0452045196096
"""
print(cost.value)

0.0452045196096


### Softmax(Node)

In [23]:
class Softmax(Node):
    """Loss node: softmax over the scores followed by negative log-likelihood."""

    def __init__(self, y, y_hat):
        """A node that represents the softmax loss.
        Should always be last node in a computational graph.
        Only useful for multinomial classification problems.

        Arguments:

            `y`: Node holding the labels; they are used directly as a column
                 index into the probabilities, so they must be integers.
            `y_hat`: Node holding the raw scores, one row per sample.
        """
        Node.__init__(self, [y, y_hat])

    def _softmax(self, z):
        """Return the row-wise softmax of `z` without modifying `z`."""
        z = np.asarray(z, dtype=float)
        # Subtracting each row's maximum leaves the result unchanged and
        # prevents blowup in exp. It is done on a new array so the scores
        # held by the inbound node stay intact.
        shifted = z - np.max(z, axis=1, keepdims=True)
        exp_scores = np.exp(shifted)
        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    def forward(self):
        """Forward propagate the node value (the mean negative log-prob)."""
        y = self.inbound_nodes[0].value
        y_hat = self.inbound_nodes[1].value

        probs = self._softmax(y_hat)
        self.probs = probs # For backward prop
        y_log_probs = - np.log(probs[range(len(y)), y])

        loss = np.sum(y_log_probs) / len(y)
        self.value = loss

        # TODO: Add regularization

    def backward(self):
        """Backpropagate gradients.

        The gradient for the scores is (probs - indicator of the label) / m,
        computed on a copy so `self.probs` stays valid. The labels are
        treated as constants and receive a zero gradient, which the label
        node looks up in its own backward pass.
        """
        y = self.inbound_nodes[0].value
        m = len(y)

        grad_probs = self.probs.copy()
        grad_probs[range(m), y] -= 1
        grad_probs /= m

        self.gradients[self.inbound_nodes[0]] = np.zeros_like(y, dtype=float)
        self.gradients[self.inbound_nodes[1]] = grad_probs

In [24]:
# Smoke test for Softmax: compare the node's loss with a manual computation.
y, y_hat = Input(), Input()
cost = Softmax(y, y_hat)

# NOTE(review): y_ is one-hot (2-D), but both the manual computation below
# and Softmax.forward use the labels as a column index, so every 0/1 entry
# is read as a class index - confirm this is intended.
y_ = np.array([[1,0,0], [0,1,0], [0,0,1]])
y_hat_ = np.array([[0.7,0.3,0.2], [0.10,0.7,0.2], [0,0,0.98]])

feed_dict = {y: y_, y_hat: y_hat_}
graph = topological_sort(feed_dict)

# Reference values computed by hand, without the max-shift used by the node.
p = np.exp(y_hat_) / np.sum(np.exp(y_hat_), axis=1, keepdims=True)
y_log_probs = - np.log(p[range(len(y_)), y_])
loss = np.sum(y_log_probs)/len(y_)

print(p)
print(loss)

# forward pass
#forward_and_backward(graph)
# Only the forward pass runs, so cost.gradients is still empty when printed.
forward_pass(cost, graph)

"""
Expected output


"""
print(cost.value, cost.gradients)

[[ 0.43920315  0.29440668  0.26639018]
 [ 0.25462853  0.46396343  0.28140804]
 [ 0.21438726  0.21438726  0.57122548]]
3.66404736725
3.66404736725 {}


# Test the architecture

In [28]:
"""
Test the nn architecture.

Notice that the weights and biases are
generated randomly.

No need to change anything, but feel free to tweak
to test your network, play around with the epochs, batch size, etc!
"""

import numpy as np
from sklearn.datasets import load_boston
from sklearn.utils import shuffle, resample
import sys
from time import time

# Load data
data = load_boston()
X_ = data['data']
y_ = data['target']

# Normalize data
X_ = (X_ - np.mean(X_, axis=0)) / np.std(X_, axis=0)

n_features = X_.shape[1]
n_hidden_l1 = 10
n_hidden_l2 = 5
n_output = 1

# First hidden layer
W1_ = np.random.randn(n_features, n_hidden_l1)
b1_ = np.zeros(n_hidden_l1)

# Second hidden layer
W2_ = np.random.randn(n_hidden_l1, n_hidden_l2)
b2_ = np.zeros(n_hidden_l2)

# Output layer
W3_ = np.random.randn(n_hidden_l2, n_output)
b3_ = np.zeros(n_output)

# Neural network
X, y = Input(), Input()
W1, b1 = Input(), Input()
W2, b2 = Input(), Input()
W3, b3 = Input(), Input()

# First hidden node
l1 = Linear(X, W1, b1)
#s1 = Sigmoid(l1)
s1 = LeakyReLU(l1, leak=0.)

# Second hidden node
l2 = Linear(s1, W2, b2)
#s2 = Sigmoid(l2)
s2 = LeakyReLU(l2, leak=0.)

# Output node
l3 = Linear(s2, W3, b3)

cost = MSE(y, l3)

feed_dict = {
    X: X_,
    y: y_,
    W1: W1_,
    b1: b1_,
    W2: W2_,
    b2: b2_,
    W3: W3_,
    b3: b3_
}

epochs = 100
# Total number of examples
m = X_.shape[0]
batch_size = 11
steps_per_epoch = m // batch_size

graph = topological_sort(feed_dict)
trainables = [W1, b1, W2, b2, W3, b3]

print("Total number of examples = {}".format(m))

# Step 4
# Train for `epochs` epochs of `steps_per_epoch` mini-batch SGD steps each.
for i in range(epochs):
    loss = 0
    for j in range(steps_per_epoch):
        # Step 1
        # Randomly sample a batch of examples
        X_batch, y_batch = resample(X_, y_, n_samples=batch_size)

        # Reset value of X and y Inputs
        X.value = X_batch
        y.value = y_batch

        # Step 2
        forward_and_backward(graph)

        # Step 3
        sgd_update(trainables, learning_rate=1e-4)

        # graph[-1] is the last node of the topological order (the MSE node
        # in the graph listing shown for this run).
        loss += graph[-1].value

    # NOTE(review): this skips the progress line for the second epoch only
    # (i == 1); the reason is not visible here - confirm it is still needed.
    if i != 1: # Weird bugs otherwise
        sys.stdout.write("\rEpoch: {}, Loss: {:.3f}".format(i+1, loss/steps_per_epoch))
    # With epochs = 100 this is true only for i == 0.
    if i % 500 == 0:
        print('') # Force new epoch printline

Total number of examples = 506
Epoch: 1, Loss: 247.226
Epoch: 100, Loss: 13.693

In [29]:
graph

[<__main__.Input at 0x10f24c630>,
 <__main__.Input at 0x10a5526d8>,
 <__main__.Input at 0x10f24cf28>,
 <__main__.Input at 0x10f24c940>,
 <__main__.Input at 0x10f24cf60>,
 <__main__.Input at 0x10f24cf98>,
 <__main__.Input at 0x10a5527b8>,
 <__main__.Input at 0x10f24cfd0>,
 <__main__.Linear at 0x10f24c5f8>,
 <__main__.LeakyReLU at 0x10f24c550>,
 <__main__.Linear at 0x10f24c4e0>,
 <__main__.LeakyReLU at 0x10f24c588>,
 <__main__.Linear at 0x10f24c4a8>,
 <__main__.MSE at 0x10f24c9e8>]

## Create a model class to represent the network

In [30]:
# Keyword arguments passed to the Gnarl constructor as `**nn_options`.
nn_options = {
    'activation': 'leaky_relu',
    'learning_rate': 1e-3,
    'random_state': 1,
    'shuffle': True,
    'verbose': True,
    'solver': 'sgd',
    'loss': 'mse',
    'batch_size': 11
}

# Activation functions
# Maps the activation name given to Gnarl.hidden_layer to its node class.
ACTIVATIONS = {
    'relu': ReLU,
    'leaky_relu': LeakyReLU,
    'sigmoid': Sigmoid,
}

# Loss names accepted by Gnarl.connect_layers, which only tests membership
# of the key; 'softmax' maps to None rather than to a node class.
LOSSES = {
    'mse': MSE,
    'logloss': LogLoss,
    'softmax': None
}

class Gnarl(object):
    """A small feed-forward neural network assembled from graph nodes.

    Typical use: construct with training data, add layers with
    `hidden_layer`, call `connect_layers` once, then `fit` and `predict`.

    `fit` uses the module-level names `sys` and `resample`
    (`sklearn.utils.resample`) imported elsewhere in this file.
    """

    def __init__(self, X=None, y=None, activation='leaky_relu',
                 learning_rate=1e-4,
                 random_state=1,
                 shuffle=True,
                 verbose=False,
                 solver='sgd',
                 loss='mse',
                 batch_size=10):
        """An instance of a neural net model.

        Creates a neural network model object.

        Arguments:

            `X`: Training features; the second dimension sizes the first
                 layer's weights.
            `y`: Training targets.
            `learning_rate`: Step size used by `fit`.
            `verbose`: Print progress during `fit` when true.
            `loss`: One of the keys of `LOSSES`.
            `batch_size`: Number of samples drawn per training step.
            `activation`, `random_state`, `shuffle`, `solver`: Stored on
                 the instance; not otherwise used by this class.
        """

        # Init model from options
        self._init(X=X, y=y, activation=activation,
                   learning_rate=learning_rate, random_state=random_state,
                   shuffle=shuffle, verbose=verbose, solver=solver, loss=loss,
                   batch_size=batch_size)

    def _init(self, X=None, y=None, activation='leaky_relu',
              learning_rate=1e-4,
              random_state=1,
              shuffle=True,
              verbose=False,
              solver='sgd',
              loss='mse',
              batch_size=10):
        """Initialize the model (same arguments as the constructor)."""

        # Todo: ensure that options are correct

        # `nodes` maps every Input node to its initial value and is later
        # used as the feed dict for `topological_sort`.
        self.nodes = {}
        self.layers_list = []
        self._weights = []
        self._biases = []
        self.trainables = []

        # Init graph
        self.graph = []

        self.activation = activation
        self.learning_rate = learning_rate
        self.random_state = random_state
        self.shuffle = shuffle
        self.verbose = verbose
        self.solver = solver
        self.loss = loss
        self.batch_size = batch_size

        # Set X and y input nodes to initiate model
        self.X = Input()
        self.y = Input()

        self._update_input(X)
        self._update_output(y)

        # Set first nodes in node dict for future
        self.nodes[self.X] = self.X.value
        self.nodes[self.y] = self.y.value

        self._input_layer(self.X.value)

    def _update_input(self, X_train):
        """Update the model's feature training data."""
        self.X.value = X_train

    def _update_output(self, y_train):
        """Update the model's output training data."""
        self.y.value = y_train

    def _input_layer(self, X_train):
        """Define the model's input layer."""
        self.layers_list.append(self.X)
        # The input data doubles as the "previous weights" so hidden_layer
        # can read the incoming width from `shape[1]`.
        self._weights.append(self.nodes[self.X])

    def _reset_graph(self):
        """Re-initialise every trainable: random weights, zero biases."""
        for node in self.trainables:
            # If weights
            if len(node.value.shape) == 2:
                node.value = np.random.randn(node.value.shape[0],
                                             node.value.shape[1])
            # if bias
            elif len(node.value.shape) == 1:
                node.value = np.zeros_like(node.value)

    def hidden_layer(self, out_nodes,
                     activation):
        """Add a hidden layer to the model.

        Arguments:

            `out_nodes`: Number of units in the new layer.
            `activation`: A key of `ACTIVATIONS`, or 'none' for a purely
                          linear layer (e.g. a regression output).

        Raises NotImplementedError for 'softmax' and KeyError for an
        unknown activation name.
        """
        # Init random weights
        W = Input()
        W_ = np.random.randn(self._weights[-1].shape[1], out_nodes)
        self._weights.append(W_) # Store weights for convenience

        # Init biases
        b = Input()
        b_ = np.zeros(out_nodes)
        self._biases.append(b_) # Store biases for convenience

        # Add weights and biases to dicts for future use in connect_layers
        self.nodes[W] = W_
        self.nodes[b] = b_

        # Linear combo from previous layer to current layer
        layer = Linear(self.layers_list[-1], W, b)

        # Activation
        if activation == 'none': # Output layer is regression
            self.layers_list.append(layer)
        elif activation == 'softmax':
            raise NotImplementedError(
                'softmax is not available as a layer activation')
        else:
            activated = ACTIVATIONS[activation](layer)
            self.layers_list.append(activated)

        # Add weights and biases to trainables
        self.trainables += [W, b]

    def connect_layers(self):
        """Connect and build the computational graph the network represents.

        Raises NotImplementedError when `self.loss` is not a key of `LOSSES`.
        """

        # Attach loss/cost function to output layer.
        if self.loss not in LOSSES:
            raise NotImplementedError(
                'Unsupported loss: {!r}'.format(self.loss))

        # Constructing the loss node registers it on its inbound nodes,
        # which is what makes it part of the sorted graph below.
        output_layer = self.layers_list[-1]
        if self.loss == 'softmax':
            Softmax(self.y, output_layer)
        elif self.loss == 'mse':
            MSE(self.y, output_layer)
        elif self.loss == 'logloss':
            LogLoss(self.y, output_layer)

        self.graph = topological_sort(self.nodes)

    def fit(self, X_train, y_train, solver='sgd', epochs=10, fit_more_data=False):
        """Train the model.

        Arguments:

            `X_train`, `y_train`: Training data; batches are drawn from it
                                  with `resample`.
            `solver`: Accepted for interface compatibility; unused.
            `epochs`: Number of passes of `steps_per_epoch` steps each.
            `fit_more_data`: When False, trainables are re-initialised
                             before training; otherwise training continues
                             from the current values.

        Raises RuntimeError if `connect_layers` has not been called.
        """
        if not self.graph:
            raise RuntimeError('connect_layers() must be called before fit()')

        # Reset graph values to purge nodes
        if fit_more_data is False:
            self._reset_graph()

        # Setup convenient vars
        m = X_train.shape[0]
        # At least one step, so a data set smaller than one batch still
        # trains and the average below never divides by zero.
        steps_per_epoch = max(1, m // self.batch_size)

        if self.verbose:
            print('Training model...')
            print('Total number of samples:', m)
            print('Steps per epoch:', steps_per_epoch)
            print('='*80)

        for i in range(epochs):
            loss = 0
            for j in range(steps_per_epoch):
                # Step 1
                # Randomly sample a batch of examples
                X_batch, y_batch = resample(X_train, y_train,
                                            n_samples=self.batch_size)

                # Reset value of X and y Inputs
                self._update_input(X_batch)
                self._update_output(y_batch)

                # Step 2
                forward_and_backward(self.graph)

                # Step 3
                sgd_update(self.trainables, learning_rate=self.learning_rate)

                # The loss node is the only sink, so it is always last.
                loss += self.graph[-1].value

            if self.verbose:
                sys.stdout.write("\rEpoch: {}, Loss: {:.3f}".format(i+1, loss/steps_per_epoch))
                if i % 500 == 0:
                    print('') # Force new epoch printline

    def predict(self, X_test):
        """Predict output using the trained model.

        Return predictions (the value of the last layer added with
        `hidden_layer`).
        """
        self._update_input(X_test)

        # Forward propagate (avoid last node, as this is the loss)
        for node in self.graph[:-1]:
            node.forward()

        # Read the output layer directly: the topological order does not
        # guarantee that it sits at graph[-2], since the label input may be
        # sorted after it.
        return self.layers_list[-1].value

    def score(self, y, y_pred, metric='accuracy'):
        """Compute the model's accuracy.

        Returns the percentage of entries of `y` equal to `y_pred` as a
        float. Returns None for any other `metric`, or when the comparison
        raises ValueError (the error is printed).
        """
        if metric == 'accuracy':
            try:
                return 100. * np.mean(y == y_pred)
            except ValueError as e:
                print(e)


In [37]:
from sklearn.model_selection import train_test_split

# Load data
data = load_boston()
X_ = data['data']
y_ = data['target']

# Normalize features
X_ = (X_ - np.mean(X_, axis=0)) / np.std(X_, axis=0)

# Split data sets
X_train, X_test, y_train, y_test = train_test_split(X_, y_, test_size=0.2)
X_train_1, X_train_2, y_train_1, y_train_2 = train_test_split(X_train, y_train, test_size=0.8)

# Setup model
net = Gnarl(X_train_1, y_train_1, **nn_options)
net.hidden_layer(10, activation='leaky_relu')
net.hidden_layer(5, activation='leaky_relu')
net.hidden_layer(1, activation='none')
net.connect_layers()

AttributeError: 'NoneType' object has no attribute 'shape'

In [32]:
net.learning_rate = 1e-4
net.fit(X_train_1, y_train_1, epochs=500)

Training model...
Total number of samples: 80
Steps per epoch: 7
Epoch: 1, Loss: 892.400
Epoch: 500, Loss: 7.9052

In [33]:
print(net.trainables[-1].value)

[ 0.46788218]


In [34]:
net.fit(X_train_2, y_train_2, epochs=1000, fit_more_data=True)

Training model...
Total number of samples: 324
Steps per epoch: 29
Epoch: 1, Loss: 36.067
Epoch: 501, Loss: 9.2503
Epoch: 1000, Loss: 6.530

In [35]:
print(net.trainables[-1].value)

[ 1.01280231]


In [36]:
from sklearn.metrics import r2_score

# Predict y from test data
y_pred = net.predict(X_test)

# Compute r2
score = r2_score(y_test, y_pred)
print(score)

0.769566406144
