In [1]:
class Session:
    """Registry of every tensor created for the current computation graph.

    Tensor classes register themselves through ``Session.get().add_tensor``.
    The session can then reset per-tensor caches, collect gradients for all
    ``Variable`` instances and apply a gradient-descent update to them.
    """

    # Shared instance handed out by ``Session.get()``; created on first use.
    session = None

    def __init__(self):
        self.all_tensors = []

    @classmethod
    def get(clazz):
        """Return the shared session, creating it the first time it is asked for."""
        if clazz.session is None:
            clazz.session = Session()
        return clazz.session

    def add_tensor(self, tensor):
        """Append ``tensor`` to the list of tensors tracked by this session."""
        self.all_tensors.append(tensor)

    def reset(self):
        """Forget all tracked tensors by rebinding the list to a new empty one."""
        self.all_tensors = []

    def clear(self):
        """Call ``clear()`` on every tracked tensor, in registration order."""
        for node in self.all_tensors:
            node.clear()

    def gradients(self):
        """Map each tracked ``Variable`` to the result of its ``CE_gradient()``.

        Tensors that are not ``Variable`` instances are skipped.
        """
        return {
            node: node.CE_gradient()
            for node in self.all_tensors
            if isinstance(node, Variable)
        }

    def learn(self, learning_rate):
        """Apply one gradient-descent step to every tracked ``Variable``.

        All gradients are gathered first (via ``gradients()``), then each
        variable is set to ``value - gradient * learning_rate``.
        """
        for variable, grad in self.gradients().items():
            variable.set_value(variable.value() - grad * learning_rate)

In [19]:
import numpy as np

class SigmoidTensor:
    """Graph node that applies the element-wise logistic sigmoid to its input.

    The node registers itself as a consumer of ``input_tensor`` and with the
    shared ``Session``. Both the forward value and the backward gradient are
    cached until ``clear()`` is called.
    """

    def __init__(self, input_tensor):
        self.input_tensor = input_tensor
        # Consumers of this node append themselves here when constructed.
        self.subsequent_tensors = []
        input_tensor.subsequent_tensors.append(self)
        self._value = None
        self._CE_gradient = None
        Session.get().add_tensor(self)

    def value(self):
        """Return ``1 / (1 + exp(-input))``, computing it once and caching it."""
        if self._value is None:
            self._value = 1 / (1 + np.exp(-self.input_tensor.value()))
        return self._value

    def CE_gradient(self, input_tensor):
        """Return the loss gradient with respect to this node's input.

        Computed as ``value * (1 - value)`` times the gradient reported by the
        single downstream node, then cached.

        Args:
            input_tensor: the tensor asking for its gradient; must be the
                tensor this node was built from.

        Raises:
            ValueError: if ``input_tensor`` is not this node's input.
            RuntimeError: if this node does not have exactly one consumer.
        """
        # Raising a plain string is itself a TypeError in Python 3, so real
        # exception objects are raised to surface the intended message.
        if input_tensor is not self.input_tensor:
            raise ValueError('Unknown input tensor')
        if len(self.subsequent_tensors) != 1:
            raise RuntimeError('Incorrect number of subsequent tensors')
        if self._CE_gradient is None:
            activation = self.value()
            downstream = self.subsequent_tensors[0].CE_gradient(self)
            self._CE_gradient = activation * (1 - activation) * downstream
        return self._CE_gradient

    def clear(self):
        """Drop the cached forward value and gradient."""
        self._value = None
        self._CE_gradient = None

In [20]:
import pdb

class AddTensor:
    """Graph node that adds the values of two input tensors.

    The node registers itself as a consumer of both inputs and with the shared
    ``Session``. The forward value and the backward gradient are cached until
    ``clear()`` is called.
    """

    def __init__(self, input_tensor1, input_tensor2):
        self.input_tensor1 = input_tensor1
        self.input_tensor2 = input_tensor2
        # Consumers of this node append themselves here when constructed.
        self.subsequent_tensors = []
        input_tensor1.subsequent_tensors.append(self)
        input_tensor2.subsequent_tensors.append(self)
        self._value = None
        self._CE_gradient = None
        Session.get().add_tensor(self)

    def value(self):
        """Return ``input_tensor1.value() + input_tensor2.value()`` (cached)."""
        if self._value is None:
            self._value = self.input_tensor1.value() + self.input_tensor2.value()
        return self._value

    def CE_gradient(self, input_tensor):
        """Return the loss gradient with respect to either input.

        The gradient reported by the single downstream node is passed through
        unchanged, and the same cached object is returned for both inputs.
        NOTE(review): no reduction is applied if the two operands were
        broadcast against each other in ``value()``.

        Args:
            input_tensor: the tensor asking for its gradient; must be one of
                the two inputs of this node.

        Raises:
            ValueError: if ``input_tensor`` is not one of this node's inputs.
            RuntimeError: if this node does not have exactly one consumer.
        """
        # Raising a plain string is itself a TypeError in Python 3, so real
        # exception objects are raised to surface the intended message.
        if input_tensor is not self.input_tensor1 and input_tensor is not self.input_tensor2:
            raise ValueError('Unknown input tensor')
        if len(self.subsequent_tensors) != 1:
            raise RuntimeError('Incorrect number of subsequent tensors')
        if self._CE_gradient is None:
            self._CE_gradient = self.subsequent_tensors[0].CE_gradient(self)
        return self._CE_gradient

    def clear(self):
        """Drop the cached forward value and gradient."""
        self._value = None
        self._CE_gradient = None

In [21]:
class Placeholder:
    """Graph input whose value is supplied from outside through ``set_value``."""

    def __init__(self):
        # Nothing has been fed yet.
        self._value = None
        # Consumers of this node append themselves here when constructed.
        self.subsequent_tensors = []
        Session.get().add_tensor(self)

    def value(self):
        """Return the most recently fed value (``None`` if nothing was fed)."""
        return self._value

    def set_value(self, value):
        """Store ``value`` as the current content of this placeholder."""
        self._value = value

    def clear(self):
        """Do nothing: the fed value is kept when caches are cleared."""
        return None

In [22]:
# Smoke-test graph: result = sigmoid(p1 + p2), with both operands fed later.
p1 = Placeholder()
p2 = Placeholder()
result = SigmoidTensor(AddTensor(p1, p2))

In [23]:
# Feed concrete arrays into the two placeholders; their element-wise sum is [0, 0, 13].
p1.set_value(np.array([0,-5,5]))
p2.set_value(np.array([0,5,8]))

In [24]:
# Evaluate the sigmoid node; as the cell's last expression the result is displayed.
result.value()

array([0.5       , 0.5       , 0.99999774])

In [25]:
# TODO: Ned and David fix a tensor

class MatrixMultiplyTensor:
    """Graph node computing ``np.matmul(input_tensor1, input_tensor2)``.

    The node registers itself as a consumer of both inputs and with the shared
    ``Session``. The forward value and the two backward gradients (one per
    input) are cached until ``clear()`` is called.

    NOTE(review): the gradient formulas (``np.dot`` with the right operand and
    ``np.outer`` with the left operand) look written for a 1-D left operand
    and a 2-D right operand — confirm before feeding batched inputs.
    """

    def __init__(self, input_tensor1, input_tensor2):
        self.input_tensor1 = input_tensor1
        self.input_tensor2 = input_tensor2
        # Consumers of this node append themselves here when constructed.
        self.subsequent_tensors = []
        input_tensor1.subsequent_tensors.append(self)
        input_tensor2.subsequent_tensors.append(self)
        self._value = None
        self._CE_gradient1 = None
        self._CE_gradient2 = None
        Session.get().add_tensor(self)

    def value(self):
        """Return the matrix product of the two input values (cached)."""
        if self._value is None:
            self._value = np.matmul(self.input_tensor1.value(), self.input_tensor2.value())
        return self._value

    def _downstream_gradient(self):
        """Return the gradient reported by the single consumer of this node.

        Raises:
            RuntimeError: if this node does not have exactly one consumer.
        """
        # Raising a plain string is itself a TypeError in Python 3, so a real
        # exception object is raised to surface the intended message.
        if len(self.subsequent_tensors) != 1:
            raise RuntimeError('Incorrect number of subsequent tensors')
        return self.subsequent_tensors[0].CE_gradient(self)

    def CE_gradient1(self):
        """Return the loss gradient with respect to ``input_tensor1`` (cached).

        Computed as ``np.dot(input_tensor2.value(), downstream_gradient)``.
        """
        if self._CE_gradient1 is None:
            self._CE_gradient1 = np.dot(self.input_tensor2.value(), self._downstream_gradient())
        return self._CE_gradient1

    def CE_gradient2(self):
        """Return the loss gradient with respect to ``input_tensor2`` (cached).

        Computed as ``np.outer(input_tensor1.value(), downstream_gradient)``.
        """
        if self._CE_gradient2 is None:
            self._CE_gradient2 = np.outer(self.input_tensor1.value(), self._downstream_gradient())
        return self._CE_gradient2

    def CE_gradient(self, input_tensor):
        """Return the loss gradient for whichever input is asking.

        Args:
            input_tensor: one of the two inputs of this node. If both inputs
                are the same object, the gradient for the first is returned.

        Raises:
            ValueError: if ``input_tensor`` is not one of this node's inputs.
            RuntimeError: if this node does not have exactly one consumer.
        """
        if input_tensor is self.input_tensor1:
            return self.CE_gradient1()
        if input_tensor is self.input_tensor2:
            return self.CE_gradient2()
        raise ValueError('Unknown input tensor')

    def clear(self):
        """Drop the cached forward value and both cached gradients."""
        self._value = None
        self._CE_gradient1 = None
        self._CE_gradient2 = None

In [26]:
class Variable:
    """Graph leaf holding a trainable value.

    The value is set at construction and can be replaced with ``set_value``;
    ``Session.learn`` uses this to apply gradient-descent updates.
    """

    def __init__(self, initial_value):
        # Consumers of this node append themselves here when constructed.
        self.subsequent_tensors = []
        self._value = initial_value
        Session.get().add_tensor(self)

    def set_value(self, value):
        """Replace the stored value."""
        self._value = value

    def value(self):
        """Return the stored value."""
        return self._value

    def CE_gradient(self):
        """Return the loss gradient for this variable.

        The gradient is requested from the single node that consumes this
        variable; nothing is cached here.

        Raises:
            RuntimeError: if this variable does not have exactly one consumer.
        """
        # Raising a plain string is itself a TypeError in Python 3, so a real
        # exception object is raised to surface the intended message.
        if len(self.subsequent_tensors) != 1:
            raise RuntimeError('Incorrect number of subsequent tensors')
        return self.subsequent_tensors[0].CE_gradient(self)

    def clear(self):
        """Do nothing: the stored value is kept when caches are cleared."""
        pass

In [27]:
def log_softmax(logits):
    """Return the log-softmax of ``logits`` along the last axis.

    Works for a single 1-D logit vector as well as for arrays whose last axis
    indexes the classes. The maximum along that axis is subtracted first so
    that ``exp`` cannot overflow; this does not change the result.

    Args:
        logits: array-like of scores; the last axis is the class axis.

    Returns:
        An array with the same shape as ``logits``.
    """
    logits = np.asarray(logits)
    # keepdims=True keeps the reduced axis so the subtraction broadcasts per
    # row rather than across rows.
    shifted = logits - np.max(logits, axis=-1, keepdims=True)
    return shifted - np.log(np.sum(np.exp(shifted), axis=-1, keepdims=True))


def softmax(logits):
    """Return the softmax of ``logits`` along the last axis.

    Computed as ``exp(log_softmax(logits))`` to share its numerical handling.
    """
    return np.exp(log_softmax(logits))

class SoftmaxCELoss:
    """Graph node computing softmax cross-entropy between logits and targets.

    ``input_tensor`` supplies the logits and ``y`` supplies the targets; both
    are read through their ``value()`` method. The node registers itself as a
    consumer of ``input_tensor`` (not of ``y``) and with the shared ``Session``.
    The forward value and the gradient are cached until ``clear()`` is called.
    """

    def __init__(self, input_tensor, y):
        self.input_tensor = input_tensor
        self.y = y
        self._value = None
        self._CE_gradient = None
        input_tensor.subsequent_tensors.append(self)
        Session.get().add_tensor(self)

    def value(self):
        """Return ``-sum(log_softmax(logits) * y)`` over the last axis (cached).

        Reducing over the last axis handles both a single 1-D logit vector
        (giving a scalar) and a 2-D batch (giving one loss per row).
        """
        if self._value is None:
            self._value = -np.sum(
                log_softmax(self.input_tensor.value()) * self.y.value(),
                axis=-1
            )
        return self._value

    def CE_gradient(self, input_tensor):
        """Return the loss gradient with respect to the logits (cached).

        Computed as ``softmax(logits) - y``.

        Args:
            input_tensor: the tensor asking for its gradient; must be the
                logits tensor this node was built from.

        Raises:
            ValueError: if ``input_tensor`` is not this node's logits tensor.
        """
        # Raising a plain string is itself a TypeError in Python 3, so a real
        # exception object is raised to surface the intended message.
        if input_tensor is not self.input_tensor:
            raise ValueError('unknown input tensor')
        if self._CE_gradient is None:
            self._CE_gradient = softmax(self.input_tensor.value()) - self.y.value()
        return self._CE_gradient

    def clear(self):
        """Drop the cached forward value and gradient."""
        self._value = None
        self._CE_gradient = None

In [28]:
from keras.datasets import mnist
from keras.utils import to_categorical

# Load the MNIST train/test split through Keras.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every image to a 784-element row and divide by 255
# (presumably scaling 8-bit pixel intensities into [0, 1]).
x_train = x_train.reshape(-1, 28*28) / 255
x_test = x_test.reshape(-1, 28*28) / 255

# Convert the integer class labels to categorical (one-hot) rows.
y_train, y_test = to_categorical(y_train), to_categorical(y_test)


In [29]:
# Sanity check: one row per training image, 28*28 = 784 columns after flattening.
x_train.shape

(60000, 784)

In [30]:
# Start from an empty session so tensors from earlier cells are no longer tracked.
Session.get().reset()
# Layer 1: 784 inputs -> 392 units; weights drawn from a normal scaled by 1/sqrt(fan-in).
W1 = Variable(np.random.normal(size=(784, 392)) / np.sqrt(784))
b1 = Variable(np.zeros((392,)))
# x and y are fed per example by the training / evaluation loops.
x = Placeholder()
y = Placeholder()
z1a = MatrixMultiplyTensor(x, W1)
z1b = AddTensor(z1a, b1)
h1 = SigmoidTensor(z1b)
# Layer 2: 392 hidden units -> 10 output logits, same initialisation scheme.
W2 = Variable(np.random.normal(size=(392, 10)) / np.sqrt(392))
b2 = Variable(np.zeros((10,)))
z2a = MatrixMultiplyTensor(h1, W2)
z2b = AddTensor(z2a, b2)
# Softmax cross-entropy of the output logits z2b against the target y.
ce = SoftmaxCELoss(z2b, y)

In [34]:
def train(learning_rate):
    """Run one pass over the training set, updating after every example.

    For each ``(x_train, y_train)`` pair the cached tensor values are cleared,
    the pair is fed into the ``x`` / ``y`` placeholders, and the session takes
    a step with ``learning_rate`` divided by the number of training examples.
    The index is printed every 200 examples as a progress indicator.

    Args:
        learning_rate: step size before division by the training-set size.

    Returns:
        The sum of ``ce.value()`` over all examples divided by their count.
    """
    session = Session.get()
    example_count = x_train.shape[0]
    step_size = learning_rate/example_count
    loss_total = 0
    for position, (features, target) in enumerate(zip(x_train, y_train)):
        if position % 200 == 0:
            print(position)
        session.clear()
        x.set_value(features)
        y.set_value(target)
        session.learn(step_size)
        # Read after the update: the loss comes from values cached while the
        # gradients were being computed.
        loss_total += ce.value()
    return loss_total/example_count

In [35]:
def get_accuracy():
    """Return the fraction of training examples the network classifies correctly.

    For each ``(x_train, y_train)`` pair the cached tensor values are cleared,
    the pair is fed into the ``x`` / ``y`` placeholders, and the index of the
    largest output logit is compared with the index of the largest entry of
    the target row.

    Returns:
        Number of matching examples divided by the number of training examples.
    """
    correctCount = 0
    for (x_value, y_value) in zip(x_train, y_train):
        Session.get().clear()
        x.set_value(x_value)
        y.set_value(y_value)
        # Use the output-layer logits (z2b, 10 entries). The hidden-layer
        # pre-activation z1b has 392 entries, so its argmax cannot be compared
        # with a class label.
        if np.argmax(z2b.value()) == np.argmax(y_value):
            correctCount += 1
    return correctCount/x_train.shape[0]

In [36]:
# Run up to 100 passes over the training set with learning rate 0.1, printing
# the value returned by train() and then the training accuracy after each pass.
for _ in range(100):
    print(train(0.1))
    print(get_accuracy())

0
200
400
600
800
1000
1200
1400
1600
1800
2000
2200
2400
2600
2800
3000
3200
3400
3600
3800
4000
4200
4400
4600
4800
5000
5200
5400
5600
5800
6000
6200
6400
6600
6800
7000
7200
7400
7600
7800
8000
8200
8400
8600
8800
9000
9200
9400
9600
9800


KeyboardInterrupt: 