In [1]:
import numpy as np

In [6]:
class Variable():
    def __init__(self, data, T=None, grad=None, copy=True):
        if data is None or type(data) != np.ndarray:
            raise AttributeError('Wrong data type')
        
        if copy:
            self.data = data.copy()
        else:
            self.data = data
        if grad is None:
            grad = np.zeros_like(self.data)
        self.grad = grad
        if T is None:
            T = Variable(self.data.T, self, self.grad.T, copy=False)
        self.T = T
        self.fn = None
    
    def __repr__(self):
        return 'Variable(\n{}\n)\n'.format(self.data.__str__())
    
    def __str__(self):
        return self.data.__str__()
    
    def __add__(self, b):
        if type(b) is not Variable:
            b = Variable(np.ones_like(self.data)*b)
            
        c = Variable(self.data + b.data)
        c.fn = [Variable.__grad_add__, self, b]
        #c.fn = c.__wrap_fn(Variable.__grad_add__, self, b)
        return c
    
    def __grad_add__(self, a, b):
        a.grad += np.ones_like(a.grad) * self.grad
        b.grad += np.ones_like(b.grad) * self.grad
    
    def __sub__(self, b):
        if type(b) is not Variable:
            b = Variable(np.ones_like(self.data)*b)
        c = Variable(self.data - b.data)
        c.fn = [Variable.__grad_sub__, self, b]
        #c.fn = c.__wrap_fn(Variable.__grad_sub__, self, b)
        return c
    
    def __grad_sub__(self, a, b):
        a.grad += np.ones_like(a.grad) * self.grad
        b.grad -= np.ones_like(b.grad) * self.grad
    
    def __mul__(self, b):
        if type(b) is not Variable:
            b = Variable(np.ones_like(self.data)*b)
        
        c = Variable(self.data * b.data)
        c.fn = [Variable.__grad_mul__, self, b]
        #c.fn = c.__wrap_fn(Variable.__grad_mul__, self, b)
        return c
    
    def __grad_mul__(self, a, b):
        a.grad += b.data * self.grad
        b.grad += a.data * self.grad
    
    def __matmul__(self, b):
        c = Variable(np.matmul(self.data, b.data))
        c.fn = [Variable.__grad_matmul__, self, b]
        #c.fn = c.__wrap_fn(Variable.__grad_matmul__, self, b)
        return c
    
    def __grad_matmul__(self, a, b):
        a.grad += np.matmul(self.grad, b.data.T)
        b.grad += np.matmul(a.data.T, self.grad)
    
    
    def tanh(self):
        c = Variable(np.tanh(self.data))
        c.fn = [Variable.__grad_tanh__, self]
        return c
        
    def __grad_tanh__(self, a):
        a.grad += self.grad * (1 - (self.data**2))
    
    def crossentropy(self, target):
        s = self.__softmax(1)
        if type(target) is Variable:
            target = target.data
            
        target = target.astype(np.int)
        
        if target.shape[0] > 1:
            slis = tuple(zip(range(target.shape[0]), target))
        else:
            slis = (0, target[0])
        
        c = Variable(np.array(np.sum(-np.log(s[slis]))))
        c.fn = [Variable.__grad_corssentropy, self, target]
        return c
    
    def __grad_corssentropy(self, a, target):
        y = np.zeros_like(a.grad)
        if target.shape[0] > 1:
            slis = tuple(zip(range(target.shape[0]), target))
        else:
            slis = (0, target[0])
            
        y[slis] = 1.0
        a.grad += (a.__softmax(1) - y)
    
    def __softmax(self, dim):
        # move dim idxs
        exp_data = np.exp(self.data)
        return exp_data / np.sum(exp_data, axis=dim).reshape([-1]+[1 for _ in range(dim)])
    
    def backward(self, backward_grad):
        if type(backward_grad) is Variable:
            backward_grad = backward_grad.data
        
        if backward_grad.shape != self.data.shape:
            raise AttributeError('Wrong backward grad shape {} != {}'.format(backward_grad.shape, self.data.shape))
        
        self.grad = backward_grad
        self.__backward()
        
    # use grad as nparray
    def __backward(self):
        if self.fn is None:
            return;
        
        backward_op = self.fn[0]
        
        backward_op(self, *self.fn[1:])
        
        for v in self.fn[1:]:
            if type(v) is Variable:
                v.__backward()

In [26]:
import torch

def testGrad():
    n = 1
    c = 5
    x = np.random.random((n,2))
    h = np.random.random((n,16))
    y = np.random.randint(0, c, n)
    
    u = np.random.random((2, 16))
    w = np.random.random((16,16))
    v = np.random.random((16, c))
    
    def equation(*args):
        x, h, u, w, v = args
        return ((x @ u) + (h @ w)).tanh() @ v
    
    def allv(l):
        return [Variable(i) for i in l]

    def allt(l):
        return [torch.tensor(i, requires_grad=True) for i in l]
    
    wow = [x, h, u, w, v]
    
    Vs = allv(wow)
    Ts = allt(wow)
    Y = Variable(y)
    _Y = torch.tensor(y.astype(np.float), requires_grad=True)
    
    A = equation(*Vs)
    A = A.crossentropy(Y)
    _A = equation(*Ts)
    _A = torch.nn.CrossEntropyLoss(reduction='sum')(_A, _Y.long())
    
    print(A, _A)
    
    A.backward(np.array(1))
    _A.backward()
    
    t = None
    
    for V, T in zip(Vs, Ts):
        r = (torch.tensor(V.data) == T).all()
        if t is None:
            t = r
        else:
            t &= r
    
    return t
testGrad()

2.284946979038115 tensor(2.2849, dtype=torch.float64, grad_fn=<NllLossBackward>)


tensor(1, dtype=torch.uint8)