In [1]:
import numpy as np

In [2]:
class Tensor:
    """
    Minimal NumPy-backed tensor with reverse-mode differentiation along a
    single chain of operations.

    Every arithmetic method (and ``mean``) returns a new ``Tensor``. The result
    remembers the tensor it was computed from (``previous_tensor``) and how it
    was computed (``operation``), so ``backward()`` can walk from a scalar
    result back to the leaf tensor and store the gradient there.

    The operation is recorded on the *result*, not on the input, so using the
    same tensor in several expressions does not disturb graphs built earlier.

    Limitation: only the ``self`` operand is tracked. The other operand of a
    binary operation is treated as a constant.

    Attributes:
        value: The wrapped NumPy array or NumPy scalar.
        previous_tensor: Tensor this one was derived from (``None`` for a leaf).
        operation: ``(name, operand)`` describing how this tensor was produced
            from ``previous_tensor`` (``None`` for a leaf).
        grad: Gradient stored on the leaf by ``backward()``; ``None`` until then.
    """
    def __init__(self, numpy_array):
        self.value = numpy_array
        self.previous_tensor = None
        self.operation = None
        self.grad = None

    def _derive(self, name, operand, result):
        """Wrap ``result`` in a new Tensor linked back to ``self`` via ``name``."""
        child = Tensor(numpy_array=result)
        child.previous_tensor = self
        child.operation = (name, operand)
        return child

    def backward(self):
        """
        Back-propagate from this scalar tensor to the leaf of its chain.

        Walks ``previous_tensor`` links, multiplying the local derivative of
        each recorded operation into a running gradient (printed after every
        step), and finally stores the result in the leaf's ``grad``.

        Raises:
            ValueError: If this tensor does not hold exactly one element, or
                if an unrecognised operation name is found in the chain.
        """
        if not self.value.size == 1:
            raise ValueError(
                "The tensor you are trying to take derivative must be a scalar. "
                "(For example, you should call `.backward()` after `.mean` operation)"
            )

        node = self
        grad = None
        while node.previous_tensor is not None:
            name, operand = node.operation
            source = node.previous_tensor
            if name in ('add', 'sub'):
                local = 1
            elif name == 'rsub':
                local = -1
            elif name == 'mul':
                local = operand
            elif name == 'mean':
                # d(mean(x))/dx_i = 1 / N, where N is the size of the *input*.
                local = 1.0 / source.value.size
            else:
                raise ValueError("Unknown operation")
            # The first step seeds the gradient; later steps apply the chain rule.
            grad = local if grad is None else grad * local
            print('grad:', grad)
            node = source
        # ``node`` is now the leaf (no predecessor); it receives the gradient.
        node.grad = grad

    def __add__(self, other):
        """Return ``self + other`` as a new Tensor."""
        return self._derive('add', other, self.value + other)

    def __radd__(self, other):
        """Return ``other + self``; addition commutes, so reuse ``__add__``."""
        return self.__add__(other)

    def __sub__(self, other):
        """Return ``self - other`` as a new Tensor."""
        return self._derive('sub', other, self.value - other)

    def __rsub__(self, other):
        """Return ``other - self`` as a new Tensor (local derivative is -1)."""
        return self._derive('rsub', other, other - self.value)

    def __mul__(self, other):
        """Return ``self * other`` as a new Tensor."""
        return self._derive('mul', other, self.value * other)

    def __rmul__(self, other):
        """Return ``other * self``; multiplication commutes, so reuse ``__mul__``."""
        return self.__mul__(other)

    def mean(self):
        """Return the mean over all elements as a new (scalar) Tensor."""
        return self._derive('mean', None, self.value.mean())

    def __str__(self):
        """Return the ``repr`` of the wrapped value."""
        return self.value.__repr__()

In [3]:
# Wrap a 2x3 array of ones in a Tensor; the bare `t` echoes the object.
arr1 = np.ones((2, 3))
t = Tensor(arr1)
t

<__main__.Tensor at 0x7f20802ed860>

In [5]:
# Random 2x3 multiplier; no seed is set here, so values differ between runs.
w = np.random.rand(2, 3)
w

array([[0.73837039, 0.43769927, 0.05020489],
       [0.24016755, 0.93390861, 0.8311483 ]])

In [6]:
# Element-wise product through Tensor.__mul__, which returns a new Tensor.
t2 = t * w
t2

<__main__.Tensor at 0x7f20802ed8d0>

In [8]:
# Reduce to a single value; backward() only accepts a one-element tensor.
t3 = t2.mean()
t3

<__main__.Tensor at 0x7f2060874b70>

In [9]:
# Back-propagate from the scalar; each step prints the running gradient.
t3.backward()

grad: 0.16666666666666666
grad: [[0.12306173 0.07294988 0.00836748]
 [0.04002793 0.15565144 0.13852472]]


In [10]:
# Expected gradient of mean(t * w) with respect to t: w over the 6 elements.
w / 6.0

array([[0.12306173, 0.07294988, 0.00836748],
       [0.04002793, 0.15565144, 0.13852472]])

In [11]:
# Gradient that backward() stored on the leaf tensor `t`.
t.grad

array([[0.12306173, 0.07294988, 0.00836748],
       [0.04002793, 0.15565144, 0.13852472]])

In [5]:
# NOTE(review): `t1` is not defined in any cell shown here; this cell
# presumably survives from an earlier kernel session -- confirm or remove.
t3 = t1 + t2

tensor


In [6]:
# Shape of the array wrapped by t3.
t3.value.shape

(2, 3)

In [9]:
# NOTE(review): reading `.value` on a single element implies t3.value holds
# objects rather than numbers here -- presumably the result of adding two
# Tensors; confirm.
t3.value[0, 0].value

array([[1.38842852, 1.42401579, 1.10704218],
       [1.63677107, 1.56811515, 1.42216619]])

In [45]:
# Fresh 2x3 array of ones for the chained-operations example.
arr = np.ones((2, 3))
arr

array([[1., 1., 1.],
       [1., 1., 1.]])

In [46]:
# Rebinds `t` to a new leaf Tensor wrapping `arr`.
t = Tensor(arr)
t

<__main__.Tensor at 0x7f44081c9f98>

In [47]:
# First link in the chain: scale by 2.
t2 = t * 2

In [48]:
# Second link: scale by 3.
t3 = t2 * 3

In [49]:
# Third link: shift by a constant 2.
t4 = t3 + 2

In [50]:
# Each element is 1 * 2 * 3 + 2 = 8.
t4.value

array([[8., 8., 8.],
       [8., 8., 8.]])

In [52]:
# Reduce to a scalar so backward() can be called.
t5 = t4.mean()

In [54]:
# Back-propagate through mean -> add -> mul -> mul; each printed value is
# the running gradient after one step back along the chain.
t5.backward()

grad: 0.16666666666666666
grad: 0.16666666666666666
grad: 0.5
grad: 1.0


In [56]:
# Tensor + Tensor: Tensor.__add__ hands the right-hand Tensor to NumPy
# addition as-is (it is not unwrapped to its .value).
t2 + t4

<__main__.Tensor at 0x7f44081d0f28>

In [48]:
import torch

In [49]:
# Reference run in PyTorch: a 2x3 leaf of ones that tracks gradients.
a = torch.ones(2, 3, requires_grad=True)
a

tensor([[1., 1., 1.],
        [1., 1., 1.]], requires_grad=True)

In [50]:
# Multiply by 1, then double three times, so b == 8 * a.
b = a * 1
for _ in range(3):
    b = b * 2

In [51]:
# Reduce to a scalar (rebinds `b`).
b = b.mean()

In [52]:
# Run PyTorch autograd from the scalar.
b.backward()

In [53]:
# d mean(8 * a) / da = 8 / 6, about 1.3333 per element.
a.grad

tensor([[1.3333, 1.3333, 1.3333],
        [1.3333, 1.3333, 1.3333]])