In [1]:
import numpy as np

In [2]:
class Tensor:
    """
    Tensor
    """
    def __init__(self, numpy_array, name=None):
        if not isinstance(numpy_array, np.ndarray):
            raise ValueError("Must be initialized with a Numpy array")
        self.value = numpy_array
        self.name = str(name)
        self.previous_tensors = None
        self.operation = None  # type of operation done to create this tensor.
        self.grad = None
        self.grads = list()
        
    def backward(self):
        self._compute_grads(self)
        
    def get_grad(self):
        return sum(self.grads)
    
    def _compute_grads(self, tensor):
        if tensor.name == 't5':
            print('grad', str(tensor.grad))
        
        if tensor.grad is None:
            tensor.grad = np.full(self.value.shape, 1)
        
        if tensor.previous_tensors is None:
            return
        else:
            t1, t2 = tensor.previous_tensors
            if t1.grad is None:
                t1.grad = 0
            if t2.grad is None:
                t2.grad = 0
            
            if tensor.operation == 'add':
                t1.grad = np.copy(tensor.grad)
                t2.grad = np.copy(tensor.grad)
                t1.grads.append(np.copy(t1.grad))
                t2.grads.append(np.copy(t2.grad))
            elif tensor.operation == 'mul':
                t1.grad = np.copy(tensor.grad * t2.value)
                t2.grad = np.copy(tensor.grad * t1.value)
                t1.grads.append(np.copy(t1.grad))
                t2.grads.append(np.copy(t2.grad))
            else:
                raise ValueError("Invalid operation")
#             tensor.grad = None  # remove for saving memory
            self._compute_grads(t1)
            self._compute_grads(t2)
        
    @classmethod
    def _accumulate_grads(cls, tensor):
        q = list()
        q.append(tensor)
        
    
    def _add_tensor(self, other):
        """
        Tensor addition (core)
        """
        if isinstance(other, Tensor):
            # check shapes
            if not self.value.shape == other.value.shape:
                raise ValueError("Tensors must have the same shape. shape1: {}, shape2: {}".\
                                     format(str(self.value.shape), str(other.value.shape)))
            operation_result = self.value + other.value
            new_tensor = Tensor(operation_result)
            new_tensor.operation = 'add'
            new_tensor.previous_tensors = [self, other]
            return new_tensor
        else:
            raise ValueError("Operands must be of `Tensor` type.")
    
    def _mul_tensor(self, other):
        """
        Tensor multiplication (core)
        """
        if isinstance(other, Tensor):
            # check shapes
            if not self.value.shape == other.value.shape:
                raise ValueError("Tensors must have the same shape. shape1: {}, shape2: {}".\
                                     format(str(self.value.shape), str(other.value.shape)))
            operation_result = self.value * other.value
            new_tensor = Tensor(operation_result)
            new_tensor.operation = 'mul'
            new_tensor.previous_tensors = [self, other]
            return new_tensor
        else:
            raise ValueError("Operands must be of `Tensor` type.")
            
    def _convert_other_to_tensor(self, other):
        """
        Convert 'other' to Tensor with proper shape for 
        proper operation.
        """
        # convert other to Tensor
        if isinstance(other, int) or isinstance(other, float):
            t = Tensor(np.full(self.value.shape, other))
        elif isinstance(other, np.ndarray):
            other = np.broadcast_to(other, self.value.shape)
            t = Tensor(other)
        elif isinstance(other, Tensor):
            t = other  # no need to do anything
        else:
            raise ValueError("Invalid type")
        return t
    
    def __add__(self, other):
        print('add is called')
        # convert other to tensor
        t = self._convert_other_to_tensor(other)
        # do tensor addition
        return self._add_tensor(t)
    
    def __radd__(self, other):
        print('radd is called')
        return self.__add__(other)
            
    def __mul__(self, other):
        print('mul is called')
        # convert other to tensor
        t = self._convert_other_to_tensor(other)
        # do tensor multiplication
        return self._mul_tensor(t)
    
    def __rmul__(self, other):
        print('rmul is called')
        return self.__mul__(other)
        
    def __sub__(self, other):
        # convert other to tensor
        t = self._convert_other_to_tensor(other * (-1))
        # do tensor addition
        return self._add_tensor(t)
    
    def __str__(self):
        r = self.value.__repr__()
        r = r.replace('array', 'Tensor')
        if self.name:
            r = self.name + ', ' + r
        return r
    
    def __repr__(self):
        r = self.value.__repr__()
        r = r.replace('array', 'Tensor')
        if self.name:
            r = self.name + ', ' + r
        return r

In [3]:
arr1 = np.random.rand(2, 3)
arr2 = np.random.rand(2, 3)

In [4]:
t1 = Tensor(arr1)
t2 = Tensor(arr2)
print('t1:', t1)
print('t2:', t2)

t1: None, Tensor([[0.33660213, 0.81649001, 0.95869224],
       [0.60528311, 0.90428262, 0.8819401 ]])
t2: None, Tensor([[0.03965075, 0.07126047, 0.85544028],
       [0.71360469, 0.9093446 , 0.26482009]])


In [5]:
t1.name = 't1'
t5 = t1 + t1
t5.name = 't5'
t6 = t5 * 3 + t5 
t6.name = 't6'

add is called
mul is called
add is called


In [6]:
t6.backward()

grad [[3 3 3]
 [3 3 3]]
grad [[3 3 3]
 [3 3 3]]


In [7]:
t1.get_grad()

array([[12, 12, 12],
       [12, 12, 12]])

In [8]:
print(len(t5.grads))
for _ in t5.grads:
    print(_)

2
[[3 3 3]
 [3 3 3]]
[[3 3 3]
 [3 3 3]]


In [9]:
t5.previous_tensors[0].grads

[array([[3, 3, 3],
        [3, 3, 3]]), array([[3, 3, 3],
        [3, 3, 3]]), array([[3, 3, 3],
        [3, 3, 3]]), array([[3, 3, 3],
        [3, 3, 3]])]

In [15]:
print(len(t1.grads))
for _ in t1.grads:
    print(_)

4
[[3 3 3]
 [3 3 3]]
[[3 3 3]
 [3 3 3]]
[[3 3 3]
 [3 3 3]]
[[3 3 3]
 [3 3 3]]


In [8]:
print(len(t5.grads))
for _ in t5.grads:
    print(_)

2
[[1 1 1]
 [1 1 1]]
[[3 3 3]
 [3 3 3]]


In [9]:
print(len(t1.grads))
for _ in t1.grads:
    print(_)

6
[[1 1 1]
 [1 1 1]]
[[1 1 1]
 [1 1 1]]
[[3 3 3]
 [3 3 3]]
[[3 3 3]
 [3 3 3]]
[[3 3 3]
 [3 3 3]]
[[9 9 9]
 [9 9 9]]


In [140]:
for _ in t5.grads:
    print(_)

[[1 1 1]
 [1 1 1]]
[[3 3 3]
 [3 3 3]]


In [7]:
t1.grad

array([[3, 3, 3],
       [3, 3, 3]])

In [64]:
from collections import defaultdict

In [65]:
q = list()
tensors_dict = defaultdict(int)
q.append(t6)

In [66]:
while q:
    t = q.pop(0)
    t1, t2 = t.previous_tensors
    tensors_dict[hash(t1)] = tensors_dict[hash(t1)] + t1.grad
    tensors_dict[hash(t2)] = tensors_dict[hash(t2)] + t2.grad
    
    if t1.previous_tensors:
        q.append(t1)
        
    if t2.previous_tensors:
        q.append(t2)

In [67]:
for k in tensors_dict:
    print(k)
    print(tensors_dict[k])
    print('***')

286213808
[[2 2 2]
 [2 2 2]]
***
-9223372036568561927
[[2 2 2]
 [2 2 2]]
***
286213843
[[2 2 2]
 [2 2 2]]
***
286213871
[[18 18 18]
 [18 18 18]]
***
286213689
[[1.22730914 0.27408566 0.23691497]
 [1.05669763 1.73574169 0.59884902]]
***


In [47]:
hash(t6.previous_tensors[0])

-9223372036568819277

In [5]:
t10 = (t1 - 2) * (t1 - 2)

In [6]:
t10.backward()

In [7]:
t1.grad

array([[-2.99519256, -3.70566587, -3.57598042],
       [-2.62173808, -2.41002425, -2.09688913]])

In [11]:
(2 * t1) - 4

Tensor([[-1.49759628, -1.85283293, -1.78799021],
       [-1.31086904, -1.20501212, -1.04844457]])

In [14]:
t1 + 2

Tensor([[2.50240372, 2.14716707, 2.21200979],
       [2.68913096, 2.79498788, 2.95155543]])

In [47]:
t10 = (t1 + 2) / (t1 + 2)

TypeError: unsupported operand type(s) for /: 'Tensor' and 'Tensor'

In [44]:
t10.backward()

In [45]:
t1.grad

array([[5.52619492, 4.71267724, 5.95470321],
       [4.45144276, 4.1928202 , 5.00457934]])

In [46]:
2 * arr1 + 4

array([[5.52619492, 4.71267724, 5.95470321],
       [4.45144276, 4.1928202 , 5.00457934]])

In [6]:
t20.backward()

NameError: name 't20' is not defined

In [None]:
t1.grad

In [6]:
import torch

In [7]:
t = torch.rand(2, 3)
t

tensor([[0.8264, 0.3591, 0.8097],
        [0.5179, 0.3105, 0.6348]])

In [9]:
a = 'txt_another'

In [11]:
a.replace('txt', 'some')

'some_another'