In [1]:
import numpy as np

In [14]:
class Tensor:
    """A NumPy-backed value that records the operations producing it.

    Each result of ``+``, ``*`` or ``sum`` remembers its operands in
    ``parents`` and stores a closure in ``_backward`` that pushes the
    result's gradient onto those operands. ``backward`` walks the recorded
    graph in reverse topological order and runs those closures.

    Attributes:
        data: The value, stored as a float ``numpy.ndarray``.
        requires_grad: Whether a gradient is accumulated for this tensor.
        parents: The tensors this one was computed from (empty for leaves).
        op: A short label of the operation that produced this tensor.
        grad: Accumulated gradient with the shape of ``data``, or ``None``
            when ``requires_grad`` is false.
    """

    def __init__(self, data, requires_grad=False, parents=(), op=''):
        self.data = np.asarray(data, dtype=float)
        self.requires_grad = requires_grad
        self.parents = parents
        self.op = op
        self.grad = np.zeros_like(self.data) if requires_grad else None
        # Filled in by the operation that creates this tensor; leaves keep None.
        self._backward = None

    @staticmethod
    def _wrap(value):
        """Return ``value`` unchanged if it is a Tensor, else wrap it in one."""
        return value if isinstance(value, Tensor) else Tensor(value)

    @staticmethod
    def _unbroadcast(grad, shape):
        """Sum ``grad`` over the axes NumPy broadcasting added to reach its shape.

        Args:
            grad: Gradient with the (broadcast) shape of an operation's output.
            shape: Shape of the operand that gradient must be added to.

        Returns:
            A gradient whose shape matches ``shape``.
        """
        grad = np.asarray(grad, dtype=float)
        shape = tuple(shape)
        if grad.shape == shape:
            return grad
        # Broadcasting prepends axes: collapse the leading ones first.
        extra = grad.ndim - len(shape)
        if extra > 0:
            grad = grad.sum(axis=tuple(range(extra)))
        # Axes of size 1 in the operand were stretched: collapse them back.
        stretched = tuple(
            i for i, n in enumerate(shape) if n == 1 and grad.shape[i] != 1
        )
        if stretched:
            grad = grad.sum(axis=stretched, keepdims=True)
        return grad

    def _topological_order(self):
        """Return every tensor reachable through ``parents``, parents first.

        Implemented with an explicit stack so that long chains of operations
        do not exhaust Python's recursion limit.
        """
        order = []
        seen = {id(self)}
        stack = [(self, iter(self.parents))]
        while stack:
            node, pending = stack[-1]
            for parent in pending:
                if id(parent) not in seen:
                    seen.add(id(parent))
                    stack.append((parent, iter(parent.parents)))
                    break
            else:
                # All parents handled: the node can be emitted.
                order.append(node)
                stack.pop()
        return order

    def backward(self, grad=None):
        """Back-propagate from this tensor to everything it was computed from.

        Args:
            grad: Gradient of the final objective with respect to this tensor.
                Defaults to ones with the shape of ``data``.

        Raises:
            RuntimeError: If this tensor was created without ``requires_grad``.
        """
        if self.grad is None:
            raise RuntimeError(
                "backward() called on a tensor that does not require grad"
            )
        if grad is None:
            grad = np.ones_like(self.data)  # starting gradient (usually for scalar output)
        else:
            grad = np.asarray(grad, dtype=float)

        order = self._topological_order()

        # Interior nodes only carry gradient for the current pass. Without this
        # reset a second backward() would re-propagate the previous pass's
        # values and over-count the leaf gradients. Leaves keep accumulating.
        for node in order:
            if node.parents and node.grad is not None:
                node.grad = np.zeros_like(node.data)

        self.grad += grad

        for node in reversed(order):
            step = getattr(node, '_backward', None)
            if step is not None and node.grad is not None:
                step(node.grad)

    def __repr__(self):
        return f"Tensor(data={self.data}, grad={self.grad})"

    def __add__(self, other):
        """Element-wise addition; ``other`` may be a Tensor or array-like."""
        other = self._wrap(other)
        out = Tensor(self.data + other.data, requires_grad=self.requires_grad or other.requires_grad, parents=(self, other), op='+')

        def backward(grad):
            if self.requires_grad:
                self.grad += self._unbroadcast(grad, self.data.shape)
            if other.requires_grad:
                other.grad += self._unbroadcast(grad, other.data.shape)
        out._backward = backward
        return out

    def __radd__(self, other):
        """Support ``array_like + tensor``; addition is commutative."""
        return self.__add__(other)

    def __mul__(self, other):
        """Element-wise multiplication; ``other`` may be a Tensor or array-like."""
        other = self._wrap(other)
        out = Tensor(self.data * other.data, requires_grad=self.requires_grad or other.requires_grad, parents=(self, other), op='*')

        def backward(grad):
            if self.requires_grad:
                self.grad += self._unbroadcast(grad * other.data, self.data.shape)
            if other.requires_grad:
                other.grad += self._unbroadcast(grad * self.data, other.data.shape)
        out._backward = backward
        return out

    def __rmul__(self, other):
        """Support ``array_like * tensor``; multiplication is commutative."""
        return self.__mul__(other)

    def sum(self, axis=None, keepdims=False):
        """Sum the elements, optionally along ``axis``.

        Args:
            axis: Axis or axes to reduce, as accepted by ``numpy.sum``.
                ``None`` reduces over every axis.
            keepdims: Keep the reduced axes with length one.

        Returns:
            A new Tensor holding the reduced value.
        """
        out_data = self.data.sum(axis=axis, keepdims=keepdims)
        out = Tensor(out_data, requires_grad=self.requires_grad, parents=(self,), op='sum')

        def _backward(grad):
            if self.requires_grad:
                g = grad
                # Re-insert the reduced axes only when the forward pass dropped
                # them; with keepdims=True they are still present in grad.
                if axis is not None and not keepdims:
                    g = np.expand_dims(grad, axis=axis)
                self.grad += np.broadcast_to(g, self.data.shape)

        out._backward = _backward
        return out

In [15]:
# Two leaf tensors; both take part in differentiation.
x = Tensor([1.0, 2.0, 3.0], requires_grad=True)
w = Tensor([0.1, 0.2, 0.3], requires_grad=True)

# Element-wise product reduced to a scalar, i.e. the dot product of x and w.
elementwise = x * w
y = elementwise.sum()
y.backward()

# Report the value and the gradient with respect to each input.
for label, value in (("y =", y.data), ("dy/dx =", x.grad), ("dy/dw =", w.grad)):
    print(label, value)


y = 1.4
dy/dx = [0.1 0.2 0.3]
dy/dw = [1. 2. 3.]
