In [9]:
import numpy as np
import warnings

In [6]:
class Tensor:
    """Minimal tensor with reverse-mode automatic differentiation.

    Supports element-wise ``+`` and ``*`` between two ``Tensor`` objects.
    Gradients are plain NumPy arrays stored in ``grad``. No un-broadcasting
    is performed, so both operands of an operation are expected to share
    the same shape.
    """

    def __init__(self, data, requires_grad=True, parents=None, creation_op=None):
        """Wrap ``data`` in a NumPy array and record how it was produced.

        Args:
            data: Anything ``np.array`` accepts.
            requires_grad: When false, ``backward`` is a no-op for this node.
            parents: Tensors this one was computed from (empty for leaves).
            creation_op: Name of the producing operation (``'add'``,
                ``'mul'``) or ``None`` for a leaf.
        """
        self.data = np.array(data)
        self.requires_grad = requires_grad
        self.parents = parents or []
        self.creation_op = creation_op
        self.grad = None
        if self.requires_grad:
            self.zero_grad()

    def zero_grad(self):
        """Reset the accumulated gradient to zeros shaped like ``data``."""
        self.grad = np.zeros_like(self.data)

    def backward(self, grad=None):
        """Accumulate ``grad`` into this node and push it to the parents.

        Args:
            grad: Gradient of the final output with respect to this tensor.
                Defaults to ones (i.e. this tensor is the output itself).

        Each call adds its contribution to ``self.grad``; call
        ``zero_grad`` to start over.
        """
        if not self.requires_grad:
            return
        if grad is None:
            # Starting point of back-propagation: d(self)/d(self) == 1.
            grad = np.ones_like(self.data)
        grad = np.asarray(grad)

        if self.grad is None:
            self.zero_grad()
        # Out-of-place add so an integer zero buffer can take float gradients.
        self.grad = self.grad + grad

        # Propagate only this call's contribution (not the running total);
        # a node reached through several paths is then summed exactly once
        # per path instead of being double-counted.
        if self.creation_op == 'add':
            left, right = self.parents
            left.backward(grad)
            right.backward(grad)
        elif self.creation_op == 'mul':
            left, right = self.parents
            left.backward(grad * right.data)
            right.backward(grad * left.data)
        # TODO: add more ops

    def __add__(self, other):
        """Return ``self + other`` (``other`` must be a ``Tensor``)."""
        return Tensor(self.data + other.data, requires_grad=True, parents=[self, other], creation_op='add')

    def __mul__(self, other):
        """Return ``self * other`` (``other`` must be a ``Tensor``)."""
        return Tensor(self.data * other.data, requires_grad=True, parents=[self, other], creation_op='mul')

    # TODO: add more ops

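Updating my Tensor class to handle automatic differentiation for more operations (subtraction, division, power, matrix multiplication, negation, transpose, and absolute value).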

In [None]:
class Tensor:
    """NumPy-backed tensor with reverse-mode automatic differentiation.

    Every arithmetic operation returns a new ``Tensor`` that remembers its
    operands (``parents``) and the operation name (``creation_op``).
    ``backward`` walks that graph recursively and accumulates gradients
    into ``grad``, which is itself a ``Tensor`` created with
    ``requires_grad=False`` (or ``None`` when gradients are not tracked).

    Python scalars used as operands are wrapped in constant tensors
    (``requires_grad=False``) so that every entry of ``parents`` is a
    ``Tensor``. Gradients are summed back over broadcast axes.
    """

    def __init__(self, data, requires_grad=True, parents=None, creation_op=None):
        """Wrap ``data`` and record how it was produced.

        Args:
            data: Anything ``np.array`` accepts.
            requires_grad: Whether to track a gradient for this tensor.
            parents: Tensors this one was computed from (empty for leaves).
            creation_op: Name of the producing operation, ``None`` for leaves.
        """
        self.data = np.array(data)  # underlying values
        self.shape = self.data.shape  # shape of data
        self.requires_grad = requires_grad  # whether to calculate gradients
        self.parents = parents or []
        self.creation_op = creation_op
        self.is_scalar = self.data.ndim == 0  # whether the data is a scalar
        # The gradient holder is built with requires_grad=False inside
        # zero_grad; otherwise it would try to build its own gradient and
        # recurse forever.
        self.grad = self.zero_grad() if requires_grad else None

    def zero_grad(self):
        """Reset ``grad`` to zeros shaped like ``data`` and return it."""
        self.grad = Tensor(np.zeros_like(self.data), requires_grad=False)
        return self.grad

    # Internal helpers ============================================
    @staticmethod
    def _as_operand(other, op_name):
        """Return ``other`` as a Tensor; scalars become constant tensors.

        Raises:
            ValueError: If ``other`` is neither a Tensor nor an int/float.
        """
        if isinstance(other, Tensor):
            return other
        if isinstance(other, (int, float)):
            return Tensor(other, requires_grad=False)
        raise ValueError(f"Unsupported type for {op_name}: {type(other)}")

    @staticmethod
    def _as_constant(other):
        """Wrap a non-Tensor left-hand operand as a constant tensor."""
        if isinstance(other, Tensor):
            return other
        return Tensor(other, requires_grad=False)

    @staticmethod
    def _from_op(value, parents, op):
        """Build the result of ``op``; it tracks grads if any parent does."""
        needs_grad = any(p.requires_grad for p in parents)
        return Tensor(value, requires_grad=needs_grad, parents=list(parents), creation_op=op)

    @staticmethod
    def _unbroadcast(grad, shape):
        """Sum ``grad`` over the axes NumPy broadcasting added to ``shape``."""
        grad = np.asarray(grad)
        extra = grad.ndim - len(shape)
        if extra > 0:
            # Leading axes that the operand did not have at all.
            grad = grad.sum(axis=tuple(range(extra)))
        stretched = tuple(
            axis for axis, size in enumerate(shape)
            if size == 1 and grad.shape[axis] != 1
        )
        if stretched:
            # Axes of length 1 that were stretched to a larger size.
            grad = grad.sum(axis=stretched, keepdims=True)
        return grad

    @staticmethod
    def _matmul_grads(left, right, upstream):
        """Return gradients of ``left @ right`` w.r.t. both operands.

        1-D operands are temporarily promoted to matrices the same way
        ``np.matmul`` does, and batch axes are un-broadcast afterwards.
        """
        left2 = left[np.newaxis, :] if left.ndim == 1 else left
        right2 = right[:, np.newaxis] if right.ndim == 1 else right
        up2 = upstream
        # Restore the axes np.matmul dropped from the result.
        if right.ndim == 1:
            up2 = up2[..., np.newaxis]
        if left.ndim == 1:
            up2 = np.expand_dims(up2, -2)
        grad_left = np.matmul(up2, np.swapaxes(right2, -1, -2))
        grad_right = np.matmul(np.swapaxes(left2, -1, -2), up2)
        grad_left = Tensor._unbroadcast(grad_left, left2.shape).reshape(left.shape)
        grad_right = Tensor._unbroadcast(grad_right, right2.shape).reshape(right.shape)
        return grad_left, grad_right

    def backward(self, grad=None):
        """Accumulate ``grad`` into this tensor and back-propagate it.

        Args:
            grad: Gradient of the final output with respect to this tensor,
                as a Tensor, array or scalar broadcastable to ``data``'s
                shape. Defaults to ones (this tensor is the output).

        Each call adds to ``grad``; use ``zero_grad`` to reset. Tensors
        with ``requires_grad=False`` are skipped.
        """
        if not self.requires_grad:  # nothing to track for this tensor
            return

        if grad is None:
            # Starting point of back-propagation: d(self)/d(self) == 1.
            upstream = np.ones_like(self.data)
        else:
            raw = grad.data if isinstance(grad, Tensor) else grad
            upstream = np.broadcast_to(np.asarray(raw), self.data.shape)

        previous = 0 if self.grad is None else self.grad.data
        # A fresh constant tensor keeps gradient bookkeeping out of the graph.
        self.grad = Tensor(previous + upstream, requires_grad=False)

        # Time to back-propagate. Only this call's contribution is pushed
        # down (not the running total), so a node reached through several
        # paths receives each path's gradient exactly once.
        op = self.creation_op
        unb = Tensor._unbroadcast
        if op in ('neg', 'transpose', 'abs'):
            parent = self.parents[0]
            if op == 'neg':
                parent.backward(-upstream)
            elif op == 'transpose':
                parent.backward(upstream.T)
            else:
                parent.backward(upstream * np.sign(parent.data))
        elif op in ('add', 'sub', 'mul', 'div', 'pow', 'matmul'):
            a, b = self.parents
            if op == 'add':
                a.backward(unb(upstream, a.data.shape))
                b.backward(unb(upstream, b.data.shape))
            elif op == 'sub':
                a.backward(unb(upstream, a.data.shape))
                b.backward(unb(-upstream, b.data.shape))
            elif op == 'mul':
                a.backward(unb(upstream * b.data, a.data.shape))
                b.backward(unb(upstream * a.data, b.data.shape))
            elif op == 'div':
                a.backward(unb(upstream / b.data, a.data.shape))
                b.backward(unb(-upstream * a.data / b.data ** 2, b.data.shape))
            elif op == 'pow':
                # Use a floating base: integer arrays cannot be raised to
                # negative integer powers and cannot hold log values.
                base = a.data.astype(np.result_type(a.data.dtype, np.float64))
                if a.requires_grad:
                    local = b.data * base ** (b.data - 1)
                    a.backward(unb(upstream * local, a.data.shape))
                if b.requires_grad:
                    # Only evaluated when the exponent is differentiable, so
                    # a constant exponent never triggers log of the base.
                    local = (base ** b.data) * np.log(base)
                    b.backward(unb(upstream * local, b.data.shape))
            else:  # matmul
                grad_a, grad_b = Tensor._matmul_grads(a.data, b.data, upstream)
                a.backward(grad_a)
                b.backward(grad_b)

    # Basic Operations ============================================
    def __add__(self, other):
        """Return ``self + other`` for a Tensor or int/float ``other``."""
        other = Tensor._as_operand(other, "addition")
        return Tensor._from_op(self.data + other.data, (self, other), 'add')

    def __sub__(self, other):
        """Return ``self - other`` for a Tensor or int/float ``other``."""
        other = Tensor._as_operand(other, "subtraction")
        return Tensor._from_op(self.data - other.data, (self, other), 'sub')

    def __mul__(self, other):
        """Return ``self * other`` for a Tensor or int/float ``other``."""
        other = Tensor._as_operand(other, "multiplication")
        return Tensor._from_op(self.data * other.data, (self, other), 'mul')

    def __truediv__(self, other):
        """Return ``self / other`` for a Tensor or int/float ``other``."""
        other = Tensor._as_operand(other, "division")
        return Tensor._from_op(self.data / other.data, (self, other), 'div')

    def __pow__(self, other):
        """Return ``self ** other`` for a Tensor or int/float ``other``."""
        other = Tensor._as_operand(other, "power")
        return Tensor._from_op(self.data ** other.data, (self, other), 'pow')

    def __matmul__(self, other):
        """Return ``self @ other``.

        Non-Tensor operands are wrapped as constants. If either operand is
        a scalar, a warning is issued and element-wise multiplication is
        used instead.

        Raises:
            ValueError: If the contracted dimensions do not match.
        """
        # Coerce first so that every later branch can rely on ``other.data``.
        if not isinstance(other, Tensor):
            other = Tensor(other, requires_grad=False)

        left, right = self.data, other.data
        if left.ndim == 0 or right.ndim == 0:
            if left.ndim == 0 and right.ndim == 0:  # both are scalars
                warnings.warn("Both of your inputs are scalars. Using element-wise multiplication instead. Use the * operator insead of @.")
            else:  # exactly one is a scalar
                warnings.warn("One of your inputs is a scalar. Using element-wise multiplication instead. Use the * operator insead of @.")
            return Tensor._from_op(left * right, (self, other), 'mul')

        # np.matmul contracts the last axis of the left operand with the
        # first axis of a vector, or the second-to-last axis of a matrix.
        inner_left = left.shape[-1]
        inner_right = right.shape[0] if right.ndim == 1 else right.shape[-2]
        if inner_left != inner_right:
            raise ValueError(f"Cannot perform matrix multiplication on tensors with shapes {self.data.shape} and {other.data.shape}.")

        return Tensor._from_op(np.matmul(left, right), (self, other), 'matmul')

    # Reverse Operations ============================================
    def __radd__(self, other):
        """Return ``other + self``."""
        return self.__add__(other)

    def __rsub__(self, other):
        """Return ``other - self`` (note the operand order)."""
        left = Tensor._as_constant(other)
        return Tensor._from_op(left.data - self.data, (left, self), 'sub')

    def __rmul__(self, other):
        """Return ``other * self``."""
        return self.__mul__(other)

    def __rtruediv__(self, other):
        """Return ``other / self`` (note the operand order)."""
        left = Tensor._as_constant(other)
        return Tensor._from_op(left.data / self.data, (left, self), 'div')

    def __rpow__(self, other):
        """Return ``other ** self`` (note the operand order)."""
        left = Tensor._as_constant(other)
        return Tensor._from_op(left.data ** self.data, (left, self), 'pow')

    def __rmatmul__(self, other):
        """Return ``other @ self`` with ``other`` on the left-hand side."""
        return Tensor._as_constant(other).__matmul__(self)

    # Unary Operations ============================================
    def __neg__(self):
        """Return ``-self``."""
        return Tensor._from_op(self.data * -1, (self,), 'neg')

    def __pos__(self):
        """Return ``self`` unchanged."""
        return self

    def __abs__(self):
        """Return the element-wise absolute value."""
        return Tensor(np.abs(self.data), requires_grad=self.requires_grad, parents=[self], creation_op='abs')