# Lab 1.5.1 Solutions: Core Tensor Implementation

This notebook contains solutions to the exercises from notebook 01.

---

## Exercise 1 Solution: Extended Value Class

Implement subtraction, negation, and power operations for the Value class.

In [None]:
class Value:
    """
    Scalar autograd node with addition, multiplication, negation,
    subtraction and constant-exponent power.

    Each node stores:
      data      - the scalar value (coerced to float)
      grad      - the gradient accumulated by backward()
      _prev     - the set of operand nodes this node was computed from
      _op       - a short label of the operation that produced the node
      _backward - a closure that pushes this node's grad to its operands

    Results are built with ``type(self)`` so that a subclass adding more
    operations gets instances of its own type back from the arithmetic
    defined here (the subclass must keep this constructor signature).
    """

    def __init__(self, data, _children=(), _op=''):
        self.data = float(data)
        self.grad = 0.0
        self._backward = lambda: None  # leaf nodes have nothing to propagate
        self._prev = set(_children)
        self._op = _op

    def __repr__(self):
        return f"Value(data={self.data:.4f}, grad={self.grad:.4f})"

    def __add__(self, other):
        """Addition: self + other (other may be a Value or a plain number)."""
        other = other if isinstance(other, Value) else Value(other)
        out = type(self)(self.data + other.data, (self, other), '+')

        def _backward():
            # d(a + b)/da = d(a + b)/db = 1
            self.grad += out.grad
            other.grad += out.grad
        out._backward = _backward

        return out

    def __radd__(self, other):
        """Handle: number + Value"""
        return self + other

    def __mul__(self, other):
        """Multiplication: self * other (other may be a Value or a number)."""
        other = other if isinstance(other, Value) else Value(other)
        out = type(self)(self.data * other.data, (self, other), '*')

        def _backward():
            # d(a * b)/da = b, d(a * b)/db = a
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward

        return out

    def __rmul__(self, other):
        """Handle: number * Value"""
        return self * other

    # SOLUTION: Negation
    def __neg__(self):
        """
        Negation: -self

        If y = -x, then dy/dx = -1
        So x.grad += -1 * out.grad

        Simpler approach: -x = x * (-1)
        """
        return self * -1

    # SOLUTION: Subtraction
    def __sub__(self, other):
        """
        Subtraction: self - other

        We can implement this as: self + (-other)
        """
        return self + (-other)

    def __rsub__(self, other):
        """Handle: number - Value"""
        return (-self) + other

    # SOLUTION: Power
    def __pow__(self, n):
        """
        Power: self ** n (where n is a number, not a Value)

        If y = x^n, then dy/dx = n * x^(n-1)

        By chain rule: x.grad += n * x^(n-1) * out.grad

        Raises:
            TypeError: if n is not an int or float.
        """
        # Explicit raise instead of assert: asserts are removed under -O.
        if not isinstance(n, (int, float)):
            raise TypeError("Power must be a number")
        out = type(self)(self.data ** n, (self,), f'**{n}')

        def _backward():
            # d(x^n)/dx = n * x^(n-1)
            # For n == 0 the output is the constant 1, so the gradient is 0;
            # skipping it avoids evaluating 0.0 ** -1 when x == 0.
            if n != 0:
                self.grad += n * (self.data ** (n - 1)) * out.grad
        out._backward = _backward

        return out

    def backward(self):
        """
        Backpropagate from this node.

        Sets this node's grad to 1.0 and calls ``_backward`` on every node
        reachable through ``_prev`` in reverse topological order. Gradients
        are accumulated with ``+=`` and are not reset here.
        """
        # Iterative post-order DFS: same ordering rule as the recursive
        # version (a node is appended after all its operands), but it does
        # not hit the recursion limit on deep computation graphs.
        topo = []
        visited = {self}
        stack = [(self, iter(self._prev))]
        while stack:
            node, pending = stack[-1]
            for child in pending:
                if child not in visited:
                    visited.add(child)
                    stack.append((child, iter(child._prev)))
                    break
            else:
                # Every operand of node has been emitted already.
                topo.append(node)
                stack.pop()

        self.grad = 1.0

        for node in reversed(topo):
            node._backward()

In [None]:
# Smoke-test the extended Value class. Each check prints a line ending in ✓
# when the result matches and a FAILED line otherwise.
print("Testing Extended Value Class")
print("=" * 50)

# Negation: -x flips the sign of the stored data
x = Value(3.0)
y = -x
if y.data == -3:
    print(f"Negation: -3 = {y.data} ✓")
else:
    print("Negation: FAILED")

# Subtraction: a - b
a = Value(5.0)
b = Value(3.0)
c = a - b
if c.data == 2:
    print(f"Subtraction: 5 - 3 = {c.data} ✓")
else:
    print("Subtraction: FAILED")

# Power: forward value of x ** 2
x = Value(3.0)
y = x ** 2
if y.data == 9:
    print(f"Power: 3^2 = {y.data} ✓")
else:
    print("Power: FAILED")

# Power: gradient of x ** 2 on a fresh graph
x = Value(3.0)
y = x ** 2
y.backward()
if x.grad == 6:
    print(f"Power gradient: d(x^2)/dx at x=3 = {x.grad} ✓")
else:
    print("Power gradient: FAILED (expected 6)")

# Combined expression y = x^2 - x: value 9 - 3 = 6, gradient 2x - 1 = 5
x = Value(3.0)
y = x ** 2 - x
y.backward()
print("\nCombined: x^2 - x at x=3")
if y.data == 6:
    print(f"  Value: {y.data} (expected 6) ✓")
else:
    print("  Value: FAILED")
if x.grad == 5:
    print(f"  Gradient: {x.grad} (expected 5) ✓")
else:
    print("  Gradient: FAILED")

---

## Challenge Solution: Division, Exp, Log, and Softmax

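Here are implementations of more complex operations: division, exp, log, tanh, and ReLU. Softmax is not implemented as a separate method; it can be composed from `exp` and division, e.g. `exps[i] / sum(exps)` where `exps = [x.exp() for x in xs]`.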

In [None]:
import math

class ValueAdvanced(Value):
    """
    Advanced Value class with division, exp, log, tanh and ReLU operations.

    Every operation defined in this class returns a ValueAdvanced, so the
    result can be chained into further advanced operations
    (for example ``(a / b).log()``).
    """

    @staticmethod
    def _quotient(numerator, denominator):
        """Build the node for numerator / denominator (both Value nodes)."""
        out = ValueAdvanced(numerator.data / denominator.data,
                            (numerator, denominator), '/')

        def _backward():
            # y = a / b  =>  dy/da = 1 / b  and  dy/db = -a / b^2 = -y / b
            # (-y / b avoids squaring b, which can overflow for large b)
            numerator.grad += out.grad / denominator.data
            denominator.grad -= (out.data / denominator.data) * out.grad
        out._backward = _backward

        return out

    def __truediv__(self, other):
        """
        Division: self / other (other may be a Value or a plain number)

        If y = a / b, then:
          dy/da = 1/b
          dy/db = -a / b^2

        Computed directly rather than as self * other^(-1) so that the
        result is a ValueAdvanced and no extra rounding step is introduced.
        """
        other = other if isinstance(other, Value) else ValueAdvanced(other)
        return ValueAdvanced._quotient(self, other)

    def __rtruediv__(self, other):
        """Handle: number / Value"""
        other = other if isinstance(other, Value) else ValueAdvanced(other)
        return ValueAdvanced._quotient(other, self)

    def exp(self):
        """
        Exponential: e^self

        If y = e^x, then dy/dx = e^x = y
        """
        out = ValueAdvanced(math.exp(self.data), (self,), 'exp')

        def _backward():
            # d(e^x)/dx = e^x = out.data
            self.grad += out.data * out.grad
        out._backward = _backward

        return out

    def log(self):
        """
        Natural logarithm: log(self)

        If y = log(x), then dy/dx = 1/x

        Raises:
            ValueError: if the stored value is not strictly positive.
        """
        # Explicit raise instead of assert: asserts are removed under -O.
        # "not > 0" (rather than "<= 0") also rejects NaN.
        if not self.data > 0:
            raise ValueError("Log requires positive input")
        out = ValueAdvanced(math.log(self.data), (self,), 'log')

        def _backward():
            # d(log(x))/dx = 1/x
            self.grad += (1.0 / self.data) * out.grad
        out._backward = _backward

        return out

    def tanh(self):
        """
        Hyperbolic tangent: tanh(self)

        If y = tanh(x), then dy/dx = 1 - tanh(x)^2 = 1 - y^2
        """
        t = math.tanh(self.data)
        out = ValueAdvanced(t, (self,), 'tanh')

        def _backward():
            # d(tanh(x))/dx = 1 - tanh(x)^2, reusing the forward result t
            self.grad += (1 - t ** 2) * out.grad
        out._backward = _backward

        return out

    def relu(self):
        """
        ReLU: max(0, self)

        If y = ReLU(x):
          dy/dx = 1 if x > 0
          dy/dx = 0 if x <= 0
        """
        out = ValueAdvanced(max(0, self.data), (self,), 'relu')

        def _backward():
            # The gradient passes through only where the input was positive.
            self.grad += (1.0 if self.data > 0 else 0.0) * out.grad
        out._backward = _backward

        return out

In [None]:
# Smoke-test ValueAdvanced. Each check prints a line ending in ✓ when the
# result matches and a FAILED line otherwise.
print("Testing Advanced Operations")
print("=" * 50)

# Division: forward value of a / b
a = ValueAdvanced(6.0)
b = ValueAdvanced(2.0)
c = a / b
if abs(c.data - 3.0) < 1e-6:
    print(f"Division: 6 / 2 = {c.data} ✓")
else:
    print("Division: FAILED")

# Exp: forward value at x = 0
x = ValueAdvanced(0.0)
y = x.exp()
if abs(y.data - 1.0) < 1e-6:
    print(f"Exp: e^0 = {y.data} ✓")
else:
    print("Exp: FAILED")

# Exp: the gradient at x = 1 should equal e
x = ValueAdvanced(1.0)
y = x.exp()
y.backward()
if abs(x.grad - math.e) < 1e-4:
    print(f"Exp gradient: d(e^x)/dx at x=1 = {x.grad:.4f} (expected {math.e:.4f}) ✓")
else:
    print("Exp gradient: FAILED")

# Log: forward value at x = e
x = ValueAdvanced(math.e)
y = x.log()
if abs(y.data - 1.0) < 1e-6:
    print(f"Log: log(e) = {y.data} ✓")
else:
    print("Log: FAILED")

# Log: the gradient at x = 2 should be 1/2
x = ValueAdvanced(2.0)
y = x.log()
y.backward()
if abs(x.grad - 0.5) < 1e-6:
    print(f"Log gradient: d(log(x))/dx at x=2 = {x.grad} (expected 0.5) ✓")
else:
    print("Log gradient: FAILED")

# ReLU: a negative input is clamped to zero
x = ValueAdvanced(-2.0)
y = x.relu()
if y.data == 0:
    print(f"ReLU: relu(-2) = {y.data} ✓")
else:
    print("ReLU: FAILED")

# ReLU: a positive input passes through unchanged
x = ValueAdvanced(3.0)
y = x.relu()
if y.data == 3:
    print(f"ReLU: relu(3) = {y.data} ✓")
else:
    print("ReLU: FAILED")

---

## Key Takeaways

1. **Negation** can be implemented as multiplication by -1
2. **Subtraction** can be implemented as addition with negation
3. **Division** can be implemented as multiplication with power of -1
4. For **power** $y = x^n$, the gradient is $n \cdot x^{n-1}$
5. For **exp** $y = e^x$, the gradient is $e^x$ (the output itself)
6. For **log** $y = \log(x)$, the gradient is $1/x$
7. For **ReLU** $y = \max(0, x)$, the gradient is 1 if $x > 0$, else 0

The pattern is always:
1. Compute the output
2. Define a `_backward` closure that accumulates (`+=`) into the parents' gradients using the chain rule
3. The local gradient (the derivative of the operation) is multiplied by the upstream gradient (`out.grad`)