From d73bc0c4de72206ddd8e5518c6f4340bab0840bd Mon Sep 17 00:00:00 2001
From: benedictclarkson1
Date: Sun, 14 Jan 2024 18:32:21 +0000
Subject: [PATCH] Fix loss normalisation, gradient tracking, and in-place ops

- normalise mean squared error by the final axis of the squared error
- mark constant helper tensors (ones, indicators, power coefficients) as
  requires_grad=False so backward() does not treat them as leaves
- stop the arithmetic dunders re-wrapping Tensor operands: Tensor
  subclasses np.ndarray, so the old isinstance check matched Tensors too
- simplify the in-place operators and add __itruediv__
- add debug logging and an adjacency map of the graph to backward()
- fix the linear regression test: apply a learning rate, rebuild the
  parameters each step, and drop the stray breakpoint()
---
 src/tricycle_v2/binary.py |  3 +--
 src/tricycle_v2/loss.py   |  7 ++++---
 src/tricycle_v2/ops.py    |  6 +++---
 src/tricycle_v2/reduce.py |  6 ++++--
 src/tricycle_v2/tensor.py | 29 +++++++++++++++++-----------
 src/tricycle_v2/unary.py  |  4 ++--
 tests/test_loss.py        | 40 ++++++++++++++++++++++++++-------------
 7 files changed, 59 insertions(+), 36 deletions(-)

diff --git a/src/tricycle_v2/binary.py b/src/tricycle_v2/binary.py
index 81f8f14..5e22458 100644
--- a/src/tricycle_v2/binary.py
+++ b/src/tricycle_v2/binary.py
@@ -32,8 +32,7 @@ def bsub(tensor_1: Tensor, tensor_2: Tensor) -> Tensor:
     """
     assert tensor_1.shape == tensor_2.shape
 
-    tensor_2_neg = umul(tensor_2, -1)
-    return badd(tensor_1, tensor_2_neg)
+    return badd(tensor_1, umul(tensor_2, -1))
 
 
 def bmul(tensor_1: Tensor, tensor_2: Tensor) -> Tensor:
diff --git a/src/tricycle_v2/loss.py b/src/tricycle_v2/loss.py
index f19c5b9..2b6fdf1 100644
--- a/src/tricycle_v2/loss.py
+++ b/src/tricycle_v2/loss.py
@@ -1,15 +1,16 @@
 from string import ascii_lowercase
 
-from tricycle_v2.tensor import Tensor
+from tricycle_v2.binary import bsub
 from tricycle_v2.reduce import radd
+from tricycle_v2.tensor import Tensor
 
 
 def mean_squared_error(y_true: Tensor, y_pred: Tensor) -> Tensor:
     """
     Calcuate the mean square error along the final index of a tensor
     """
-    square_error = (y_true - y_pred) ** 2
+    square_error = (y_true - y_pred)**2
     indices = ascii_lowercase[: len(square_error.shape)]
     subscript = f"{indices}->{indices[:-1]}"
     total_error = radd(square_error, subscript)
-    return total_error / y_true.shape[-1]
+    return total_error / square_error.shape[-1]
diff --git a/src/tricycle_v2/ops.py b/src/tricycle_v2/ops.py
index ddaa7b9..761b984 100644
--- a/src/tricycle_v2/ops.py
+++ b/src/tricycle_v2/ops.py
@@ -42,12 +42,12 @@ def repeat(subscripts, tensor, out_shape):
 
     one_indices = ""
     one_shape = []
-    for i, out_idx in enumerate(output):
+    for size, out_idx in zip(out_shape, output):
         if out_idx not in index:
             one_indices += out_idx
-            one_shape.append(out_shape[i])
+            one_shape.append(size)
 
-    ones = to_tensor(np.ones(one_shape))
+    ones = to_tensor(np.ones(one_shape), requires_grad=False)
     new_subscript = f"{one_indices},{index}->{output}"
 
     return einsum(new_subscript, ones, tensor)
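For context on the repeat fix, a minimal sketch of the broadcast it performs;
the shapes and values here are illustrative assumptions, not part of the patch:

    import numpy as np
    from tricycle_v2.ops import repeat
    from tricycle_v2.tensor import to_tensor

    # Tile a length-3 vector across 4 rows. Output sizes are now zipped
    # with the output indices, and the internal ones tensor is built with
    # requires_grad=False so backward() never treats it as a leaf.
    vec = to_tensor(np.array([1.0, 2.0, 3.0]))
    tiled = repeat("j->ij", vec, (4, 3))
    assert tiled.shape == (4, 3)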
diff --git a/src/tricycle_v2/reduce.py b/src/tricycle_v2/reduce.py
index ac97014..ebbf7ee 100644
--- a/src/tricycle_v2/reduce.py
+++ b/src/tricycle_v2/reduce.py
@@ -1,7 +1,7 @@
 import numpy as np
 
-from tricycle_v2.ops import _parse_subscripts, einsum, to_tensor
-from tricycle_v2.tensor import Tensor
+from tricycle_v2.ops import _parse_subscripts, einsum
+from tricycle_v2.tensor import Tensor, to_tensor
 
 
 def radd(tensor: Tensor, subscript: str):
@@ -53,6 +53,7 @@ def rmax(tensor: Tensor, subscript: str):
     indicator = (
         tensor == np.max(tensor, axis=tuple(reduce_along_axes), keepdims=True)
     ).astype(int)
+    indicator = to_tensor(indicator, requires_grad=False)
 
     new_subscript = f"{idx},{idx}->{output}"
 
@@ -79,6 +80,7 @@ def rmin(tensor: Tensor, subscript: str):
     indicator = (
         tensor == np.min(tensor, axis=tuple(reduce_along_axes), keepdims=True)
     ).astype(int)
+    indicator = to_tensor(indicator, requires_grad=False)
 
     new_subscript = f"{idx},{idx}->{output}"
 
diff --git a/src/tricycle_v2/tensor.py b/src/tricycle_v2/tensor.py
index 45b4708..a0692f6 100644
--- a/src/tricycle_v2/tensor.py
+++ b/src/tricycle_v2/tensor.py
@@ -1,7 +1,11 @@
+import logging
+from collections import defaultdict
 from typing import Callable, Dict, List, Optional, Tuple
 
 import numpy as np
 
+logger = logging.getLogger(__name__)
+
 Op = Callable[..., "Tensor"]
 
 
@@ -22,6 +26,7 @@ class Tensor(np.ndarray):
     def backward(self):
         stack: List[Tuple[Tensor, List[Op]]] = [(self, [])]
         leaves: Dict[int, Tensor] = {}
+        adjacency_matrix = defaultdict(list)
 
         # Find every route to a differentiable parameter
         while stack:
@@ -38,11 +43,13 @@ def backward(self):
 
             else:
                 for arg, op in zip(current_node.args, current_node.back_fn):
+                    logger.info(f"{hash(current_node)=} {hash(arg)=} {op=}")
                     if not arg.requires_grad:
                         continue
 
                     new_gradient = current_gradient + [op]
                     stack.append((arg, new_gradient))
+                    adjacency_matrix[hash(current_node)].append(hash(arg))
 
         # calculate the gradient for each parameter
         for leaf in leaves.values():
@@ -62,7 +69,7 @@ def __hash__(self) -> int:
         return id(self)
 
     def __add__(self, other):
-        if isinstance(other, np.ndarray):
+        if isinstance(other, np.ndarray) and not isinstance(other, Tensor):
             other = to_tensor(other)
         if np.isscalar(other):
             from tricycle_v2.unary import uadd
@@ -76,11 +83,10 @@ def __add__(self, other):
         raise NotImplementedError(f"Cannot add {type(self)} and {type(other)}")
 
     def __iadd__(self, other):
-        self = self + other
-        return self
+        return self + other
 
     def __sub__(self, other):
-        if isinstance(other, np.ndarray):
+        if isinstance(other, np.ndarray) and not isinstance(other, Tensor):
             other = to_tensor(other)
         if np.isscalar(other):
             from tricycle_v2.unary import usub
@@ -95,11 +101,10 @@ def __sub__(self, other):
         raise NotImplementedError(f"Cannot sub {type(self)} and {type(other)}")
 
     def __isub__(self, other):
-        self = self - other
-        return self
+        return self - other
 
     def __mul__(self, other):
-        if isinstance(other, np.ndarray):
+        if isinstance(other, np.ndarray) and not isinstance(other, Tensor):
             other = to_tensor(other)
         if np.isscalar(other):
             from tricycle_v2.unary import umul
@@ -115,11 +120,10 @@ def __mul__(self, other):
         raise NotImplementedError(f"Cannot mul {type(self)} and {type(other)}")
 
     def __imul__(self, other):
-        self = self * other
-        return self
+        return self * other
 
     def __truediv__(self, other):
-        if isinstance(other, np.ndarray):
+        if isinstance(other, np.ndarray) and not isinstance(other, Tensor):
             other = to_tensor(other)
         if np.isscalar(other):
             from tricycle_v2.unary import udiv
@@ -133,6 +137,9 @@ def __truediv__(self, other):
         else:
             raise NotImplementedError(f"Cannot divide {type(self)} and {type(other)}")
 
+    def __itruediv__(self, other):
+        return self / other
+
     def __floordiv__(self, _):
         raise NotImplementedError("Cannot floor divide")
 
@@ -140,7 +147,7 @@ def __mod__(self, _):
         raise NotImplementedError("Cannot mod")
 
     def __pow__(self, other):
-        if isinstance(other, np.ndarray):
+        if isinstance(other, np.ndarray) and not isinstance(other, Tensor):
             other = to_tensor(other)
         if np.isscalar(other):
             from tricycle_v2.unary import upow
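The new isinstance guard matters because Tensor subclasses np.ndarray: under
the old check a Tensor operand could be re-wrapped by to_tensor and silently
cut out of the autodiff graph. A small sketch with illustrative values:

    import numpy as np
    from tricycle_v2.reduce import radd
    from tricycle_v2.tensor import to_tensor

    a = to_tensor(np.array([1.0, 2.0]))
    b = to_tensor(np.array([3.0, 4.0]))

    # b is an np.ndarray *and* a Tensor; with the extra check it is used
    # as-is rather than re-wrapped, so gradients can reach it.
    total = radd(a + b, "i->")
    total.backward()
    print(a.grad, b.grad)  # both should now be populated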
diff --git a/src/tricycle_v2/unary.py b/src/tricycle_v2/unary.py
index 3279b72..4ea8de9 100644
--- a/src/tricycle_v2/unary.py
+++ b/src/tricycle_v2/unary.py
@@ -66,7 +66,7 @@ def upow(tensor: Tensor, constant: float) -> Tensor:
     result = to_tensor(np.power(tensor, constant))
     result.args = (tensor,)
 
-    coeff = to_tensor(np.power(tensor, constant - 1))
+    coeff = to_tensor(np.power(tensor, constant - 1), requires_grad=False)
     coeff = umul(coeff, constant)
 
     assert coeff.shape == tensor.shape
@@ -101,7 +101,7 @@ def umax(tensor: Tensor, constant: float) -> Tensor:
 
     result = to_tensor(np.maximum(tensor, constant))
 
-    indicator = to_tensor((tensor > constant).astype(float))
+    indicator = to_tensor((tensor > constant).astype(float), requires_grad=False)
 
     indices = ascii_letters[: len(tensor.shape)]
     subscripts = f"{indices},{indices}->{indices}"
diff --git a/tests/test_loss.py b/tests/test_loss.py
index ef252c8..0cfc0f7 100644
--- a/tests/test_loss.py
+++ b/tests/test_loss.py
@@ -1,8 +1,10 @@
 import numpy as np
+from matplotlib import pyplot as plt
 
 from tricycle_v2.loss import mean_squared_error
+from tricycle_v2.ops import repeat
+from tricycle_v2.reduce import radd
 from tricycle_v2.tensor import to_tensor
-from tricycle_v2.ops import repeat
 
 
 def test_can_mean_square_error():
@@ -18,24 +20,36 @@ def test_can_mean_square_error():
 
 def test_can_linear_regression():
     np.random.seed(42)
-    x = np.linspace(-10, 10, 201)
-    y = x * 2 + 1 + np.random.normal(loc=0, scale=0.01, size=201)
+    n = 10
+    learning_rate = 1e-2
+    x = np.linspace(-10, 10, n)
+    y = x * 2 + 1 + np.random.normal(loc=0, scale=0.01, size=n)
 
-    x = to_tensor(x.reshape(-1, 1))
-    y = to_tensor(y)
+    x = to_tensor(x.reshape(-1, 1), requires_grad=False, name="x")
+    y = to_tensor(y.reshape(-1, 1), requires_grad=False, name="y")
 
-    slope = to_tensor([0.01])
-    intercept = to_tensor(0.01)
+    slope = to_tensor([0.01], name="slope")
+    intercept = to_tensor([0.01], name="intercept")
+    losses = []
 
     for _ in range(100):
-        repeated_slope = repeat("i->ji", slope, (x.shape[0],))
-        repeated_intercept = repeat("i->ji", intercept, (x.shape[0],))
+        repeated_slope = repeat("j->ij", slope, x.shape)
+        repeated_intercept = repeat("j->ij", intercept, x.shape)
 
         y_pred = x * repeated_slope + repeated_intercept
-        loss = mean_squared_error(y, y_pred)
+        mse = mean_squared_error(y, y_pred)
+        loss = radd(mse, "i->") / y.shape[0]
+
+        losses.append(loss)
         loss.backward()
 
-        breakpoint()
-        slope -= slope.grad
-        intercept -= intercept.grad
+        slope = to_tensor(slope - slope.grad * learning_rate, name="slope")
+        intercept = to_tensor(
+            intercept - intercept.grad * learning_rate, name="intercept"
+        )
+
+    _, ax = plt.subplots()
+    ax.plot(losses)
+    ax.set_yscale("log")
+    plt.show()
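The updated test is ordinary gradient descent on slope and intercept. As a
standalone sketch outside pytest (the data, learning rate, and iteration
count here are illustrative, not taken from the patch):

    import numpy as np
    from tricycle_v2.loss import mean_squared_error
    from tricycle_v2.ops import repeat
    from tricycle_v2.reduce import radd
    from tricycle_v2.tensor import to_tensor

    x = to_tensor(np.linspace(-1, 1, 10).reshape(-1, 1), requires_grad=False, name="x")
    y = to_tensor((np.linspace(-1, 1, 10) * 2 + 1).reshape(-1, 1), requires_grad=False, name="y")
    slope = to_tensor([0.0], name="slope")
    intercept = to_tensor([0.0], name="intercept")

    for _ in range(50):
        y_pred = x * repeat("j->ij", slope, x.shape) + repeat("j->ij", intercept, x.shape)
        loss = radd(mean_squared_error(y, y_pred), "i->") / y.shape[0]
        loss.backward()
        # rebuild the parameters so each step starts from a fresh graph,
        # mirroring the updated test
        slope = to_tensor(slope - slope.grad * 1e-2, name="slope")
        intercept = to_tensor(intercept - intercept.grad * 1e-2, name="intercept")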