From d73bc0c4de72206ddd8e5518c6f4340bab0840bd Mon Sep 17 00:00:00 2001
From: benedictclarkson1
Date: Sun, 14 Jan 2024 18:32:21 +0000
Subject: [PATCH] Fix loss normalisation, gradient tracking, and in-place ops

- normalise mean squared error by the final axis of the squared error
- mark constant helper tensors (ones, indicators, power coefficients) as
  requires_grad=False so backward() does not treat them as leaves
- stop the arithmetic dunders re-wrapping Tensor operands: Tensor
  subclasses np.ndarray, so the old isinstance check matched Tensors too
- simplify the in-place operators and add __itruediv__
- add debug logging and an adjacency map of the graph to backward()
- fix the linear regression test: apply a learning rate, rebuild the
  parameters each step, and drop the stray breakpoint()
---
 src/tricycle_v2/binary.py |  3 +--
 src/tricycle_v2/loss.py   |  7 ++++---
 src/tricycle_v2/ops.py    |  6 +++---
 src/tricycle_v2/reduce.py |  6 ++++--
 src/tricycle_v2/tensor.py | 29 +++++++++++++++++-----------
 src/tricycle_v2/unary.py  |  4 ++--
 tests/test_loss.py        | 40 ++++++++++++++++++++++++++-------------
 7 files changed, 59 insertions(+), 36 deletions(-)

diff --git a/src/tricycle_v2/binary.py b/src/tricycle_v2/binary.py
index 81f8f14..5e22458 100644
--- a/src/tricycle_v2/binary.py
+++ b/src/tricycle_v2/binary.py
@@ -32,8 +32,7 @@ def bsub(tensor_1: Tensor, tensor_2: Tensor) -> Tensor:
     """
     assert tensor_1.shape == tensor_2.shape
 
-    tensor_2_neg = umul(tensor_2, -1)
-    return badd(tensor_1, tensor_2_neg)
+    return badd(tensor_1, umul(tensor_2, -1))
 
 
 def bmul(tensor_1: Tensor, tensor_2: Tensor) -> Tensor:
diff --git a/src/tricycle_v2/loss.py b/src/tricycle_v2/loss.py
index f19c5b9..2b6fdf1 100644
--- a/src/tricycle_v2/loss.py
+++ b/src/tricycle_v2/loss.py
@@ -1,15 +1,16 @@
 from string import ascii_lowercase
 
-from tricycle_v2.tensor import Tensor
+from tricycle_v2.binary import bsub
 from tricycle_v2.reduce import radd
+from tricycle_v2.tensor import Tensor
 
 
 def mean_squared_error(y_true: Tensor, y_pred: Tensor) -> Tensor:
     """
     Calcuate the mean square error along the final index of a tensor
     """
-    square_error = (y_true - y_pred) ** 2
+    square_error = (y_true - y_pred)**2
     indices = ascii_lowercase[: len(square_error.shape)]
     subscript = f"{indices}->{indices[:-1]}"
     total_error = radd(square_error, subscript)
-    return total_error / y_true.shape[-1]
+    return total_error / square_error.shape[-1]
diff --git a/src/tricycle_v2/ops.py b/src/tricycle_v2/ops.py
index ddaa7b9..761b984 100644
--- a/src/tricycle_v2/ops.py
+++ b/src/tricycle_v2/ops.py
@@ -42,12 +42,12 @@ def repeat(subscripts, tensor, out_shape):
 
     one_indices = ""
     one_shape = []
-    for i, out_idx in enumerate(output):
+    for size, out_idx in zip(out_shape, output):
         if out_idx not in index:
             one_indices += out_idx
-            one_shape.append(out_shape[i])
+            one_shape.append(size)
 
-    ones = to_tensor(np.ones(one_shape))
+    ones = to_tensor(np.ones(one_shape), requires_grad=False)
     new_subscript = f"{one_indices},{index}->{output}"
 
     return einsum(new_subscript, ones, tensor)
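For context on the repeat fix, a minimal sketch of the broadcast it performs;
the shapes and values here are illustrative assumptions, not part of the patch:

    import numpy as np
    from tricycle_v2.ops import repeat
    from tricycle_v2.tensor import to_tensor

    # Tile a length-3 vector across 4 rows. Output sizes are now zipped
    # with the output indices, and the internal ones tensor is built with
    # requires_grad=False so backward() never treats it as a leaf.
    vec = to_tensor(np.array([1.0, 2.0, 3.0]))
    tiled = repeat("j->ij", vec, (4, 3))
    assert tiled.shape == (4, 3)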
diff --git a/src/tricycle_v2/reduce.py b/src/tricycle_v2/reduce.py
index ac97014..ebbf7ee 100644
--- a/src/tricycle_v2/reduce.py
+++ b/src/tricycle_v2/reduce.py
@@ -1,7 +1,7 @@
 import numpy as np
 
-from tricycle_v2.ops import _parse_subscripts, einsum, to_tensor
-from tricycle_v2.tensor import Tensor
+from tricycle_v2.ops import _parse_subscripts, einsum
+from tricycle_v2.tensor import Tensor, to_tensor
 
 
 def radd(tensor: Tensor, subscript: str):
@@ -53,6 +53,7 @@ def rmax(tensor: Tensor, subscript: str):
     indicator = (
         tensor == np.max(tensor, axis=tuple(reduce_along_axes), keepdims=True)
     ).astype(int)
+    indicator = to_tensor(indicator, requires_grad=False)
 
     new_subscript = f"{idx},{idx}->{output}"
 
@@ -79,6 +80,7 @@ def rmin(tensor: Tensor, subscript: str):
     indicator = (
         tensor == np.min(tensor, axis=tuple(reduce_along_axes), keepdims=True)
     ).astype(int)
+    indicator = to_tensor(indicator, requires_grad=False)
 
     new_subscript = f"{idx},{idx}->{output}"
 
diff --git a/src/tricycle_v2/tensor.py b/src/tricycle_v2/tensor.py
index 45b4708..a0692f6 100644
--- a/src/tricycle_v2/tensor.py
+++ b/src/tricycle_v2/tensor.py
@@ -1,7 +1,11 @@
+import logging
+from collections import defaultdict
 from typing import Callable, Dict, List, Optional, Tuple
 
 import numpy as np
 
+logger = logging.getLogger(__name__)
+
 Op = Callable[..., "Tensor"]
 
 
@@ -22,6 +26,7 @@ class Tensor(np.ndarray):
     def backward(self):
         stack: List[Tuple[Tensor, List[Op]]] = [(self, [])]
         leaves: Dict[int, Tensor] = {}
+        adjacency_matrix = defaultdict(list)
 
         # Find every route to a differentiable parameter
         while stack:
@@ -38,11 +43,13 @@ def backward(self):
 
             else:
                 for arg, op in zip(current_node.args, current_node.back_fn):
+                    logger.info(f"{hash(current_node)=} {hash(arg)=} {op=}")
                     if not arg.requires_grad:
                         continue
 
                     new_gradient = current_gradient + [op]
                     stack.append((arg, new_gradient))
+                    adjacency_matrix[hash(current_node)].append(hash(arg))
 
         # calculate the gradient for each parameter
         for leaf in leaves.values():
@@ -62,7 +69,7 @@ def __hash__(self) -> int:
         return id(self)
 
     def __add__(self, other):
-        if isinstance(other, np.ndarray):
+        if isinstance(other, np.ndarray) and not isinstance(other, Tensor):
             other = to_tensor(other)
         if np.isscalar(other):
             from tricycle_v2.unary import uadd
@@ -76,11 +83,10 @@ def __add__(self, other):
         raise NotImplementedError(f"Cannot add {type(self)} and {type(other)}")
 
     def __iadd__(self, other):
-        self = self + other
-        return self
+        return self + other
 
     def __sub__(self, other):
-        if isinstance(other, np.ndarray):
+        if isinstance(other, np.ndarray) and not isinstance(other, Tensor):
             other = to_tensor(other)
         if np.isscalar(other):
             from tricycle_v2.unary import usub
@@ -95,11 +101,10 @@ def __sub__(self, other):
         raise NotImplementedError(f"Cannot sub {type(self)} and {type(other)}")
 
     def __isub__(self, other):
-        self = self - other
-        return self
+        return self - other
 
     def __mul__(self, other):
-        if isinstance(other, np.ndarray):
+        if isinstance(other, np.ndarray) and not isinstance(other, Tensor):
             other = to_tensor(other)
         if np.isscalar(other):
             from tricycle_v2.unary import umul
@@ -115,11 +120,10 @@ def __mul__(self, other):
         raise NotImplementedError(f"Cannot mul {type(self)} and {type(other)}")
 
     def __imul__(self, other):
-        self = self * other
-        return self
+        return self * other
 
     def __truediv__(self, other):
-        if isinstance(other, np.ndarray):
+        if isinstance(other, np.ndarray) and not isinstance(other, Tensor):
             other = to_tensor(other)
         if np.isscalar(other):
             from tricycle_v2.unary import udiv
@@ -133,6 +137,9 @@ def __truediv__(self, other):
         else:
             raise NotImplementedError(f"Cannot divide {type(self)} and {type(other)}")
 
+    def __itruediv__(self, other):
+        return self / other
+
     def __floordiv__(self, _):
         raise NotImplementedError("Cannot floor divide")
 
@@ -140,7 +147,7 @@ def __mod__(self, _):
         raise NotImplementedError("Cannot mod")
 
     def __pow__(self, other):
-        if isinstance(other, np.ndarray):
+        if isinstance(other, np.ndarray) and not isinstance(other, Tensor):
             other = to_tensor(other)
         if np.isscalar(other):
             from tricycle_v2.unary import upow
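The new isinstance guard matters because Tensor subclasses np.ndarray: under
the old check a Tensor operand could be re-wrapped by to_tensor and silently
cut out of the autodiff graph. A small sketch with illustrative values:

    import numpy as np
    from tricycle_v2.reduce import radd
    from tricycle_v2.tensor import to_tensor

    a = to_tensor(np.array([1.0, 2.0]))
    b = to_tensor(np.array([3.0, 4.0]))

    # b is an np.ndarray *and* a Tensor; with the extra check it is used
    # as-is rather than re-wrapped, so gradients can reach it.
    total = radd(a + b, "i->")
    total.backward()
    print(a.grad, b.grad)  # both should now be populated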
diff --git a/src/tricycle_v2/unary.py b/src/tricycle_v2/unary.py
index 3279b72..4ea8de9 100644
--- a/src/tricycle_v2/unary.py
+++ b/src/tricycle_v2/unary.py
@@ -66,7 +66,7 @@ def upow(tensor: Tensor, constant: float) -> Tensor:
     result = to_tensor(np.power(tensor, constant))
     result.args = (tensor,)
 
-    coeff = to_tensor(np.power(tensor, constant - 1))
+    coeff = to_tensor(np.power(tensor, constant - 1), requires_grad=False)
     coeff = umul(coeff, constant)
 
     assert coeff.shape == tensor.shape
@@ -101,7 +101,7 @@ def umax(tensor: Tensor, constant: float) -> Tensor:
 
     result = to_tensor(np.maximum(tensor, constant))
 
-    indicator = to_tensor((tensor > constant).astype(float))
+    indicator = to_tensor((tensor > constant).astype(float), requires_grad=False)
 
     indices = ascii_letters[: len(tensor.shape)]
     subscripts = f"{indices},{indices}->{indices}"
diff --git a/tests/test_loss.py b/tests/test_loss.py
index ef252c8..0cfc0f7 100644
--- a/tests/test_loss.py
+++ b/tests/test_loss.py
@@ -1,8 +1,10 @@
 import numpy as np
+from matplotlib import pyplot as plt
 
 from tricycle_v2.loss import mean_squared_error
+from tricycle_v2.ops import repeat
+from tricycle_v2.reduce import radd
 from tricycle_v2.tensor import to_tensor
-from tricycle_v2.ops import repeat
 
 
 def test_can_mean_square_error():
@@ -18,24 +20,36 @@ def test_can_mean_square_error():
 
 def test_can_linear_regression():
     np.random.seed(42)
-    x = np.linspace(-10, 10, 201)
-    y = x * 2 + 1 + np.random.normal(loc=0, scale=0.01, size=201)
+    n = 10
+    learning_rate = 1e-2
+    x = np.linspace(-10, 10, n)
+    y = x * 2 + 1 + np.random.normal(loc=0, scale=0.01, size=n)
 
-    x = to_tensor(x.reshape(-1, 1))
-    y = to_tensor(y)
+    x = to_tensor(x.reshape(-1, 1), requires_grad=False, name="x")
+    y = to_tensor(y.reshape(-1, 1), requires_grad=False, name="y")
 
-    slope = to_tensor([0.01])
-    intercept = to_tensor(0.01)
+    slope = to_tensor([0.01], name="slope")
+    intercept = to_tensor([0.01], name="intercept")
+    losses = []
 
     for _ in range(100):
-        repeated_slope = repeat("i->ji", slope, (x.shape[0],))
-        repeated_intercept = repeat("i->ji", intercept, (x.shape[0],))
+        repeated_slope = repeat("j->ij", slope, x.shape)
+        repeated_intercept = repeat("j->ij", intercept, x.shape)
 
         y_pred = x * repeated_slope + repeated_intercept
-        loss = mean_squared_error(y, y_pred)
+        mse = mean_squared_error(y, y_pred)
+        loss = radd(mse, "i->") / y.shape[0]
+
+        losses.append(loss)
         loss.backward()
 
-        breakpoint()
-        slope -= slope.grad
-        intercept -= intercept.grad
+        slope = to_tensor(slope - slope.grad * learning_rate, name="slope")
+        intercept = to_tensor(
+            intercept - intercept.grad * learning_rate, name="intercept"
+        )
+
+    _, ax = plt.subplots()
+    ax.plot(losses)
+    ax.set_yscale("log")
+    plt.show()
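The updated test is ordinary gradient descent on slope and intercept. As a
standalone sketch outside pytest (the data, learning rate, and iteration
count here are illustrative, not taken from the patch):

    import numpy as np
    from tricycle_v2.loss import mean_squared_error
    from tricycle_v2.ops import repeat
    from tricycle_v2.reduce import radd
    from tricycle_v2.tensor import to_tensor

    x = to_tensor(np.linspace(-1, 1, 10).reshape(-1, 1), requires_grad=False, name="x")
    y = to_tensor((np.linspace(-1, 1, 10) * 2 + 1).reshape(-1, 1), requires_grad=False, name="y")
    slope = to_tensor([0.0], name="slope")
    intercept = to_tensor([0.0], name="intercept")

    for _ in range(50):
        y_pred = x * repeat("j->ij", slope, x.shape) + repeat("j->ij", intercept, x.shape)
        loss = radd(mean_squared_error(y, y_pred), "i->") / y.shape[0]
        loss.backward()
        # rebuild the parameters so each step starts from a fresh graph,
        # mirroring the updated test
        slope = to_tensor(slope - slope.grad * 1e-2, name="slope")
        intercept = to_tensor(intercept - intercept.grad * 1e-2, name="intercept")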