Completed linear regression
bclarkson-code committed Jan 14, 2024
1 parent d73bc0c commit 777bff5
Showing 7 changed files with 160 additions and 5 deletions.
12 changes: 12 additions & 0 deletions src/tricycle_v2/initialisers.py
@@ -0,0 +1,12 @@
import numpy as np

from tricycle_v2.ops import to_tensor


def init_xavier(shape, name: str = ""):
"""
Initialise a tensor with Xavier/Glorot initialisation
"""
f_in, f_out = shape
bound = np.sqrt(6) / np.sqrt(f_in + f_out)
return to_tensor(np.random.uniform(low=-bound, high=bound, size=shape), name=name)
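For reference, a minimal usage sketch of the initialiser above (assuming `Tensor` behaves like a NumPy array, as the `.view(Tensor)` call in tensor.py suggests; the shape and name are illustrative):

```python
import numpy as np

from tricycle_v2.initialisers import init_xavier

# Illustrative shape: 4 inputs feeding 2 outputs.
weights = init_xavier((4, 2), name="weights")

# Xavier/Glorot uniform bound: sqrt(6) / sqrt(f_in + f_out)
bound = np.sqrt(6) / np.sqrt(4 + 2)
assert weights.shape == (4, 2)
assert np.all(np.abs(weights) <= bound)  # all samples lie inside the bound
```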
24 changes: 24 additions & 0 deletions src/tricycle_v2/ops.py
@@ -1,4 +1,5 @@
from functools import partial
from string import ascii_lowercase

import numpy as np

@@ -66,3 +67,26 @@ def nothing(tensor):
Return a tensor
"""
return tensor


def softmax(tensor):
"""
Apply softmax. The softmax is only applied to the final
dimension of the tensor.
Note: max-subtraction for numeric stability is currently commented out below
"""
from tricycle_v2.reduce import radd, rmax
from tricycle_v2.unary import uexp

indices = ascii_lowercase[: len(tensor.shape)]
reduce_subscript = f"{indices}->{indices[:-1]}"
# largest = rmax(tensor, reduce_subscript)

expand_subscript = f"{indices[:-1]}->{indices}"
# largest = repeat(expand_subscript, largest, tensor.shape)
normalised = tensor  # - largest
exponentiated = uexp(normalised)

denom = radd(exponentiated, reduce_subscript)
denom = repeat(expand_subscript, denom, tensor.shape)
return exponentiated / denom
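As a cross-check, a plain-NumPy sketch of what this softmax computes over the final axis; the max-subtraction here mirrors the stability normalisation that is commented out above (`reference_softmax` is an illustrative name, not part of the library):

```python
import numpy as np

def reference_softmax(x: np.ndarray) -> np.ndarray:
    # Shift by the per-row max (the step commented out above), then
    # exponentiate and normalise along the last axis.
    shifted = x - x.max(axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / exps.sum(axis=-1, keepdims=True)

# Every row sums to 1.
assert np.allclose(
    reference_softmax(np.arange(12).reshape(3, 4)).sum(axis=-1), 1.0
)
```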
5 changes: 4 additions & 1 deletion src/tricycle_v2/tensor.py
@@ -43,7 +43,7 @@ def backward(self):

else:
for arg, op in zip(current_node.args, current_node.back_fn):
logger.info(f"{hash(current_node)=} {hash(arg)=} {op=}")
# logger.info(f"{hash(current_node)=} {hash(arg)=} {op=}")
if not arg.requires_grad:
continue

@@ -53,15 +53,18 @@ def backward(self):

# calculate the gradient for each parameter
for leaf in leaves.values():
logger.info(f"leaf: {leaf}")
if leaf.grad_fn is None:
continue

for path in leaf.grad_fn:
grad = np.ones_like(self).view(Tensor)
grad.requires_grad = False

logger.info(grad)
for op in path:
grad = op(grad)
logger.info(grad)

leaf.grad = grad if leaf.grad is None else leaf.grad + grad

6 changes: 3 additions & 3 deletions src/tricycle_v2/unary.py
@@ -93,8 +93,8 @@ def udiv(arg_1: Union[Tensor, float], arg_2: Union[Tensor, float]) -> Tensor:

def umax(tensor: Tensor, constant: float) -> Tensor:
"""
Max a tensor by a constant, elementwise. The constant is not
differentiable.
If only a tensor is passed, find the max of the tensor.
If a constant is passed, find the max of the tensor and the constant, elementwise. The constant is not differentiable.
"""
assert isinstance(tensor, Tensor)
assert np.isscalar(constant)
@@ -120,7 +120,7 @@ def umin(tensor: Tensor, constant: float) -> Tensor:

result = to_tensor(np.minimum(tensor, constant))

indicator = to_tensor((tensor <= constant).astype(float))
indicator = to_tensor((tensor <= constant).astype(float), requires_grad=False)
indices = ascii_letters[: len(tensor.shape)]
subscripts = f"{indices},{indices}->{indices}"

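For context on the `requires_grad=False` change: the indicator is the elementwise derivative of `min(x, c)` with respect to `x`, so it should behave as a constant mask rather than a differentiable node. A rough NumPy sketch of that backward rule, under this reading:

```python
import numpy as np

x = np.arange(12, dtype=float).reshape(3, 4)
c = 4.0

out = np.minimum(x, c)           # forward: elementwise min with a constant

upstream = np.ones_like(out)     # stand-in for the incoming gradient
grad_x = upstream * (x <= c)     # 1 where x <= c, 0 elsewhere; c gets no gradient
```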
32 changes: 32 additions & 0 deletions tests/test_composite.py
@@ -0,0 +1,32 @@
import numpy as np

from tricycle_v2.ops import softmax
from tricycle_v2.reduce import radd
from tricycle_v2.tensor import to_tensor


def test_softmax():
in_tensor = to_tensor(np.arange(12).reshape(3, 4), name='in_tensor')

out_tensor = softmax(in_tensor)

assert out_tensor.shape == (3, 4)
assert np.allclose(radd(out_tensor, "ij->i"), [1, 1, 1])
assert np.allclose(
out_tensor,
np.array(
[
[0.0320586, 0.08714432, 0.23688282, 0.64391426],
[0.0320586, 0.08714432, 0.23688282, 0.64391426],
[0.0320586, 0.08714432, 0.23688282, 0.64391426],
]
),
)

out_tensor.backward()

breakpoint()
assert np.allclose(
in_tensor.grad,
np.ones_like(in_tensor),
)
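A quick hand check of where the expected rows come from: each row of `arange(12).reshape(3, 4)` differs only by a constant shift, and softmax is shift-invariant, so every row yields the same distribution as the softmax of `[0, 1, 2, 3]`:

```python
import numpy as np

exps = np.exp(np.array([0.0, 1.0, 2.0, 3.0]))
print(exps / exps.sum())
# ~ [0.0320586, 0.08714432, 0.23688282, 0.64391426]
```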
85 changes: 84 additions & 1 deletion tests/test_loss.py
@@ -1,8 +1,12 @@
import numpy as np
import pytest
from matplotlib import pyplot as plt
from sklearn.datasets import load_diabetes, load_linnerud
from sklearn.preprocessing import RobustScaler

from tricycle_v2.initialisers import init_xavier
from tricycle_v2.loss import mean_squared_error
from tricycle_v2.ops import repeat
from tricycle_v2.ops import einsum, repeat
from tricycle_v2.reduce import radd
from tricycle_v2.tensor import to_tensor

@@ -17,6 +21,7 @@ def test_can_mean_square_error():
assert np.allclose(mse, np.array([0, 2 / 3, 2 / 9]))


@pytest.mark.skip
def test_can_linear_regression():
np.random.seed(42)

@@ -53,3 +58,81 @@ def test_can_linear_regression():
ax.plot(losses)
ax.set_yscale("log")
plt.show()


@pytest.mark.skip
def test_linear_regression_multi_input():
X, y = load_diabetes(return_X_y=True)
x_scaler = RobustScaler()
y_scaler = RobustScaler()
X = x_scaler.fit_transform(X)
y = y_scaler.fit_transform(y.reshape(-1, 1))

X = to_tensor(X)
y = to_tensor(y)

learning_rate = 1e-1

slope = init_xavier((X.shape[1], 1), name="slope")
intercept = to_tensor([0], name="intercept")

losses = []
for _ in range(100):
repeated_intercept = repeat("j->ij", intercept, (X.shape[0], 1))

y_pred = einsum("ij,jk->ik", X, slope) + repeated_intercept
mse = mean_squared_error(y, y_pred)
loss = radd(mse, "i->") / y.shape[0]

losses.append(loss)

loss.backward()

slope = to_tensor(slope - slope.grad * learning_rate, name="slope")
intercept = to_tensor(
intercept - intercept.grad * learning_rate, name="intercept"
)

_, ax = plt.subplots()
ax.plot(losses)
ax.set_yscale("log")
plt.show()


@pytest.mark.skip
def test_linear_regression_multi_input_output():
X, y = load_linnerud(return_X_y=True)
x_scaler = RobustScaler()
y_scaler = RobustScaler()
X = x_scaler.fit_transform(X)
y = y_scaler.fit_transform(y)

X = to_tensor(X)
y = to_tensor(y)

learning_rate = 1e-1

slope = init_xavier((X.shape[1], y.shape[1]), name="slope")
intercept = to_tensor([-0.01, 0.01, 0.02], name="intercept")

losses = []
for _ in range(100):
repeated_intercept = repeat("k->ik", intercept, (X.shape[0], y.shape[1]))

y_pred = einsum("ij,jk->ik", X, slope) + repeated_intercept
mse = mean_squared_error(y, y_pred)
loss = radd(mse, "i->") / y.shape[0]

losses.append(loss)

loss.backward()

slope = to_tensor(slope - slope.grad * learning_rate, name="slope")
intercept = to_tensor(
intercept - intercept.grad * learning_rate, name="intercept"
)

_, ax = plt.subplots()
ax.plot(losses)
ax.set_yscale("log")
plt.show()
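For comparison, a rough plain-NumPy sketch of the gradient-descent loop that the multi-input test drives through the autodiff engine, using the analytic gradients of the mean squared error (variable names such as `X_np` and `W` are illustrative):

```python
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import RobustScaler

X_np, y_np = load_diabetes(return_X_y=True)
X_np = RobustScaler().fit_transform(X_np)
y_np = RobustScaler().fit_transform(y_np.reshape(-1, 1))

n, d = X_np.shape
W = np.zeros((d, 1))   # analogue of `slope`
b = np.zeros((1, 1))   # analogue of `intercept`
lr = 1e-1

for _ in range(100):
    y_pred = X_np @ W + b                 # einsum("ij,jk->ik", X, slope) + intercept
    err = y_pred - y_np
    grad_W = 2.0 / n * X_np.T @ err       # d(mean squared error)/dW
    grad_b = 2.0 / n * err.sum(axis=0, keepdims=True)
    W = W - lr * grad_W
    b = b - lr * grad_b
```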
1 change: 1 addition & 0 deletions tests/test_unary_ops.py
@@ -149,6 +149,7 @@ def test_can_umax():
)



def test_can_umin():
in_tensor = to_tensor(np.arange(12).reshape(3, 4))
out_tensor = umin(in_tensor, 4)
