second recording
joelgrus committed Nov 21, 2017
1 parent ed10f0c commit 7c4c368
Showing 9 changed files with 73 additions and 56 deletions.
6 changes: 3 additions & 3 deletions README.md
@@ -24,9 +24,9 @@ type hinting, Python 3.6, and of course deep learning.
# Here's the plan:

1. Tensors
2. Layers
3. Neural Nets
4. Loss Functions
2. Loss Functions
3. Layers
4. Neural Nets
5. Optimizers
6. Data
7. Training
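Step 1, Tensors, isn't touched by this commit. Elsewhere in the repo, joelnet/tensor.py presumably just aliases the NumPy array type so the rest of the library can type-hint against Tensor; a minimal sketch under that assumption:

```python
# joelnet/tensor.py (not part of this diff) -- assumed to be a bare alias,
# so layers, losses, and optimizers can annotate arguments as Tensor
# while working with ordinary NumPy arrays.
import numpy as np

Tensor = np.ndarray
```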
20 changes: 11 additions & 9 deletions fizzbuzz.py
@@ -1,20 +1,20 @@
"""
fizzbuzz is the terrible programming interview problem
where for each of the numbers from 1 to 100,
FizzBuzz is the following problem:
For each of the numbers 1 to 100:
* if the number is divisible by 3, print "fizz"
* if the number is divisible by 5, print "buzz"
* if the number is divisible by 15, print "fizzbuzz"
* otherwise just print the number itself
* otherwise, just print the number
"""
from typing import List

import numpy as np

from joelnet.train import train
from joelnet.nn import NeuralNet
from joelnet.layers import Linear, Tanh
from joelnet.optim import SGD
from joelnet.train import train

def fizz_buzz_encode(x: int) -> List[int]:
if x % 15 == 0:
@@ -26,18 +26,21 @@ def fizz_buzz_encode(x: int) -> List[int]:
else:
return [1, 0, 0, 0]


def binary_encode(x: int) -> List[int]:
"""
return the 10 binary digits of x
10 digit binary encoding of x
"""
return [x >> i & 1 for i in range(10)]

inputs = np.array([
binary_encode(x) for x in range(101, 1024)
binary_encode(x)
for x in range(101, 1024)
])

targets = np.array([
fizz_buzz_encode(x) for x in range(101, 1024)
fizz_buzz_encode(x)
for x in range(101, 1024)
])

net = NeuralNet([
@@ -53,8 +56,7 @@ def binary_encode(x: int) -> List[int]:
optimizer=SGD(lr=0.001))

for x in range(1, 101):
inputs = binary_encode(x)
predicted = net.forward(inputs)
predicted = net.forward(binary_encode(x))
predicted_idx = np.argmax(predicted)
actual_idx = np.argmax(fizz_buzz_encode(x))
labels = [str(x), "fizz", "buzz", "fizzbuzz"]
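The middle branches of fizz_buzz_encode are collapsed in this view. Going by the label order [str(x), "fizz", "buzz", "fizzbuzz"] used at prediction time, the two encoders presumably look like the sketch below (the two middle branches are an assumption):

```python
from typing import List

def fizz_buzz_encode(x: int) -> List[int]:
    # One-hot target matching labels = [str(x), "fizz", "buzz", "fizzbuzz"]
    if x % 15 == 0:
        return [0, 0, 0, 1]   # "fizzbuzz"
    elif x % 5 == 0:
        return [0, 0, 1, 0]   # "buzz"
    elif x % 3 == 0:
        return [0, 1, 0, 0]   # "fizz"
    else:
        return [1, 0, 0, 0]   # the number itself

def binary_encode(x: int) -> List[int]:
    # 10 binary digits of x, least significant bit first
    return [x >> i & 1 for i in range(10)]

assert binary_encode(10) == [0, 1, 0, 1, 0, 0, 0, 0, 0, 0]   # 10 = 0b1010
assert fizz_buzz_encode(10) == [0, 0, 1, 0]                  # divisible by 5 -> "buzz"
```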
9 changes: 6 additions & 3 deletions joelnet/data.py
@@ -1,19 +1,21 @@
"""
We will train our networks by feeding
batches of data through them
We'll feed inputs into our network in batches.
So here are some tools for iterating over data in batches.
"""
from typing import NamedTuple, Iterator
from typing import Iterator, NamedTuple

import numpy as np

from joelnet.tensor import Tensor

Batch = NamedTuple("Batch", [("inputs", Tensor), ("targets", Tensor)])


class DataIterator:
def __call__(self, inputs: Tensor, targets: Tensor) -> Iterator[Batch]:
raise NotImplementedError


class BatchIterator(DataIterator):
def __init__(self, batch_size: int = 32, shuffle: bool = True) -> None:
self.batch_size = batch_size
@@ -23,6 +25,7 @@ def __call__(self, inputs: Tensor, targets: Tensor) -> Iterator[Batch]:
starts = np.arange(0, len(inputs), self.batch_size)
if self.shuffle:
np.random.shuffle(starts)

for start in starts:
end = start + self.batch_size
batch_inputs = inputs[start:end]
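The tail of BatchIterator.__call__ is cut off above. Presumably it slices both arrays with the same start offset and yields a Batch per slice; a self-contained sketch under that assumption:

```python
from typing import Iterator, NamedTuple

import numpy as np

Batch = NamedTuple("Batch", [("inputs", np.ndarray), ("targets", np.ndarray)])

class BatchIterator:
    def __init__(self, batch_size: int = 32, shuffle: bool = True) -> None:
        self.batch_size = batch_size
        self.shuffle = shuffle

    def __call__(self, inputs: np.ndarray, targets: np.ndarray) -> Iterator[Batch]:
        # Batch start offsets: 0, batch_size, 2 * batch_size, ...
        starts = np.arange(0, len(inputs), self.batch_size)
        if self.shuffle:
            # Shuffle the order of the batches, not the rows inside them
            np.random.shuffle(starts)

        for start in starts:
            end = start + self.batch_size
            yield Batch(inputs[start:end], targets[start:end])

# e.g. 100 rows with batch_size=32 yields batches of 32, 32, 32, and 4 rows
```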
49 changes: 32 additions & 17 deletions joelnet/layers.py
@@ -1,11 +1,10 @@
"""
Our neural nets will be composed of layers.
We will push inputs forward through the layers
and propagate gradients backward through the layers.abs
Our neural nets will be made up of layers.
Each layer needs to pass its inputs forward
and propagate gradients backward. For example,
a neural net might look like
A sample neural net might look like
inputs -> linear -> tanh -> linear -> output
inputs -> Linear -> Tanh -> Linear -> output
"""
from typing import Dict, Callable

@@ -15,53 +14,65 @@


class Layer:
"""
base class for layers
"""
def __init__(self) -> None:
self.params: Dict[str, Tensor] = {}
self.grads: Dict[str, Tensor] = {}

def forward(self, inputs: Tensor) -> Tensor:
"""
Produce the outputs corresponding to these inputs
"""
raise NotImplementedError

def backward(self, grad: Tensor) -> Tensor:
"""
Backpropagate this gradient through the layer
"""
raise NotImplementedError


class Linear(Layer):
"""
A linear layer computes
output = input @ w + b
computes output = inputs @ w + b
"""
def __init__(self, input_size: int, output_size: int) -> None:
# inputs will be (batch_size, input_size)
# outputs will be (batch_size, output_size)
super().__init__()
self.params["w"] = np.random.randn(input_size, output_size)
self.params["b"] = np.random.randn(output_size)

def forward(self, inputs: Tensor) -> Tensor:
# batch_size, input_size = inputs.shape
"""
outputs = inputs @ w + b
"""
self.inputs = inputs
return inputs @ self.params["w"] + self.params["b"]

def backward(self, grad: Tensor) -> Tensor:
"""
if y = f(x) and x = a * b
if y = f(x) and x = a * b + c
then dy/da = f'(x) * b
then dy/db = f'(x) * a
and dy/db = f'(x) * a
and dy/dc = f'(x)
if now we have x = a @ b
if y = f(x) and x = a @ b + c
then dy/da = f'(x) @ b.T
and dy/db = a.T @ f'(x)
and dy/dc = f'(x)
"""
self.grads["b"] = np.sum(grad, axis=0)
self.grads["w"] = self.inputs.T @ grad
return grad @ self.params["w"].T
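The matrix-calculus rules in the Linear.backward docstring are easy to sanity-check numerically. A standalone sketch (not part of the repo) that takes f = sum, so f'(x) is a matrix of ones, and compares one analytic entry of dy/da against a finite difference:

```python
import numpy as np

np.random.seed(0)
a = np.random.randn(4, 3)   # stands in for a batch of inputs (batch_size=4, input_size=3)
b = np.random.randn(3, 2)   # stands in for the weights w
c = np.random.randn(2)      # stands in for the bias

def y(a, b, c):
    # take f = sum, so f'(x) is a matrix of ones shaped like x = a @ b + c
    return np.sum(a @ b + c)

grad_x = np.ones((4, 2))             # f'(x)
grad_a = grad_x @ b.T                # dy/da = f'(x) @ b.T
grad_b = a.T @ grad_x                # dy/db = a.T @ f'(x)
grad_c = np.sum(grad_x, axis=0)      # dy/dc = f'(x), summed over the batch

# finite-difference check of one entry of dy/da
eps, i, j = 1e-6, 1, 2
a_plus, a_minus = a.copy(), a.copy()
a_plus[i, j] += eps
a_minus[i, j] -= eps
numeric = (y(a_plus, b, c) - y(a_minus, b, c)) / (2 * eps)
assert np.isclose(numeric, grad_a[i, j])
```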


F = Callable[[Tensor], Tensor]

class Activation(Layer):
"""
An activation layer just applies a function
elementwise to its inputs
"""
def __init__(self, f: F, f_prime: F) -> None:
super().__init__()
self.f = f
@@ -72,6 +83,10 @@ def forward(self, inputs: Tensor) -> Tensor:
return self.f(inputs)

def backward(self, grad: Tensor) -> Tensor:
"""
if y = f(x) and x = g(z)
then dy/dz = f'(x) * g'(z)
"""
return self.f_prime(self.inputs) * grad


@@ -84,5 +99,5 @@ def tanh_prime(x: Tensor) -> Tensor:


class Tanh(Activation):
def __init__(self) -> None:
def __init__(self):
super().__init__(tanh, tanh_prime)
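The tanh helper itself is collapsed above; presumably it is a thin wrapper around NumPy, with the derivative written in terms of the forward value, since d/dx tanh(x) = 1 - tanh(x)^2:

```python
import numpy as np

def tanh(x: np.ndarray) -> np.ndarray:
    return np.tanh(x)

def tanh_prime(x: np.ndarray) -> np.ndarray:
    # derivative of tanh, reusing the forward value: 1 - tanh(x)**2
    y = tanh(x)
    return 1 - y ** 2
```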
13 changes: 5 additions & 8 deletions joelnet/loss.py
@@ -1,10 +1,6 @@
"""
We will train our models using loss functions
that indicate how good or bad our predictions are
for known input/output pairs. Then we can use the
gradients of this loss function with respect to the
various parameters of the net to adjust the parameters
and make our predictions better
A loss function measures how good our predictions are;
we can use this to adjust the parameters of our network
"""
import numpy as np

@@ -17,10 +13,11 @@ def loss(self, predicted: Tensor, actual: Tensor) -> float:
def grad(self, predicted: Tensor, actual: Tensor) -> Tensor:
raise NotImplementedError


class MSE(Loss):
"""
This is actually total squared error
not mean squared error
MSE is mean squared error, although we're
just going to do total squared error
"""
def loss(self, predicted: Tensor, actual: Tensor) -> float:
return np.sum((predicted - actual) ** 2)
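MSE.grad is cut off above. For total squared error sum((predicted - actual) ** 2), the gradient with respect to predicted is 2 * (predicted - actual), so the full class presumably reads (Loss stub included to keep the sketch self-contained):

```python
import numpy as np

class Loss:
    def loss(self, predicted: np.ndarray, actual: np.ndarray) -> float:
        raise NotImplementedError

    def grad(self, predicted: np.ndarray, actual: np.ndarray) -> np.ndarray:
        raise NotImplementedError

class MSE(Loss):
    # "MSE" is really total squared error here, as the docstring above admits
    def loss(self, predicted: np.ndarray, actual: np.ndarray) -> float:
        return np.sum((predicted - actual) ** 2)

    def grad(self, predicted: np.ndarray, actual: np.ndarray) -> np.ndarray:
        # gradient of sum((predicted - actual) ** 2) with respect to predicted
        return 2 * (predicted - actual)
```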
7 changes: 4 additions & 3 deletions joelnet/nn.py
@@ -1,13 +1,14 @@
"""
A neural net is just a series of layers.
In fact, it behaves a lot like a layer itself
although we're not going to make it one.
A NeuralNet is just a collection of layers.
It behaves a lot like a layer itself, although
we're not going to make it one.
"""
from typing import Sequence, Iterator, Tuple

from joelnet.tensor import Tensor
from joelnet.layers import Layer


class NeuralNet:
def __init__(self, layers: Sequence[Layer]) -> None:
self.layers = layers
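Only NeuralNet.__init__ is visible above. The rest of the class presumably chains forward through the layers in order, propagates gradients backward in reverse order, and exposes (param, grad) pairs for the optimizer; a sketch under that assumption:

```python
from typing import Iterator, Sequence, Tuple

import numpy as np

class NeuralNet:
    def __init__(self, layers: Sequence["Layer"]) -> None:
        self.layers = layers

    def forward(self, inputs: np.ndarray) -> np.ndarray:
        # Push the inputs through each layer in order
        for layer in self.layers:
            inputs = layer.forward(inputs)
        return inputs

    def backward(self, grad: np.ndarray) -> np.ndarray:
        # Propagate the gradient back through the layers in reverse order
        for layer in reversed(self.layers):
            grad = layer.backward(grad)
        return grad

    def params_and_grads(self) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
        # Yield each (parameter, gradient) pair so an optimizer can update in place
        for layer in self.layers:
            for name, param in layer.params.items():
                yield param, layer.grads[name]
```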
11 changes: 5 additions & 6 deletions joelnet/optim.py
@@ -1,18 +1,17 @@
"""
An optimizer uses the computed gradients
to adjust the parameters of a neural net
We use an optimizer to adjust the parameters
of our network based on the gradients computed
during backpropagation
"""

from joelnet.nn import NeuralNet
from joelnet.tensor import Tensor


class Optimizer:
def step(self, net: NeuralNet) -> None:
raise NotImplementedError


class SGD(Optimizer):
def __init__(self, lr: float) -> None:
def __init__(self, lr: float = 0.01) -> None:
self.lr = lr

def step(self, net: NeuralNet) -> None:
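SGD.step is cut off above. Plain stochastic gradient descent just takes a small step against each gradient; a sketch, assuming NeuralNet.params_and_grads yields (param, grad) pairs as above:

```python
class Optimizer:
    def step(self, net: "NeuralNet") -> None:
        raise NotImplementedError

class SGD(Optimizer):
    def __init__(self, lr: float = 0.01) -> None:
        self.lr = lr

    def step(self, net: "NeuralNet") -> None:
        # Update the parameter arrays in place so the layers see the new values
        for param, grad in net.params_and_grads():
            param -= self.lr * grad
```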
4 changes: 2 additions & 2 deletions joelnet/train.py
@@ -1,5 +1,5 @@
"""
Here's the function for training neural nets
Here's a function that can train a neural net
"""

from joelnet.tensor import Tensor
@@ -15,7 +15,7 @@ def train(net: NeuralNet,
num_epochs: int = 5000,
iterator: DataIterator = BatchIterator(),
loss: Loss = MSE(),
optimizer: Optimizer = SGD(lr=0.01)) -> None:
optimizer: Optimizer = SGD()) -> None:
for epoch in range(num_epochs):
epoch_loss = 0.0
for batch in iterator(inputs, targets):
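The body of the epoch loop is collapsed above. Presumably each batch is pushed forward through the net, the loss gradient is pushed back, and the optimizer takes a step, with the running loss printed once per epoch:

```python
from joelnet.tensor import Tensor
from joelnet.nn import NeuralNet
from joelnet.data import DataIterator, BatchIterator
from joelnet.loss import Loss, MSE
from joelnet.optim import Optimizer, SGD

def train(net: NeuralNet,
          inputs: Tensor,
          targets: Tensor,
          num_epochs: int = 5000,
          iterator: DataIterator = BatchIterator(),
          loss: Loss = MSE(),
          optimizer: Optimizer = SGD()) -> None:
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        for batch in iterator(inputs, targets):
            predicted = net.forward(batch.inputs)           # forward pass
            epoch_loss += loss.loss(predicted, batch.targets)
            grad = loss.grad(predicted, batch.targets)      # dLoss/dPredicted
            net.backward(grad)                              # backprop through the layers
            optimizer.step(net)                             # adjust the parameters
        print(epoch, epoch_loss)
```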
10 changes: 5 additions & 5 deletions xor.py
@@ -1,17 +1,17 @@
"""
The canonical example of a function that can't
be learned by a linear layer alone is XOR.
The canonical example of a function that can't be
learned with a simple linear model is XOR
"""
import numpy as np

from joelnet.train import train
from joelnet.nn import NeuralNet
from joelnet.layers import Linear, Tanh
from joelnet.train import train

inputs = np.array([
[0, 0],
[0, 1],
[1, 0],
[0, 1],
[1, 1]
])

@@ -28,7 +28,7 @@
Linear(input_size=2, output_size=2)
])

train(net, inputs, targets, num_epochs=5000)
train(net, inputs, targets)

for x, y in zip(inputs, targets):
predicted = net.forward(x)
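The targets array and most of the net definition are collapsed above. With a two-column output compared row by row against the targets, the targets are presumably one-hot encodings of XOR, and the net a small Linear -> Tanh -> Linear stack ending in the Linear(input_size=2, output_size=2) visible above; a sketch (the hidden layer size is an assumption):

```python
import numpy as np

from joelnet.nn import NeuralNet
from joelnet.layers import Linear, Tanh

# One-hot targets: column 0 = "XOR is 0", column 1 = "XOR is 1"
targets = np.array([
    [1, 0],   # 0 xor 0 = 0
    [0, 1],   # 0 xor 1 = 1
    [0, 1],   # 1 xor 0 = 1
    [1, 0]    # 1 xor 1 = 0
])

net = NeuralNet([
    Linear(input_size=2, output_size=2),
    Tanh(),
    Linear(input_size=2, output_size=2)   # the layer visible in the diff above
])
```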
