In [324]:
import os
from PIL import Image
import numpy as np
import random
import math

IMAGE_DIR = "images"
FOLDERS = [f for f in os.listdir(IMAGE_DIR) if "DS_Store" not in f]

In [501]:
train, valid, test = [], [], []
valid_cutoff = .8
test_cutoff = .9

for i, folder in enumerate(FOLDERS):
    fname = os.path.join(IMAGE_DIR, folder)
    for im in os.listdir(fname):
        impath = os.path.join(fname, im)
        img = Image.open(impath)
        data = np.asarray(img, dtype="int32")
        # Scale values from -1 to 1
        data = ((data / 255) - .5) / .5
        data = np.moveaxis(data, -1, 0)
        target = np.zeros((1, len(FOLDERS)))
        target[0,i] = 1
        row = (data, target, folder, )

        split = random.random()
        if split > test_cutoff:
            test.append(row)
        elif split > valid_cutoff:
            valid.append(row)
        else:
            train.append(row)

In [436]:
def log_loss(predicted, actual):
    tol = 1e-6
    cross_entropy = actual * np.log(predicted + tol)
    return -np.sum(cross_entropy)

In [477]:
def log_loss_grad(predicted, actual):
    return predicted - actual

In [396]:
def softmax(preds):
    tol = 1e-6
    preds = np.exp(preds - np.max(preds))
    return preds / (np.sum(preds) - tol)

In [330]:
def init_layers(layer_defs):
    layers = []
    for i in range(1, len(layer_defs)):
        if "input_units" in layer_defs[i]:
            last_units = layer_defs[i]["input_units"]
        else:
            last_units = layer_defs[i-1]["units"]

        biases = np.ones((1,layer_defs[i]["units"]))
        if layer_defs[i]["type"] == "cnn":
            weights = np.random.rand(layer_defs[i-1]["units"], layer_defs[i]["units"], layer_defs[i]["kernel_size"], layer_defs[i]["kernel_size"])
        else:
            weights = np.random.rand(last_units, layer_defs[i]["units"])

        weights = weights / 5 - .1

        layers.append([
            weights,
            biases,
            layer_defs[i]["type"]
        ])
    return layers

In [331]:
def unroll_image(image, kernel_x, kernel_y):
    x_size = (image.shape[0] - (kernel_x - 1))
    y_size = (image.shape[1] - (kernel_y - 1))
    rows =  x_size * y_size
    unrolled = np.zeros((rows, kernel_x * kernel_y))
    for x in range(0, x_size):
        for y in range(0, y_size):
            unrolled[y + (x * y_size),:] = image[x:(x+kernel_x),y:(y+kernel_y)].reshape((1,kernel_x * kernel_y))
    return unrolled

def convolve(image, kernel):
    return np.matmul(image, kernel.reshape(kernel.shape[0] * kernel.shape[1], 1))

In [523]:
def forward(batch, layers):
    hidden = [batch.copy()]
    for i in range(len(layers)):
        if layers[i][2] == "cnn":
            channels, next_channels, kernel_x, kernel_y = layers[i][0].shape

            new_x = batch.shape[1] - (kernel_x - 1)
            new_y = batch.shape[2] - (kernel_y - 1)
            next_batch = np.zeros((next_channels, new_x , new_y))
            for channel in range(channels):
                unrolled = unroll_image(batch[channel,:], kernel_x, kernel_y)
                for next_channel in range(next_channels):
                    kernel = layers[i][0][channel, next_channel, :]
                    mult = convolve(unrolled, kernel).reshape(new_x, new_y)
                    next_batch[next_channel,:] += mult
            next_batch /= batch.shape[0]

            hidden.append(next_batch.copy())
            next_batch = np.maximum(next_batch, 0)
            batch = next_batch
        else:
            if layers[i-1][2] == "cnn":
                batch = batch.reshape(batch.shape[0], batch.shape[1] * batch.shape[2])
            batch = np.matmul(batch, layers[i][0]) + layers[i][1]
            hidden.append(batch.copy())
            if i < len(layers) - 1:
                batch = np.maximum(batch, 0)

    return batch, hidden

In [524]:
def backward(layers, hidden, grad, lr, verbose=False):
    for i in range(len(layers)-1, -1, -1):
        print(f"Layer {i}") if verbose else None

        if layers[i][2] == "cnn":
            grad = grad.reshape(hidden[i+1].shape)
            if i != len(layers) - 1:
                grad = np.multiply(grad, np.heaviside(hidden[i+1], 1))
            _, kernel_x, kernel_y = grad.shape
            print(f"Grad shape {grad.shape}") if verbose else None

            new_grad = np.zeros(hidden[i].shape)
            for channel in range(hidden[i].shape[0]):
                # With multi-channel output, you need to loop across the output grads to link to input channel kernels
                # Each kernel gets a unique update
                for next_channel in range(hidden[i+1].shape[0]):
                    # Kernel update
                    flat_input = unroll_image(hidden[i][channel,:], kernel_x, kernel_y)
                    channel_grad = grad[next_channel,:]
                    flat_grad = channel_grad.reshape(math.prod(channel_grad.shape), 1)
                    k_grad = np.matmul(flat_input, flat_grad).reshape(layers[i][0].shape[2], layers[i][0].shape[3])
                    grad_norm = math.prod(channel_grad.shape)
                    layers[i][0][channel,next_channel,:] -= (k_grad * lr) / grad_norm
                    print(f"k_grad: {k_grad.shape}") if verbose else None

                    # Grad to lower layer
                    flipped_kernel = np.flip(layers[i][0][channel,next_channel,:], axis=[0,1])
                    padded_x = (flipped_kernel.shape[0] - 1)
                    padded_y = (flipped_kernel.shape[1] - 1)
                    padded_grad = np.pad(channel_grad, ((padded_x, padded_x),  (padded_y, padded_y)))
                    flat_padded = unroll_image(padded_grad, flipped_kernel.shape[0], flipped_kernel.shape[1])
                    flat_kernel = flipped_kernel.reshape(math.prod(flipped_kernel.shape), 1)
                    updated_grad = np.matmul(flat_padded, flat_kernel).reshape(hidden[i].shape[1], hidden[i].shape[2])
                    new_grad[channel, :] = updated_grad
            grad = new_grad
        else:
            if i != len(layers) - 1:
                grad = np.multiply(grad, np.heaviside(hidden[i+1], 1))
            grad = grad.T
            print(f"starting grad: {grad.shape}") if verbose else None
            w_grad = np.matmul(grad, hidden[i].reshape(1, math.prod(hidden[i].shape))).T
            print(f"w_grad: {w_grad.shape}") if verbose else None
            b_grad = grad.T

            layers[i][0] -= w_grad * lr
            layers[i][1] -= b_grad * lr

            grad = np.matmul(layers[i][0], grad).T
            print(f"ending grad: {grad.shape}") if verbose else None
    return layers

In [None]:
layer_defs = [
    {"type": "input", "units": 3},
    {"type": "cnn", "kernel_size": 2, "units": 2},
    {"type": "cnn", "kernel_size": 3, "units": 1},
    {"type": "dense", "input_units": 253 * 253, "units": 5}
]
lr = 5e-3
epochs = 2

layers = init_layers(layer_defs)
for epoch in range(epochs+1):
    epoch_loss = np.zeros(train[0][1].shape)
    for i, img in enumerate(train):
        image, target, label = img
        batch, hidden = forward(image, layers)

        grad = log_loss_grad(softmax(batch), target)
        epoch_loss += grad
        layers = backward(layers, hidden, grad, lr)
        if i% 250 == 0:
            print(f"Batch loss: {grad}")

    valid_preds = []
    match = np.zeros(len(valid))
    for i, img in enumerate(valid):
        image, target, label = img
        valid_pred, _ = forward(image, layers)
        valid_preds.append(softmax(valid_pred))
        match[i] = np.argmax(valid_pred) == np.argmax(target)

    print(f"Epoch {epoch} loss: {match.sum() / match.shape[0]}")

Batch loss: [[-0.79666816  0.19627529  0.20398767  0.20401958  0.19238583]]


In [509]:
test_preds = []
match = np.zeros(len(test))
for i, img in enumerate(test):
    image, target, label = img
    test_pred, _ = forward(image, layers)
    test_preds.append(softmax(test_pred))
    match[i] = np.argmax(test_pred) == np.argmax(target)

match.sum() / match.shape[0]

0.2988505747126437

In [520]:
np.flip(layers[0][0][1,0,:], axis=[0,1])

array([[ 0.079473  , -0.05180835, -0.02376148],
       [-0.05118403,  0.05303721, -0.06934482],
       [ 0.06084819, -0.0972187 ,  0.06903352]])

In [492]:
from IPython.lib.display import Audio
import numpy as np

framerate = 4410
play_time_seconds = 3

t = np.linspace(0, play_time_seconds, framerate*play_time_seconds)
audio_data = np.sin(2*np.pi*300*t) + np.sin(2*np.pi*240*t)
Audio(audio_data, rate=framerate, autoplay=True)

In [463]:
from sklearn.metrics import log_loss as sk_loss

sk_loss(target, test_pred)

4.107825191113088e-15

In [475]:
log_loss(softmax(batch), target)

-1.9999989999250343e-06

In [439]:
(target - test_pred) * log_loss(target, test_pred)

array([[ 13.81552437,   0.        ,   0.        ,   0.        ,
        -13.81553819]])

In [414]:
test_pred

array([[0.      , 0.      , 0.      , 0.      , 1.000001]])

In [415]:
target

array([[1., 0., 0., 0., 0.]])

In [251]:
layer_defs = [
    {"type": "input", "units": 3},
    {"type": "cnn", "kernel_size": 3, "units": 1},
    {"type": "dense", "input_units": 254 * 254, "units": 5}
]

layers = init_layers(layer_defs)

In [252]:
layers, batch, hidden = forward(images[0][0], layers)
lr = 5e-4
grad = log_loss(softmax(batch), np.array([0,0,1,0,0]))

i = 1
grad = np.multiply(grad, np.heaviside(hidden[i+1], 1))

In [253]:
grad = grad.T
w_grad = np.matmul(grad, hidden[i].reshape(1, math.prod(hidden[i].shape))).T
b_grad = grad.T
layers[i][0] -= (w_grad + layers[i][0] * .01) * lr
layers[i][1] -= b_grad * lr
grad = np.matmul(layers[i][0], grad).T

In [255]:
i = 0
grad = grad.reshape(hidden[i+1].shape)
grad = np.multiply(grad, np.heaviside(hidden[i+1], 1))

In [256]:
_, kernel_x, kernel_y = grad.shape
flat_input = unroll_image(hidden[i][0,:], kernel_x, kernel_y)
flat_grad = grad.reshape(math.prod(grad.shape), 1)
k_grad = np.matmul(flat_input, flat_grad).reshape(1,1,layers[i][0].shape[2], layers[i][0].shape[3])
layers[i][0] -= k_grad * lr