[SYSTEMML-1408] Add padding parameters to max-pooling layers
This adds padding parameters to the max-pooling layers, along with the
associated tests.  Also, there are some general code formatting updates.

Closes #434.
dusenberrymw committed Mar 22, 2017
1 parent 16e9909 commit 15ccb7c
Showing 26 changed files with 537 additions and 247 deletions.
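For context, the sketch below is not part of this commit's diff; it is a rough illustration of how a pooling call could use the new padding parameters. It assumes max_pool::forward takes padh/padw arguments after the stride arguments, mirroring the conv layers, and the shapes and values are purely hypothetical.

# Hypothetical usage sketch: 2x2 max pooling with stride 2 and 1 pixel of zero
# padding, applied to N images stored row-wise as an (N, C*Hin*Win) matrix.
source("nn/layers/max_pool.dml") as max_pool

N = 4
C = 3
Hin = 28
Win = 28
X = rand(rows=N, cols=C*Hin*Win)
[out, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf=2, Wf=2,
                                      strideh=2, stridew=2, padh=1, padw=1)
# With padding, the output dims follow (Hin + 2*padh - Hf) / strideh + 1,
# so Hout = Wout = 15 here.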
8 changes: 4 additions & 4 deletions scripts/staging/SystemML-NN/examples/get_mnist_data.sh
@@ -8,9 +8,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -23,6 +23,6 @@
DIR="$(cd "$(dirname "$0")" && pwd)"
mkdir -p $DIR/data/mnist/
cd $DIR/data/mnist/
curl -O http://pjreddie.com/media/files/mnist_train.csv
curl -O http://pjreddie.com/media/files/mnist_test.csv
curl -O https://pjreddie.com/media/files/mnist_train.csv
curl -O https://pjreddie.com/media/files/mnist_test.csv

4 changes: 2 additions & 2 deletions scripts/staging/SystemML-NN/examples/mnist_lenet-predict.dml
@@ -41,7 +41,7 @@
# Outputs:
# - probs: File containing class probability predictions for each
# image.
#
#
# Data:
# The X file should contain images of handwritten digits,
# where each example is a 28x28 pixel image of grayscale values in
@@ -79,7 +79,7 @@ b3 = read($model_dir+"/b3")
W4 = read($model_dir+"/W4")
b4 = read($model_dir+"/b4")

# Predict classes
# Predict classes
probs = mnist_lenet::predict(X, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4)

# Output results
2 changes: 1 addition & 1 deletion scripts/staging/SystemML-NN/examples/mnist_lenet-train.dml
@@ -41,7 +41,7 @@
# - W1, W2, W3, W4: Files containing the trained weights of the model.
# - b1, b2, b3, b4: Files containing the trained biases of the model.
# - accuracy: File containing the final accuracy on the test data.
#
#
# Data:
# The MNIST dataset contains labeled images of handwritten digits,
# where each example is a 28x28 pixel image of grayscale values in
41 changes: 27 additions & 14 deletions scripts/staging/SystemML-NN/examples/mnist_lenet.dml
@@ -114,13 +114,17 @@ train = function(matrix[double] X, matrix[double] y,

# Compute forward pass
## layer 1: conv1 -> relu1 -> pool1
[outc1, Houtc1, Woutc1] = conv::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
[outc1, Houtc1, Woutc1] = conv::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride,
pad, pad)
outr1 = relu::forward(outc1)
[outp1, Houtp1, Woutp1] = max_pool::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2, strideh=2, stridew=2)
[outp1, Houtp1, Woutp1] = max_pool::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2,
strideh=2, stridew=2, padh=0, padw=0)
## layer 2: conv2 -> relu2 -> pool2
[outc2, Houtc2, Woutc2] = conv::forward(outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf, stride, stride, pad, pad)
[outc2, Houtc2, Woutc2] = conv::forward(outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf,
stride, stride, pad, pad)
outr2 = relu::forward(outc2)
[outp2, Houtp2, Woutp2] = max_pool::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2, strideh=2, stridew=2)
[outp2, Houtp2, Woutp2] = max_pool::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2,
strideh=2, stridew=2, padh=0, padw=0)
## layer 3: affine3 -> relu3 -> dropout
outa3 = affine::forward(outp2, W3, b3)
outr3 = relu::forward(outa3)
@@ -146,7 +150,8 @@ train = function(matrix[double] X, matrix[double] y,
accuracy_val = mean(rowIndexMax(probs_val) == rowIndexMax(y_val))

# Output results
print("Epoch: " + e + ", Iter: " + i + ", Train Loss: " + loss + ", Train Accuracy: " + accuracy + ", Val Loss: " + loss_val + ", Val Accuracy: " + accuracy_val)
print("Epoch: " + e + ", Iter: " + i + ", Train Loss: " + loss + ", Train Accuracy: "
+ accuracy + ", Val Loss: " + loss_val + ", Val Accuracy: " + accuracy_val)
}

# Compute data backward pass
@@ -160,13 +165,17 @@ train = function(matrix[double] X, matrix[double] y,
douta3 = relu::backward(doutr3, outa3)
[doutp2, dW3, db3] = affine::backward(douta3, outp2, W3, b3)
## layer 2: conv2 -> relu2 -> pool2
doutr2 = max_pool::backward(doutp2, Houtp2, Woutp2, outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2, strideh=2, stridew=2)
doutr2 = max_pool::backward(doutp2, Houtp2, Woutp2, outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2,
strideh=2, stridew=2, padh=0, padw=0)
doutc2 = relu::backward(doutr2, outc2)
[doutp1, dW2, db2] = conv::backward(doutc2, Houtc2, Woutc2, outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf, stride, stride, pad, pad)
[doutp1, dW2, db2] = conv::backward(doutc2, Houtc2, Woutc2, outp1, W2, b2, F1,
Houtp1, Woutp1, Hf, Wf, stride, stride, pad, pad)
## layer 1: conv1 -> relu1 -> pool1
doutr1 = max_pool::backward(doutp1, Houtp1, Woutp1, outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2, strideh=2, stridew=2)
doutr1 = max_pool::backward(doutp1, Houtp1, Woutp1, outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2,
strideh=2, stridew=2, padh=0, padw=0)
doutc1 = relu::backward(doutr1, outc1)
[dX_batch, dW1, db1] = conv::backward(doutc1, Houtc1, Woutc1, X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
[dX_batch, dW1, db1] = conv::backward(doutc1, Houtc1, Woutc1, X_batch, W1, b1, C, Hin, Win,
Hf, Wf, stride, stride, pad, pad)

# Compute regularization backward pass
dW1_reg = l2_reg::backward(W1, lambda)
@@ -251,13 +260,17 @@ predict = function(matrix[double] X, int C, int Hin, int Win,

# Compute forward pass
## layer 1: conv1 -> relu1 -> pool1
[outc1, Houtc1, Woutc1] = conv::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
[outc1, Houtc1, Woutc1] = conv::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride,
pad, pad)
outr1 = relu::forward(outc1)
[outp1, Houtp1, Woutp1] = max_pool::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2, strideh=2, stridew=2)
[outp1, Houtp1, Woutp1] = max_pool::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2,
strideh=2, stridew=2, padh=0, padw=0)
## layer 2: conv2 -> relu2 -> pool2
[outc2, Houtc2, Woutc2] = conv::forward(outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf, stride, stride, pad, pad)
[outc2, Houtc2, Woutc2] = conv::forward(outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf,
stride, stride, pad, pad)
outr2 = relu::forward(outc2)
[outp2, Houtp2, Woutp2] = max_pool::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2, strideh=2, stridew=2)
[outp2, Houtp2, Woutp2] = max_pool::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2,
strideh=2, stridew=2, padh=0, padw=0)
## layer 3: affine3 -> relu3
outa3 = affine::forward(outp2, W3, b3)
outr3 = relu::forward(outa3)
@@ -281,7 +294,7 @@ eval = function(matrix[double] probs, matrix[double] y)
*
* Inputs:
* - probs: Class probabilities, of shape (N, K).
* - y: Target matrix, of shape (N,
* - y: Target matrix, of shape (N, K).
*
* Outputs:
* - loss: Scalar loss, of shape (1).
@@ -37,7 +37,7 @@
# Outputs:
# - probs: File containing class probability predictions for each
# image.
#
#
# Data:
# The X file should contain images of handwritten digits,
# where each example is a 28x28 pixel image of grayscale values in
@@ -66,7 +66,7 @@ X = X / 255.0
W = read($model_dir+"/W")
b = read($model_dir+"/b")

# Predict classes
# Predict classes
probs = mnist_softmax::predict(X, W, b)

# Output results
@@ -38,7 +38,7 @@
# - W: File containing the trained weights of the model.
# - b: File containing the trained biases of the model.
# - accuracy: File containing the final accuracy on the test data.
#
#
# Data:
# The MNIST dataset contains labeled images of handwritten digits,
# where each example is a 28x28 pixel image of grayscale values in
4 changes: 2 additions & 2 deletions scripts/staging/SystemML-NN/nn/layers/affine.dml
@@ -69,7 +69,7 @@ init = function(int D, int M)
*
* Note: This is just a convenience function, and parameters
* may be initialized manually if needed.
*
*
* We use the heuristic by He et al. [http://arxiv.org/abs/1502.01852],
* which limits the magnification of inputs/gradients during
* forward/backward passes by scaling unit-Gaussian weights by a
@@ -84,6 +84,6 @@ init = function(int D, int M)
* - b: Biases vector, of shape (1, M).
*/
W = rand(rows=D, cols=M, pdf="normal") * sqrt(2.0/D)
b = matrix(0, rows=1, cols=M)
b = matrix(0, rows=1, cols=M)
}

12 changes: 6 additions & 6 deletions scripts/staging/SystemML-NN/nn/layers/conv.dml
@@ -69,7 +69,7 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
F = nrow(W)
Hout = as.integer((Hin + 2 * padh - Hf) / strideh + 1)
Wout = as.integer((Win + 2 * padw - Wf) / stridew + 1)

# Create output volume
out = matrix(0, rows=N, cols=F*Hout*Wout)

@@ -124,7 +124,7 @@ backward = function(matrix[double] dout, int Hout, int Wout,
*/
N = nrow(X)
F = nrow(W)

# Create gradient volumes
# Note: Create convenience gradient volumes for dW and db that will
# allow for one gradient to be stored per example, allowing for
@@ -151,8 +151,8 @@

# Compute dX
dXn_padded_cols = t(W) %*% doutn # shape (C*Hf*Wf, Hout*Wout)
dXn_padded =
util::col2im(dXn_padded_cols, C, Hin+2*padh, Win+2*padw, Hf, Wf, strideh, stridew, "add")
dXn_padded = util::col2im(dXn_padded_cols, C, Hin+2*padh, Win+2*padw, Hf, Wf,
strideh, stridew, "add")
dXn = util::unpad_image(dXn_padded, Hin, Win, padh, padw)
dX[n,] = matrix(dXn, rows=1, cols=C*Hin*Win) # reshape
}
@@ -170,7 +170,7 @@ init = function(int F, int C, int Hf, int Wf)
*
* Note: This is just a convenience function, and parameters
* may be initialized manually if needed.
*
*
* We use the heuristic by He et al. [http://arxiv.org/abs/1502.01852],
* which limits the magnification of inputs/gradients during
* forward/backward passes by scaling unit-Gaussian weights by a
@@ -187,6 +187,6 @@ init = function(int F, int C, int Hf, int Wf)
* - b: Biases vector, of shape (F, 1).
*/
W = rand(rows=F, cols=C*Hf*Wf, pdf="normal") * sqrt(2.0/(C*Hf*Wf))
b = matrix(0, rows=F, cols=1)
b = matrix(0, rows=F, cols=1)
}

9 changes: 4 additions & 5 deletions scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml
@@ -60,10 +60,9 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
*/
N = nrow(X)
F = nrow(W)
# TODO: We should eliminate this in a separate PR
Hout = as.integer((Hin + 2 * padh - Hf) / strideh + 1)
Wout = as.integer((Win + 2 * padw - Wf) / stridew + 1)

# Convolution - built-in implementation
out = conv2d(X, W, input_shape=[N,C,Hin,Win], filter_shape=[F,C,Hf,Wf],
stride=[strideh,stridew], padding=[padh,padw])
@@ -105,7 +104,7 @@ backward = function(matrix[double] dout, int Hout, int Wout,
*/
N = nrow(X)
F = nrow(W)

# Partial derivatives for convolution - built-in implementation
dW = conv2d_backward_filter(X, dout, stride=[strideh,stridew], padding=[padh,padw],
input_shape=[N,C,Hin,Win], filter_shape=[F,C,Hf,Wf])
@@ -123,7 +122,7 @@ init = function(int F, int C, int Hf, int Wf)
*
* Note: This is just a convenience function, and parameters
* may be initialized manually if needed.
*
*
* We use the heuristic by He et al. [http://arxiv.org/abs/1502.01852],
* which limits the magnification of inputs/gradients during
* forward/backward passes by scaling unit-Gaussian weights by a
@@ -140,6 +139,6 @@ init = function(int F, int C, int Hf, int Wf)
* - b: Biases vector, of shape (F, 1).
*/
W = rand(rows=F, cols=C*Hf*Wf, pdf="normal") * sqrt(2.0/(C*Hf*Wf))
b = matrix(0, rows=F, cols=1)
b = matrix(0, rows=F, cols=1)
}

4 changes: 2 additions & 2 deletions scripts/staging/SystemML-NN/nn/layers/cross_entropy_loss.dml
@@ -26,7 +26,7 @@
* vectors of class probs.
* L = (1/N) sum(L_i) for i=1 to N, where N is the number of examples.
*/
forward = function(matrix[double] pred, matrix[double] y)
forward = function(matrix[double] pred, matrix[double] y)
return (double loss) {
/*
* Computes the forward pass for a cross-entropy loss function. The
@@ -50,7 +50,7 @@ forward = function(matrix[double] pred, matrix[double] y)
loss = sum(losses) / N
}

backward = function(matrix[double] pred, matrix[double] y)
backward = function(matrix[double] pred, matrix[double] y)
return (matrix[double] dpred) {
/*
* Computes the backward pass of a cross-entropy loss function. The
4 changes: 2 additions & 2 deletions scripts/staging/SystemML-NN/nn/layers/dropout.dml
@@ -47,10 +47,10 @@ forward = function(matrix[double] X, double p, int seed)
# to create a dropout mask. Fortunately, SystemML has a `sparsity` parameter on
# the `rand` function that allows us to create a mask directly.
if (seed == -1) {
mask = rand(rows=nrow(X), cols=ncol(X), min=1, max=1, sparsity=p)
mask = rand(rows=nrow(X), cols=ncol(X), min=1, max=1, sparsity=p)
}
else {
mask = rand(rows=nrow(X), cols=ncol(X), min=1, max=1, sparsity=p, seed=seed)
mask = rand(rows=nrow(X), cols=ncol(X), min=1, max=1, sparsity=p, seed=seed)
}
out = X * mask / p
}
4 changes: 2 additions & 2 deletions scripts/staging/SystemML-NN/nn/layers/l1_loss.dml
@@ -25,7 +25,7 @@
* L_i = sum_j(abs((pred_i)_j - (y_i)_j)) for all j.
* L = (1/N) sum(L_i) for i=1 to N, where N is the number of examples.
*/
forward = function(matrix[double] pred, matrix[double] y)
forward = function(matrix[double] pred, matrix[double] y)
return (double loss) {
/*
* Computes the forward pass for an L1 loss function. The inputs
@@ -46,7 +46,7 @@ forward = function(matrix[double] pred, matrix[double] y)
loss = sum(losses) / N
}

backward = function(matrix[double] pred, matrix[double] y)
backward = function(matrix[double] pred, matrix[double] y)
return (matrix[double] dpred) {
/*
* Computes the backward pass for an L1 loss function. The inputs
2 changes: 1 addition & 1 deletion scripts/staging/SystemML-NN/nn/layers/l1_reg.dml
@@ -46,7 +46,7 @@ backward = function(matrix[double] X, double lambda) return (matrix[double] dX)
* - lambda: Regularization strength.
*
* Outputs:
* - dX: Gradient wrt X, of same shape as X.
* - dX: Gradient wrt X, of same shape as X.
*/
dX = lambda * sign(X)
}
4 changes: 2 additions & 2 deletions scripts/staging/SystemML-NN/nn/layers/l2_loss.dml
@@ -25,7 +25,7 @@
* L_i = (1/2) 2norm(pred_i - y_i)^2
* L = (1/N) sum(L_i) for i=1 to N, where N is the number of examples.
*/
forward = function(matrix[double] pred, matrix[double] y)
forward = function(matrix[double] pred, matrix[double] y)
return (double loss) {
/*
* Computes the forward pass for an L2 loss function. The inputs
@@ -46,7 +46,7 @@ forward = function(matrix[double] pred, matrix[double] y)
loss = sum(losses) / N
}

backward = function(matrix[double] pred, matrix[double] y)
backward = function(matrix[double] pred, matrix[double] y)
return (matrix[double] dpred) {
/*
* Computes the backward pass for an L2 loss function. The inputs
2 changes: 1 addition & 1 deletion scripts/staging/SystemML-NN/nn/layers/l2_reg.dml
@@ -46,7 +46,7 @@ backward = function(matrix[double] X, double lambda) return (matrix[double] dX)
* - lambda: Regularization strength.
*
* Outputs:
* - dX: Gradient wrt X, of same shape as X.
* - dX: Gradient wrt X, of same shape as X.
*/
dX = lambda * X
}
6 changes: 3 additions & 3 deletions scripts/staging/SystemML-NN/nn/layers/log_loss.dml
@@ -23,10 +23,10 @@
* Log loss function.
*
* L_i = -y_i*log(pred_i) - (1-y_i)*log(1-pred_i), where y_i is a
* binary target, and pred_i is a probability of y=1.
* binary target, and pred_i is a probability of y=1.
* L = (1/N) sum(L_i) for i=1 to N, where N is the number of examples.
*/
forward = function(matrix[double] pred, matrix[double] y)
forward = function(matrix[double] pred, matrix[double] y)
return (double loss) {
/*
* Computes the forward pass for a log loss function.
@@ -48,7 +48,7 @@ forward = function(matrix[double] pred, matrix[double] y)
loss = sum(losses) / N
}

backward = function(matrix[double] pred, matrix[double] y)
backward = function(matrix[double] pred, matrix[double] y)
return (matrix[double] dpred) {
/*
* Computes the backward pass for a log loss function.
