[WIP] Conv2D Layer #39

Open
wants to merge 7 commits into base: master
Changes from 4 commits
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -16,6 +16,7 @@ target_sources(afml
src/nn/Modules/Container.cpp
src/nn/Modules/Linear.cpp
src/nn/Modules/Loss.cpp
src/nn/Modules/Conv2D.cpp
src/nn/Modules/Module.cpp
src/nn/Init.cpp
)
6 changes: 6 additions & 0 deletions include/af/autograd/Functions.hpp
@@ -79,5 +79,11 @@ namespace af {

Variable flat(const Variable &input);
Variable moddims(const Variable &input, const dim4 &dims);
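// New autograd ops added for the Conv2D layer: reorder permutes dimensions,
// unwrap/wrap are ArrayFire's im2col/col2im pair, and conv2d composes them with matmul.
// Parameters: wx, wy = filter size; sx, sy = stride; px, py = zero padding; ox, oy = wrap output size.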
Variable reorder(const Variable &input, int d0, int d1, int d2, int d3);

Variable unwrap(const Variable &input, int wx, int wy, int sx, int sy, int px, int py);
Variable wrap(const Variable &input, int ox, int oy, int wx, int wy, int sx, int sy, int px, int py);

Variable conv2d(const Variable &input, const Variable &weights, int wx, int wy, int sx, int sy, int px, int py);
}
}
1 change: 1 addition & 0 deletions include/af/nn/Modules.hpp
@@ -13,3 +13,4 @@
#include <af/nn/Modules/Container.hpp>
#include <af/nn/Modules/Activations.hpp>
#include <af/nn/Modules/Loss.hpp>
#include <af/nn/Modules/Conv2D.hpp>
37 changes: 37 additions & 0 deletions include/af/nn/Modules/Conv2D.hpp
@@ -0,0 +1,37 @@
/*******************************************************
* Copyright (c) 2017, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#pragma once

#include <af/nn/Modules/Module.hpp>

namespace af
{
namespace nn
{
class Conv2D : public Module
{
private:
bool m_bias;
int m_wx;
int m_wy;
int m_sx;
int m_sy;
int m_px;
int m_py;
public:
Conv2D(int wx, int wy, int sx, int sy, int px, int py, int n_in, int n_out, bool bias = true);

Conv2D(const autograd::Variable &w, int sx = 1, int sy = 1, int px = 0, int py = 0);

Conv2D(const autograd::Variable &w, const autograd::Variable &b, int sx = 1, int sy = 1, int px = 0, int py = 0);

autograd::Variable forward(const autograd::Variable &input);
};
}
}
138 changes: 138 additions & 0 deletions src/autograd/Functions.cpp
@@ -414,5 +414,143 @@ namespace af {
};
return Variable(result, {input}, grad_func);
}

Variable reorder(const Variable &input, int d0, int d1, int d2, int d3)
{
array res = reorder(input.array(), d0, d1, d2, d3);

int tmp[] = {d0, d1, d2, d3};
int tmp2[4];
for(int i = 0; i < 4; i++){
tmp2[tmp[i]] = i;
}
auto reverse = Variable(array(4, tmp2), false);
Member: reverse is not being used anymore.


auto grad_func = [tmp2](std::vector<Variable> &inputs, const Variable &grad_output){
inputs[0].addGrad(reorder(grad_output, tmp2[0], tmp2[1], tmp2[2], tmp2[3]));
};
return Variable(res, {input, reverse}, grad_func);
}
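
Per the review comment above, reverse is never used by grad_func (which captures tmp2 directly), so one possible cleanup (a sketch only, not part of this diff) is to drop it from the output Variable's inputs:

Variable reorder(const Variable &input, int d0, int d1, int d2, int d3)
{
array res = reorder(input.array(), d0, d1, d2, d3);

//Build the inverse permutation so the gradient can be reordered back
int tmp[] = {d0, d1, d2, d3};
int tmp2[4];
for(int i = 0; i < 4; i++){
tmp2[tmp[i]] = i;
}

auto grad_func = [tmp2](std::vector<Variable> &inputs, const Variable &grad_output){
inputs[0].addGrad(reorder(grad_output, tmp2[0], tmp2[1], tmp2[2], tmp2[3]));
};
return Variable(res, {input}, grad_func); //no unused reverse Variable kept alive
}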

Variable unwrap(const Variable &input, int wx, int wy, int sx, int sy, int px, int py)
{
array res = unwrap(input.array(), wx, wy, sx, sy, px, py);
auto grad_func = [wx, wy, sx, sy, px, py](std::vector<Variable> &inputs, const Variable &grad_output) {
dim4 d = inputs[0].dims();
inputs[0].addGrad(wrap(grad_output, d[0], d[1], wx, wy, sx, sy, px, py));
};
return Variable(res, {input}, grad_func);
}

Variable wrap(const Variable &input, int ox, int oy, int wx, int wy, int sx, int sy, int px, int py)
{
array res = wrap(input.array(), ox, oy, wx, wy, sx, sy, px, py);
auto grad_func = [wx, wy, sx, sy, px, py](std::vector<Variable> &inputs, const Variable &grad_output) {
inputs[0].addGrad(unwrap(grad_output, wx, wy, sx, sy, px, py));
};
return Variable(res, {input}, grad_func);
}

Variable conv2d(const Variable &input, const Variable &weights, int wx, int wy, int sx, int sy, int px, int py)
{
dim4 idims = input.array().dims(); // (x_i, y_i, c_i, n )
dim4 wdims = weights.array().dims(); // (wx, wy, c_i, c_o)

int x_i = idims[0]; //size of x dim of input
int y_i = idims[1]; //size of y dim of input
int c_i = idims[2]; //number of input channels
int n = idims[3]; //batch size (1 for now)

int x_o = (x_i + 2 * px - wx) / sx + 1; //size of x dim of output
int y_o = (y_i + 2 * py - wy) / sy + 1; //size of y dim of output
int c_o = wdims[3]; //number of output channels
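//e.g. a 32x32 input with a 5x5 filter, stride 1, padding 2: (32 + 2*2 - 5)/1 + 1 = 32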

array windows = unwrap(input.array(), wx, wy, sx, sy, px, py);

array lhs = moddims(
reorder(windows, 1, 0, 2, 3),
dim4(x_o * y_o, wx * wy * c_i, n, 1));
array rhs = moddims(weights.array(), dim4(wx * wy * c_i, c_o, 1, 1));

//TODO: This loop can be replaced with a batched matmult as soon as
//that is added to arrayfire
std::vector<array> out;
for(int i = 0; i < n; i++){
array res = matmul(lhs(span, span, i), rhs);
out.push_back(moddims(res , dim4(x_o, y_o, c_o, 1)));
}

//join() accepts at most four arrays at a time, so build the batch by joining in groups of three
array result = out[0];
for(int i = 1; i < n; i+=3){
int rem = n - i;
if(rem >= 3){
result = join(3, result, out[i], out[i+1], out[i+2]);
}else if(rem == 2){
result = join(3, result, out[i], out[i+1]);
break;
}else if(rem == 1){
result = join(3, result, out[i]);
break;
}else{
break;
}
}
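
//Sketch (not in this diff): if/when matmul broadcasts the rhs across a batch dimension,
//both loops above could collapse to something like
//  array result = moddims(matmul(lhs, rhs), dim4(x_o, y_o, c_o, n));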

auto grad_func = [wx, wy, sx, sy, px, py, c_i, n](std::vector<Variable> &inputs, const Variable &grad_output) {
dim4 odims = grad_output.array().dims();
dim4 wdims = inputs[1].array().dims();
dim4 idims = inputs[0].array().dims();

auto grad_out_reshape = moddims(grad_output, dim4(odims[0]*odims[1], odims[2], odims[3], 1));

auto weights_reshape = moddims(inputs[1], dim4(wdims[0]*wdims[1]*wdims[2], wdims[3], 1, 1));

//TODO: This really needs batched matmul...
//TODO: This doesn't work for n > 1
//TODO: Can these lines be shortened? - This seems like a large grad function - perhaps this
// could all be implemented in Conv2D::forward(). I had to implement the helper functions anyways
/*
std::vector<array> out;
for(int i = 0; i < n; i++){
auto a = matmulNT(grad_out_reshape(span, span, i), weights_reshape); //Problem is here - can't call () on Variable
Contributor Author: This line is all that is preventing me from having batches working.

Member: I can make the matmulXY functions in arrayfire-ml support batches for now.

Contributor Author: That would be good.

auto adims = a.array().dims();
auto b = moddims(a, dim4(adims[0], wx*wy, c_i, adims[3]));
auto c = reorder(b, 1, 0, 2, 3);
out.push_back(wrap(c, idims[0], idims[1], wx, wy, sx, sy, px, py));
}

array result = out[0];
for(int i = 1; i < n; i+=3){
int rem = n - i;
if(rem >= 3){
result = join(3, result, out[i], out[i+1], out[i+2]);
}else if(rem == 2){
result = join(3, result, out[i], out[i+1]);
break;
}else if(rem == 1){
result = join(3, result, out[i]);
break;
}else{
break;
}
}
*/
auto a = matmulNT(grad_out_reshape, weights_reshape);
auto adims = a.array().dims();
auto b = moddims(a, dim4(adims[0], wx*wy, c_i, adims[3]));
auto c = reorder(b, 1, 0, 2, 3);
inputs[0].addGrad(wrap(c, idims[0], idims[1], wx, wy, sx, sy, px, py));

auto d = matmulTN(inputs[2],grad_out_reshape);
inputs[1].addGrad(moddims(d, dim4(wx, wy, c_i, d.dims()[1])));

};
return Variable(result, {input, weights, Variable(lhs, false)}, grad_func);

}



}
}
78 changes: 78 additions & 0 deletions src/nn/Modules/Conv2D.cpp
@@ -0,0 +1,78 @@
/*******************************************************
* Copyright (c) 2017, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <af/autograd/Functions.hpp>
#include <af/nn/Init.hpp>
#include <af/nn/Modules/Conv2D.hpp>
//forward output dims: (x_o, y_o, n_out, n)
namespace af
{
namespace nn
{
using namespace autograd;

Conv2D::Conv2D(int wx, int wy, int sx, int sy, int px, int py, int n_in, int n_out, bool bias) :
m_wx(wx),
m_wy(wy),
m_sx(sx),
m_sy(sy),
m_px(px),
m_py(py),
m_bias(bias)
{
auto w = nn::lecunNormal(dim4(wx, wy, n_in, n_out));
if (bias) {
auto b = nn::lecunNormal(dim4(1, 1, n_out, 1));
setParams({w, b});
} else {
setParams({w});
}
}

Conv2D::Conv2D(const Variable &w, int sx, int sy, int px, int py) :
m_sx(sx),
m_sy(sy),
m_px(px),
m_py(py),
m_bias(false),
Module({w})
{
dim4 pdims = w.array().dims();
m_wx = pdims[0];
m_wy = pdims[1];
}

Conv2D::Conv2D(const Variable &w, const Variable &b, int sx, int sy, int px, int py) :
m_sx(sx),
m_sy(sy),
m_px(px),
m_py(py),
m_bias(true),
Module({w, b})
{
/*if (b.array().dims(0) != w.array().dims(0)) {
throw af::exception("nn:Linear: Dimension mismatch between weight and bias.");
}*/
if (b.array().dims(1) != 1) {
throw af::exception("nn::Conv2D: Bias must be a vector.");
}
dim4 pdims = w.array().dims();
Member: Btw I added a .dims() method to Variable. You don't need to do w.array().dims().

m_wx = pdims[0];
m_wy = pdims[1];
}
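
Following the reviewer's note above, a minimal sketch of the same lookup (assuming Variable::dims() simply forwards to the underlying array's dims() and returns an af::dim4):

dim4 pdims = w.dims();
m_wx = pdims[0];
m_wy = pdims[1];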

Variable Conv2D::forward(const Variable &input)
{
auto res = conv2d(input, m_parameters[0], m_wx, m_wy, m_sx, m_sy, m_px, m_py);
if (m_bias) {
res = res + tileAs(m_parameters[1], res);
Member: I am not familiar with bias in a Convolution layer. Let me know if you find a reference for this.

Contributor Author: The alexnet model I pulled from caffe's model zoo has both weights and biases for every learned layer.

Contributor Author: You can view biases in this implementation http://www.cs.toronto.edu/~guerzhoy/tf_alexnet/

Member: @plavin I mean the way bias is used here. I don't know if it is the same as what we are doing in Linear layer.

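For reference, a convolution bias is typically one scalar per output channel, which matches the lecunNormal(dim4(1, 1, n_out, 1)) initialization in the first constructor; tileAs then broadcasts that value over both spatial dimensions and the batch, so the usage mirrors the Linear layer except that the tiling also covers the spatial axes. Roughly:

//res dims:  (x_o, y_o, c_o, n)   conv2d output
//bias dims: (1,   1,   c_o, 1)   one value per output channel
//res + tileAs(bias, res) broadcasts the bias over x, y and the batch
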
}
return res;
}
}
}
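
A minimal usage sketch of the layer as proposed in this PR. Header paths, randu, and the chosen sizes are illustrative assumptions; the constructor argument order (wx, wy, sx, sy, px, py, n_in, n_out) is taken from Conv2D.hpp above:

#include <af/nn/Modules/Conv2D.hpp>
#include <af/autograd/Variable.hpp>

using namespace af;

int main()
{
//5x5 filters, stride 1, padding 2, 3 input channels, 16 output channels, bias enabled
nn::Conv2D conv(5, 5, 1, 1, 2, 2, 3, 16);

//a single 32x32 3-channel image (batch size 1, as conv2d currently assumes)
auto input = autograd::Variable(randu(32, 32, 3, 1), false);

//output dims: (32, 32, 16, 1) since (32 + 2*2 - 5)/1 + 1 = 32
auto output = conv.forward(input);
return 0;
}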