diff --git a/.gitignore b/.gitignore
index b8bd026..91ec9f5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,3 +26,8 @@
 *.exe
 *.out
 *.app
+
+.cproject
+.project
+include/thrift
+src/thrift
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..57392e9
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,6 @@
+FIND_PACKAGE(ArrayFire)
+INCLUDE_DIRECTORIES(${ArrayFire_INCLUDE_DIRS})
+...
+
+ADD_EXECUTABLE(some_executable ...)
+TARGET_LINK_LIBRARIES(some_executable ${ArrayFire_LIBRARIES} )
\ No newline at end of file
diff --git a/examples/googlenet/googlenet.cpp b/examples/googlenet/googlenet.cpp
new file mode 100644
index 0000000..78a8349
--- /dev/null
+++ b/examples/googlenet/googlenet.cpp
@@ -0,0 +1,163 @@
+#include <cstdio>
+#include <cstdlib>
+#include <vector>
+
+#include "afml/afml.hpp"
+
+using namespace afml;
+
+// https://github.com/soumith/imagenet-multiGPU.torch/blob/master/models/googlenet_cudnn.lua
+// Note that Lua index starts from 1.
+// TODO: make_shared is very frequently used. Is MS a good alias macro for make_shared?
+// #define MS make_shared
+
+// Builds one inception module: a Concat (constructed with argument 1) holding
+// up to four Sequential branches.
+//   inputSize  first argument of the first Convolution of every branch
+//   config     four rows of integers, one per branch, forwarded to the
+//              Convolution constructors. config[0][0] == 0 skips the first
+//              branch, config[3][0] selects the PoolingType of the last
+//              branch and config[3][1] == 0 skips the Convolution after the
+//              pooling.
+// config is a const reference to a vector of rows because the call sites pass
+// braced temporaries and the body indexes config[i][j]; the former
+// std::initializer_list& parameter allows neither.
+// NOTE(review): every make_shared() that follows a Convolution had lost its
+// template argument in this patch; ReLU is assumed from the referenced Lua
+// model -- confirm.
+NodePtr inception(const int inputSize,
+    const std::vector<std::vector<int> >& config) {
+  shared_ptr<Concat> concat(new Concat(1));
+  if (config[0][0] != 0) {
+    shared_ptr<Sequential> conv1(new Sequential());
+    conv1->add(make_shared<Convolution>(inputSize, config[0][0], 1, 1, 1, 1))
+        ->add(make_shared<ReLU>());
+    concat->add(conv1);
+  }
+
+  shared_ptr<Sequential> conv3(new Sequential());
+  conv3->add(make_shared<Convolution>(inputSize, config[1][0], 1, 1, 1, 1))
+      ->add(make_shared<ReLU>());
+  conv3->add(
+      make_shared<Convolution>(config[1][0], config[1][1], 3, 3, 1, 1, 1, 1))
+      ->add(make_shared<ReLU>());
+  concat->add(conv3);
+
+  shared_ptr<Sequential> conv3xx(new Sequential());
+  conv3xx->add(make_shared<Convolution>(inputSize, config[2][0], 1, 1, 1, 1))
+      ->add(make_shared<ReLU>());
+  conv3xx->add(
+      make_shared<Convolution>(config[2][0], config[2][1], 3, 3, 1, 1, 1, 1))
+      ->add(make_shared<ReLU>());
+  conv3xx->add(
+      make_shared<Convolution>(config[2][1], config[2][1], 3, 3, 1, 1, 1, 1))
+      ->add(make_shared<ReLU>());
+  concat->add(conv3xx);
+
+  shared_ptr<Sequential> pool(new Sequential());
+  pool->add(make_shared<Padding>(3, 3, 1, 1));
+  if (config[3][0] == PoolingType::MAX) {
+    pool->add(make_shared<MaxPooling>(3, 3, 1, 1));
+  } else if (config[3][0] == PoolingType::AVERAGE) {
+    pool->add(make_shared<AveragePooling>(3, 3, 1, 1));
+  } else {
+    printf("Unknown pooling");
+    exit(1);
+  }
+
+  if (config[3][1] != 0) {
+    // The activation is chained on the value returned by pool->add(); the
+    // closing parenthesis used to sit after it, which invoked add() on the
+    // Convolution itself.
+    pool->add(make_shared<Convolution>(inputSize, config[3][1], 1, 1, 1, 1))
+        ->add(make_shared<ReLU>());
+  }
+  concat->add(pool);
+  return concat;
+}
+
+// Assembles the network: a feature stack followed by a Concat of the main
+// classifier branch and the auxiliary classifier.
+// Returns that model when numGPU <= 0; otherwise a DataParallel holding one
+// clone of the model for each GPU id in [0, numGPU).
+// NOTE(review): the make_shared() calls that close the two classifiers had
+// lost their template argument; Softmax is assumed because it is the only
+// such node declared in afml/nodes/common_nodes.hpp -- confirm.
+NodePtr createModel(int numGPU) {
+  shared_ptr<Sequential> features(new Sequential());
+  features->add(make_shared<Convolution>(3, 64, 7, 7, 2, 2, 3, 3))
+      ->add(make_shared<ReLU>());
+  features->add(make_shared<MaxPooling>(3, 3, 2, 2));
+  features->add(make_shared<Convolution>(64, 64, 1, 1))
+      ->add(make_shared<ReLU>());
+  features->add(make_shared<Convolution>(64, 192, 3, 3, 1, 1, 1, 1))
+      ->add(make_shared<ReLU>());
+  features->add(make_shared<MaxPooling>(3, 3, 2, 2));
+
+  // Fills config[0][1], which inception() never reads.
+  int uselessPlaceHoder = -1;
+  features->add(inception(192, { { 64, uselessPlaceHoder }, { 64, 64 },
+      { 64, 96 }, { PoolingType::AVERAGE, 32 } }));
+  features->add(inception(256, { { 64, uselessPlaceHoder }, { 64, 96 },
+      { 64, 96 }, { PoolingType::AVERAGE, 64 } }));
+  features->add(inception(320, { { 0, uselessPlaceHoder }, { 128, 160 },
+      { 64, 96 }, { PoolingType::MAX, 0 } }));
+  features->add(make_shared<Convolution>(576, 576, 2, 2, 2, 2));
+  features->add(inception(576, { { 224, uselessPlaceHoder }, { 64, 96 },
+      { 96, 128 }, { PoolingType::AVERAGE, 128 } }));
+  features->add(inception(576, { { 192, uselessPlaceHoder }, { 96, 128 },
+      { 96, 128 }, { PoolingType::AVERAGE, 128 } }));
+  features->add(inception(576, { { 160, uselessPlaceHoder }, { 128, 160 },
+      { 128, 160 }, { PoolingType::AVERAGE, 96 } }));
+  features->add(inception(576, { { 96, uselessPlaceHoder }, { 128, 192 },
+      { 160, 192 }, { PoolingType::AVERAGE, 96 } }));
+
+  shared_ptr<Sequential> mainBranch(new Sequential());
+  mainBranch->add(inception(576, { { 0, uselessPlaceHoder }, { 128, 192 },
+      { 192, 256 }, { PoolingType::MAX, 0 } }));
+  mainBranch->add(make_shared<Convolution>(1024, 1024, 2, 2, 2, 2));
+  mainBranch->add(inception(1024, { { 352, uselessPlaceHoder }, { 192, 320 },
+      { 160, 224 }, { PoolingType::AVERAGE, 128 } }));
+  mainBranch->add(inception(1024, { { 352, uselessPlaceHoder }, { 192, 320 },
+      { 192, 224 }, { PoolingType::MAX, 128 } }));
+  mainBranch->add(make_shared<AveragePooling>(7, 7, 1, 1));
+  mainBranch->add(make_shared<View>(1024)->withNumInputDims(3));
+  mainBranch->add(make_shared<Linear>(1024, 1000));
+  mainBranch->add(make_shared<Softmax>());
+
+  shared_ptr<Sequential> auxClassifier(new Sequential());
+  auxClassifier->add(make_shared<AveragePooling>(5, 5, 3, 3));
+  auxClassifier->add(make_shared<Convolution>(576, 128, 1, 1, 1, 1));
+  auxClassifier->add(make_shared<View>(128 * 4 * 4)->withNumInputDims(3));
+  auxClassifier->add(make_shared<Linear>(128 * 4 * 4, 768));
+  auxClassifier->add(make_shared<ReLU>());
+  auxClassifier->add(make_shared<Linear>(768, 1000));
+  auxClassifier->add(make_shared<Softmax>());
+
+  shared_ptr<Concat> splitter(new Concat(1));
+  splitter->add(mainBranch)->add(auxClassifier);
+  shared_ptr<Sequential> model = make_shared<Sequential>()
+      ->add(features)->add(splitter);
+
+  if (numGPU > 0) {
+    shared_ptr<DataParallel> dp(new DataParallel(1));
+    for (int i = 0; i < numGPU; ++i) {
+      // The model assembled above is what gets cloned; "root" never existed.
+      dp->add(i, model->clone());
+    }
+    return dp;
+  }
+  return model;
+}
+
+// Usage: googlenet [numGPU]. numGPU is taken as 0 when no argument is given.
+int main(int argc, char *argv[]) {
+  // argv[1] may only be read when an argument was actually supplied.
+  int numGPU = argc > 1 ? atoi(argv[1]) : 0;
+  NodePtr model = createModel(numGPU);
+  // Node::toString() is declared void in afml/node.hpp, so its result cannot
+  // be handed to printf, least of all as the format string.
+  // NOTE(review): presumably toString() emits the description itself -- confirm.
+  model->toString();
+  return 0;
+}
diff --git a/include/afml/.gitignore b/include/afml/.gitignore
new file mode 100644
index 0000000..67a9013
--- /dev/null
+++ b/include/afml/.gitignore
@@ -0,0 +1 @@
+/thrift/
diff --git a/include/afml/afml.hpp b/include/afml/afml.hpp
new file mode 100644
index 0000000..1ad1987
--- /dev/null
+++ b/include/afml/afml.hpp
@@ -0,0 +1,15 @@
+#ifndef AFML_AFML_HPP_
+#define AFML_AFML_HPP_
+
+// Umbrella header. The includes sit at global scope: the headers open
+// namespace afml themselves, so wrapping them in another namespace afml block
+// would nest every declaration one level too deep.
+// node.hpp precedes container.hpp because Container derives from Node.
+#include "afml/common.hpp"
+#include "afml/node.hpp"
+#include "afml/container.hpp"
+#include "afml/io.hpp"
+#include "afml/nodes.hpp"
+#include "afml/parallel.hpp"
+
+#endif /* AFML_AFML_HPP_ */
diff --git a/include/afml/common.hpp b/include/afml/common.hpp
new file mode 100644
index 0000000..c2287c3
--- /dev/null
+++ b/include/afml/common.hpp
@@ -0,0 +1,58 @@
+#ifndef AFML_COMMON_HPP_
+#define AFML_COMMON_HPP_
+
+// NOTE(review): the header names of the #include directives below are missing
+// in this copy of the patch -- restore them from the original commit.
+#include
+
+#if __cplusplus < 201100L
+#include
+#include
+#include
+#else
+#include
+#include
+#include
+#include
+#endif
+
+#include "afml/thrift/afml_types.h"
+
+namespace afml {
+
+using namespace af;
+using namespace boost;
+using namespace std;
+
+// nullptr is a keyword rather than a member of boost or std, so it is not
+// (and cannot be) named in a using-declaration here.
+#if __cplusplus < 201100L
+using boost::make_shared;
+using boost::shared_ptr;
+using boost::unordered_map;
+#else
+using std::initializer_list;
+using std::make_shared;
+using std::shared_ptr; // Can CUDA with this?
+using std::unordered_map;
+#endif
+
+// NOTE(review): the template arguments of this typedef are missing in this
+// copy of the patch -- confirm the intended key and value types.
+typedef unordered_map map;
+
+// Just to be consistent in camel case style.
+// Qualified because three using-directives are active in this namespace.
+typedef af::array Array;
+
+typedef vector<Array> ArrayVec;
+typedef shared_ptr<Array> ArrayPtr;
+typedef vector<ArrayPtr> ArrayPtrVec;
+
+class Node;
+typedef shared_ptr<Node> NodePtr;
+typedef vector<NodePtr> NodePtrVec;
+
+} // namespace afml
+
+#endif /* AFML_COMMON_HPP_ */
diff --git a/include/afml/container.hpp b/include/afml/container.hpp
new file mode 100644
index 0000000..eb2c9ff
--- /dev/null
+++ b/include/afml/container.hpp
@@ -0,0 +1,68 @@
+#ifndef AFML_CONTAINER_HPP_
+#define AFML_CONTAINER_HPP_
+
+// Includes the base class header directly: afml/afml.hpp includes this file,
+// so including it from here would be circular.
+#include "afml/common.hpp"
+#include "afml/node.hpp"
+
+namespace afml {
+
+// From Torch7
+// https://github.com/torch/nn/blob/master/Container.lua
+// Contains multiple nodes for easier management or building complex networks
+class Container : public Node {
+ public:
+  Container();
+
+  // Return this shared_ptr to chain calls add()->add()->add()
+  // NOTE(review): the template argument of the returned shared_ptr is missing
+  // in this copy of the patch -- confirm the pointee type.
+  virtual shared_ptr add(const NodePtr node);
+
+  // Returns the contained modules at index index.
+  virtual NodePtr get(const size_t index) const;
+
+  // Returns the number of contained modules.
+  size_t size() const;
+
+  virtual void forward();
+  virtual void backward();
+  virtual void toString() const;
+};
+
+// https://github.com/torch/nn/blob/master/Concat.lua
+class Concat : public Container {
+ public:
+  // explicit: a bare size_t must not convert silently into a Concat.
+  explicit Concat(const size_t concatDim);
+  virtual ~Concat();
+
+};
+
+// https://github.com/torch/nn/blob/master/Sequential.lua
+// To simplify management of sequentially connected nodes.
+class Sequential : public Container {
+ public:  // without this label every member, destructor included, is private
+  virtual ~Sequential();
+  // Return this shared_ptr to chain calls add()->add()->add()
+  virtual shared_ptr add(const NodePtr node);
+  virtual void insert(const NodePtr node, const size_t index);
+  virtual void remove (const size_t index);
+  virtual void toString() const;
+};
+
+// https://github.com/torch/nn/blob/master/Parallel.lua
+// To run multiple copies of a part of a model on different GPUs.
+class Parallel : public Container {
+ public:
+  Parallel(const size_t inputDim, const size_t outputDim);
+  virtual ~Parallel();
+  virtual void forward();
+  virtual void backward();
+  virtual void toString() const;
+};
+
+} // namespace afml
+
+#endif /* AFML_CONTAINER_HPP_ */
diff --git a/include/afml/io.hpp b/include/afml/io.hpp
new file mode 100644
index 0000000..3e7b5a7
--- /dev/null
+++ b/include/afml/io.hpp
@@ -0,0 +1,25 @@
+#ifndef AFML_IO_HPP_
+#define AFML_IO_HPP_
+
+#include "afml/common.hpp"
+
+namespace afml {
+
+// Converts between in-memory Arrays and the Thrift Storage struct.
+class SerDe {
+ public:
+
+  // Storage is defined in thrift/afml.thrift. The signatures used to name
+  // Data, which that file does not define and which is also the name of the
+  // node class in afml/nodes/data_nodes.hpp.
+  // NOTE(review): both functions return references -- confirm that the
+  // implementation keeps the referenced object alive.
+  Storage& serialize(const Array& arr) const;
+  Array& deserialize(const Storage& data);
+};
+
+
+} // namespace afml
+
+
+#endif /* AFML_IO_HPP_ */
diff --git a/include/afml/node.hpp b/include/afml/node.hpp
new file mode 100644
index 0000000..e588e0f
--- /dev/null
+++ b/include/afml/node.hpp
@@ -0,0 +1,304 @@
+#ifndef AFML_NODE_HPP_
+#define AFML_NODE_HPP_
+
+// NOTE(review): the header names of the three #include directives below are
+// missing in this copy of the patch -- restore them from the original commit.
+#include
+#include
+#include
+
+#include "afml/common.hpp"
+
+namespace afml {
+
+// Same selection rule as the using-declarations in afml/common.hpp.
+#if __cplusplus < 201100L
+using boost::enable_shared_from_this;
+#else
+using std::enable_shared_from_this;
+#endif
+
+// https://github.com/BVLC/caffe/blob/master/include/caffe/layer.hpp
+// https://github.com/torch/nn/blob/master/Module.lua
+// Base class of the network graph. A node records its successors (next) and
+// predecessors (prev) both by name and in insertion order, together with its
+// output, its gradients and its named parameters.
+// A node has to be owned by a shared_ptr before addNext() is called on it:
+// addNext() hands shared_from_this() to the successor. This replaces the
+// thisNode_ member, which init() had no valid way to fill.
+// NOTE(review): successors and predecessors hold shared_ptrs to each other,
+// which forms ownership cycles -- consider weak_ptr for the prev links.
+class Node : public enable_shared_from_this<Node> {
+ public:
+  explicit Node(const NodeConfig& nodeConfig);
+  virtual ~Node();
+  // getAllNodes and traverse are from
+  // https://github.com/zxie/nn/blob/master/nets/graph.py
+  static NodePtrVec getAllNodes(const NodePtrVec& startNodes);
+
+  // Calls fn(node) for the nodes in startNodes that have no predecessor, then
+  // for every successor as soon as all of its predecessors have been visited.
+  template <typename Function>
+  static void traverse(const NodePtrVec& startNodes, Function fn) {
+    NodePtrVec readyNodes;
+    // Node name -> number of predecessors not visited yet.
+    unordered_map<string, size_t> deps;
+    for (size_t i = 0; i < startNodes.size(); ++i) {
+      if (startNodes[i]->numPrev() == 0) {
+        readyNodes.push_back(startNodes[i]);
+      }
+      deps[startNodes[i]->name()] = startNodes[i]->numPrev();
+    }
+    vector<string> names;
+    while (readyNodes.size() > 0) {
+      NodePtrVec nextReadyNodes;
+      for (size_t i = 0; i < readyNodes.size(); ++i) {
+        fn(readyNodes[i]);
+        names = readyNodes[i]->nextNames();
+        for (size_t j = 0; j < names.size(); ++j) {
+          // A successor missing from startNodes gets its counter on first
+          // contact; otherwise the decrement would wrap around from 0.
+          if (deps.find(names[j]) == deps.end()) {
+            deps[names[j]] = readyNodes[i]->next(names[j])->numPrev();
+          }
+          deps[names[j]]--;
+          if (deps[names[j]] == 0) {
+            nextReadyNodes.push_back(readyNodes[i]->next(names[j]));
+          }
+        }
+        deps.erase(readyNodes[i]->name());
+      }
+      readyNodes = nextReadyNodes;
+    }
+  }
+
+  // Input and output are more general than the top and bottom of Caffe
+  virtual void forward() = 0;
+  // Since the network is DAG, propagate_back is more general than
+  // propagate_down of Caffe
+  virtual void backward() = 0;
+
+  // Stores the sum of gradient() over all successors in gradientWrtOutput_;
+  // does nothing when there is no successor. Writing the sum to
+  // gradientWrtInput_ would leave gradientWrtOutput_ unset for composeGradient().
+  void computeGradientWrtOutput() {
+    if (nextNodes_.empty()) {
+      return;
+    }
+    // copy() so that accumulating cannot alter the first successor's gradient.
+    Array sum = nextNodes_[0]->gradient()->copy();
+    for (size_t i = 1; i < nextNodes_.size(); ++i) {
+      sum += *nextNodes_[i]->gradient();
+    }
+    gradientWrtOutput_ = make_shared<Array>(sum);
+  }
+
+  // Multiplies the Array behind gradientWrtInput_ by the one behind
+  // gradientWrtOutput_; both pointers must be set beforehand.
+  void composeGradient() {
+    *gradientWrtInput_ *= *gradientWrtOutput_;
+  }
+
+  // Must be called after all the addNext has been called
+  // i.e. the nodes in the network has connected with each other
+  // Calls initNode which subclass can override
+  void init() {
+    checkNumNextPrevNodes();
+    initNode();
+  }
+
+  string name() const {
+    return name_;
+  }
+
+  // Must make sure there's no copy to return Array
+  ArrayPtr output() const {
+    return output_;
+  }
+
+  ArrayPtr gradient() const {
+    return gradientWrtInput();
+  }
+
+  ArrayPtr gradientWrtInput() const {
+    return gradientWrtInput_;
+  }
+
+  ArrayPtr gradientWrtOutput() const {
+    return gradientWrtOutput_;
+  }
+
+  // find() rather than operator[] in the two accessors below: operator[] is
+  // not available on a const map (and inserts on a non-const one).
+  ArrayPtr param(const string& name) const {
+    CHECK(params_.find(name) != params_.end());
+    return params_.find(name)->second;
+  }
+
+  ArrayPtr gradientWrtParams(const string& name) const {
+    CHECK(gradientWrtParams_.find(name) != gradientWrtParams_.end());
+    return gradientWrtParams_.find(name)->second;
+  }
+
+  void add(NodePtr node) {
+    addNext(node);
+  }
+
+  // The model is DAG(Directed Acyclic Graph)
+  // Links this node to node in both directions; a link that already exists
+  // is not duplicated. Requiring node to list this node as a predecessor
+  // beforehand would keep the first link from ever being created.
+  void addNext(NodePtr node) {
+    if (!hasNext(node)) {
+      next_[node->name()] = node;
+      nextNodes_.push_back(node);
+    }
+    const NodePtr self = shared_from_this();
+    if (!node->hasPrev(self)) {
+      node->addPrev(self);
+    }
+  }
+
+  void addPrev(NodePtr node) {
+    prev_[node->name()] = node;
+    prevNodes_.push_back(node);
+  }
+
+  // Shortcut to add multiple nodes
+  void addNext(const NodePtrVec& nodes) {
+    for (size_t i = 0; i < nodes.size(); ++i) {
+      addNext(nodes[i]);
+    }
+  }
+
+  void addPrev(const NodePtrVec& nodes) {
+    for (size_t i = 0; i < nodes.size(); ++i) {
+      addPrev(nodes[i]);
+    }
+  }
+
+  // next_ and prev_ are keyed by node name, so the lookups go through name().
+  bool hasNext(NodePtr node) const {
+    return next_.find(node->name()) != next_.end();
+  }
+
+  bool hasPrev(NodePtr node) const {
+    return prev_.find(node->name()) != prev_.end();
+  }
+
+  // Read-only views: a const member function cannot hand out non-const
+  // references to its own members.
+  const unordered_map<string, NodePtr>& next() const {
+    return next_;
+  }
+
+  const unordered_map<string, NodePtr>& prev() const {
+    return prev_;
+  }
+
+  // Names of the successors in the order they were added.
+  vector<string> nextNames() const {
+    vector<string> names;
+    for (size_t i = 0; i < nextNodes_.size(); ++i) {
+      names.push_back(nextNodes_[i]->name());
+    }
+    return names;
+  }
+
+  // Names of the predecessors in the order they were added.
+  vector<string> prevNames() const {
+    vector<string> names;
+    for (size_t i = 0; i < prevNodes_.size(); ++i) {
+      names.push_back(prevNodes_[i]->name());
+    }
+    return names;
+  }
+
+  const NodePtrVec& nextNodes() const {
+    return nextNodes_;
+  }
+
+  const NodePtrVec& prevNodes() const {
+    return prevNodes_;
+  }
+
+  // Returns the successor called name, or an empty NodePtr when there is none.
+  NodePtr next(const string& name) const {
+    if (next_.find(name) != next_.end()) {
+      return next_.find(name)->second;
+    }
+    return NodePtr();
+  }
+
+  // Returns the predecessor called name, or an empty NodePtr when there is none.
+  NodePtr prev(const string& name) const {
+    if (prev_.find(name) != prev_.end()) {
+      return prev_.find(name)->second;
+    }
+    return NodePtr();
+  }
+
+  size_t numNext() const {
+    return next_.size();
+  }
+
+  size_t numPrev() const {
+    return prev_.size();
+  }
+
+  // From Torch7 module API
+  // https://github.com/torch/nn/blob/master/doc/module.md
+  void training() {
+    train_ = true;
+  }
+  void evaluating() {
+    train_ = false;
+  }
+
+  void shareParams(const NodePtr node);
+  void shareParams(const NodePtr node, const string& name);
+  void shareParams(const NodePtr node, const string& name1, const string& name2);
+  void shareParams(const NodePtr node, const vector<string>& names);
+  NodePtr clone();
+  NodePtr clone(const string& name);
+  NodePtr clone(const string& name1, const string& name2);
+  NodePtr clone(const vector<string>& names);
+
+  // Use CPU or GPU
+  void cpu();
+  void gpu();
+
+  virtual void toString() const;
+
+ protected:
+  virtual void checkNumNextPrevNodes() = 0;
+  virtual void initNode() = 0;
+
+  string name_;
+  // Held by value: the constructor takes a const reference, which cannot
+  // initialise a non-const reference member.
+  NodeConfig config_;
+  ArrayPtr output_;
+  // wrt = with regard to
+  ArrayPtr gradientWrtInput_;
+  ArrayPtr gradientWrtOutput_;
+  unordered_map<string, ArrayPtr> params_;
+  unordered_map<string, ArrayPtr> gradientWrtParams_;
+  unordered_map<string, NodePtr> next_;
+  NodePtrVec nextNodes_;
+  unordered_map<string, NodePtr> prev_;
+  NodePtrVec prevNodes_;
+  size_t expectedNumNextNodes_;
+  size_t expectedNumPrevNodes_;
+  size_t expectedMinNumNextNodes_;
+  size_t expectedMinNumPrevNodes_;
+  size_t expectedMaxNumNextNodes_;
+  size_t expectedMaxNumPrevNodes_;
+  // Written by training() and evaluating(); declared here because those two
+  // functions assign it.
+  bool train_;
+};
+
+} // namespace afml
+
+#endif /* AFML_NODE_HPP_ */
diff --git a/include/afml/nodes.hpp b/include/afml/nodes.hpp
new file mode 100644
index 0000000..18a16a8
---
/dev/null
+++ b/include/afml/nodes.hpp
@@ -0,0 +1,10 @@
+#ifndef AFML_NODES_HPP_
+#define AFML_NODES_HPP_
+
+#include "afml/nodes/common_nodes.hpp"
+#include "afml/nodes/convolution_nodes.hpp"
+#include "afml/nodes/data_nodes.hpp"
+#include "afml/nodes/elementwise_nodes.hpp"
+#include "afml/nodes/loss_nodes.hpp"
+
+#endif /* AFML_NODES_HPP_ */
diff --git a/include/afml/nodes/common_nodes.hpp b/include/afml/nodes/common_nodes.hpp
new file mode 100644
index 0000000..5a6b2aa
--- /dev/null
+++ b/include/afml/nodes/common_nodes.hpp
@@ -0,0 +1,98 @@
+#ifndef AFML_COMMON_NODES_HPP_
+#define AFML_COMMON_NODES_HPP_
+
+#include "afml/common.hpp"
+// common.hpp only forward-declares Node; deriving from it needs the definition.
+#include "afml/node.hpp"
+
+namespace afml {
+
+enum PoolingType {
+  AVERAGE,
+  MAX,
+  STOCHASTIC
+};
+
+// Fully connected inner product of input and output
+// The weight matrix is the parameter named "W"; the input is the output of
+// the first predecessor.
+// NOTE(review): no constructor is declared here and Node only declares
+// Node(const NodeConfig&) -- confirm how instances are meant to be built.
+class Linear : public Node {
+ public:
+  // Public so that shared_ptr can destroy a Linear.
+  virtual ~Linear();
+
+  // output = W x
+  virtual void forward() {
+    output_ = make_shared<Array>(
+        af::matmul(*param("W"), *prevNodes_[0]->output()));
+  }
+
+  // With g the gradient wrt the output, summed over the successors:
+  //   gradient wrt the input = W^T g
+  //   gradient wrt W         = g x^T
+  virtual void backward() {
+    computeGradientWrtOutput();
+    gradientWrtInput_ = make_shared<Array>(
+        af::matmul(param("W")->T(), *gradientWrtOutput_));
+    // x is the input of the layer, not its own output.
+    gradientWrtParams_["W"] = make_shared<Array>(
+        af::matmul(*gradientWrtOutput_, prevNodes_[0]->output()->T()));
+  }
+
+ protected:
+  virtual void checkNumNextPrevNodes() {
+    CHECK(nextNodes_.size() >= expectedMinNumNextNodes_);
+    CHECK(prevNodes_.size() == expectedNumPrevNodes_);
+  }
+
+  // NOTE(review): allocates with the dimensions of the input, which is what
+  // the former resize() call asked for; forward() replaces the array --
+  // confirm the pre-allocation is still wanted.
+  virtual void initNode() {
+    output_ = make_shared<Array>(prevNodes_[0]->output()->dims());
+  }
+};
+
+// Softmax over dimension 0 of the first predecessor's output.
+class Softmax : public Node {
+ public:
+  // Public so that shared_ptr can destroy a Softmax.
+  virtual ~Softmax();
+
+  virtual void forward() {
+    const int axis = 0;
+    const Array input = *prevNodes_[0]->output();
+    const unsigned rows = static_cast<unsigned>(input.dims(0));
+    // max() and sum() reduce dimension 0 to length 1; tile() repeats the
+    // result rows times so both operands of - and / have the same shape.
+    // Subtracting the maximum keeps exp() from overflowing.
+    const Array probs =
+        af::exp(input - af::tile(af::max(input, axis), rows));
+    output_ = make_shared<Array>(
+        probs / af::tile(af::sum(probs, axis), rows));
+  }
+
+  // NOTE(review): y*y - y is the negative of the diagonal softmax derivative
+  // y*(1 - y), and the gradient coming from the successors is not applied --
+  // confirm both are intended.
+  virtual void backward() {
+    gradientWrtInput_ = make_shared<Array>(*output_ * *output_ - *output_);
+  }
+
+ protected:
+  virtual void checkNumNextPrevNodes() {
+    CHECK(nextNodes_.size() >= expectedMinNumNextNodes_);
+    CHECK(prevNodes_.size() == expectedNumPrevNodes_);
+  }
+
+  virtual void initNode() {
+    output_ = make_shared<Array>(prevNodes_[0]->output()->dims());
+  }
+};
+
+} // namespace afml
+
+
+#endif /* AFML_COMMON_NODES_HPP_ */
diff --git
a/include/afml/nodes/convolution_nodes.hpp b/include/afml/nodes/convolution_nodes.hpp
new file mode 100644
index 0000000..3a38f9b
--- /dev/null
+++ b/include/afml/nodes/convolution_nodes.hpp
@@ -0,0 +1,47 @@
+#ifndef AFML_CONVOLUTION_NODES_HPP_
+#define AFML_CONVOLUTION_NODES_HPP_
+
+#include "afml/common.hpp"
+// common.hpp only forward-declares Node; deriving from it needs the definition.
+#include "afml/node.hpp"
+
+namespace afml {
+
+// TODO: Which is better, Caffe or Torch7's convolution API?
+
+// https://github.com/BVLC/caffe/blob/master/include/caffe/vision_layers.hpp
+// NOTE(review): the three classes below are empty placeholders. Node only
+// declares Node(const NodeConfig&) and has pure virtual members, so they
+// cannot be instantiated as they stand.
+class BaseConvolution : public Node {
+
+};
+
+class Convolution : public BaseConvolution {
+
+};
+
+class DeconvolutionLayer : public BaseConvolution {
+
+};
+
+
+// What's the difference between SpatialConvolution, SpatialConvolutionMM,
+// SpatialConvolutionMap, SpatialFullConvolution, SpatialFullConvolutionMap,
+// TemporalConvolution, and VolumetricConvolution of torch/nn?
+// Can they be simplified and unified?
+// https://github.com/torch/nn/
+
+// https://github.com/torch/nn/blob/master/SpatialConvolution.lua
+class SpatialConvolution : public Node {
+ public:
+  SpatialConvolution(const string& name, const size_t numInputPlane,
+      const size_t numOutputPlane, const size_t kernelWidth,
+      const size_t kernelHeight, const size_t strideWidth,
+      const size_t strideHeight, const size_t padWidth, const size_t padHeight);
+};
+
+} // namespace afml
+
+#endif /* AFML_CONVOLUTION_NODES_HPP_ */
+
diff --git a/include/afml/nodes/data_nodes.hpp b/include/afml/nodes/data_nodes.hpp
new file mode 100644
index 0000000..801c135
--- /dev/null
+++ b/include/afml/nodes/data_nodes.hpp
@@ -0,0 +1,33 @@
+#ifndef AFML_DATA_NODES_HPP_
+#define AFML_DATA_NODES_HPP_
+
+#include "afml/common.hpp"
+// common.hpp only forward-declares Node; deriving from it needs the definition.
+#include "afml/node.hpp"
+
+namespace afml {
+
+// Placeholder data node: every override below has an empty body.
+class Data : public Node {
+ public:
+  // Public so that shared_ptr can destroy a Data.
+  virtual ~Data();
+
+  virtual void forward() {
+  }
+
+  virtual void backward() {
+  }
+
+ protected:
+  virtual void checkNumNextPrevNodes() {
+  }
+
+  virtual void initNode() {
+  }
+};
+
+} // namespace afml
+
+
+#endif /* AFML_DATA_NODES_HPP_ */
diff --git
a/include/afml/nodes/elementwise_nodes.hpp b/include/afml/nodes/elementwise_nodes.hpp
new file mode 100644
index 0000000..0c33423
--- /dev/null
+++ b/include/afml/nodes/elementwise_nodes.hpp
@@ -0,0 +1,46 @@
+#ifndef AFML_ELEMENTWISE_NODES_HPP_
+#define AFML_ELEMENTWISE_NODES_HPP_
+
+#include "afml/common.hpp"
+// common.hpp only forward-declares Node; deriving from it needs the definition.
+#include "afml/node.hpp"
+
+namespace afml {
+
+// Rectified linear unit applied element-wise to the first predecessor's output.
+class ReLU : public Node {
+ public:
+  // Public so that shared_ptr can destroy a ReLU.
+  virtual ~ReLU();
+
+  // output = x where x > 0, and 0 elsewhere.
+  virtual void forward() {
+    const Array input = *prevNodes_[0]->output();
+    output_ = make_shared<Array>(input * (input > 0));
+  }
+
+  // Sets gradientWrtInput_ to the 0/1 mask (output > 0), then lets Node fetch
+  // the gradient of the successors and multiply it in.
+  virtual void backward() {
+    // The mask is cast to the type of the output so that composeGradient()
+    // multiplies arrays of one type.
+    gradientWrtInput_ = make_shared<Array>(
+        (*output_ > 0).as(output_->type()));
+    computeGradientWrtOutput();
+    composeGradient();
+  }
+
+ protected:
+  virtual void checkNumNextPrevNodes() {
+    CHECK(nextNodes_.size() >= expectedMinNumNextNodes_);
+    CHECK(prevNodes_.size() == expectedNumPrevNodes_);
+  }
+
+  virtual void initNode() {
+    output_ = make_shared<Array>(prevNodes_[0]->output()->dims());
+  }
+};
+
+} // namespace afml
+
+
+#endif /* AFML_ELEMENTWISE_NODES_HPP_ */
diff --git a/include/afml/nodes/loss_nodes.hpp b/include/afml/nodes/loss_nodes.hpp
new file mode 100644
index 0000000..b38c73c
--- /dev/null
+++ b/include/afml/nodes/loss_nodes.hpp
@@ -0,0 +1,23 @@
+#ifndef AFML_LOSS_NODES_HPP_
+#define AFML_LOSS_NODES_HPP_
+
+#include "afml/common.hpp"
+// common.hpp only forward-declares Node; deriving from it needs the definition.
+#include "afml/node.hpp"
+
+namespace afml {
+
+// NOTE(review): Accuracy and NegativeLogLikelihood are still empty placeholders.
+class Accuracy : public Node {
+
+};
+
+// This is probably the most commonly used loss for classification
+class NegativeLogLikelihood : public Node {
+
+};
+
+} // namespace afml
+
+
+#endif /* AFML_LOSS_NODES_HPP_ */
diff --git a/include/afml/parallel.hpp b/include/afml/parallel.hpp
new file mode 100644
index 0000000..3dc2e65
--- /dev/null
+++ b/include/afml/parallel.hpp
@@ -0,0 +1,54 @@
+#ifndef AFML_PARALLEL_HPP_
+#define AFML_PARALLEL_HPP_
+
+#include "afml/common.hpp"
+#include "afml/container.hpp"
+
+namespace afml {
+
+// https://github.com/facebook/fbcunn/blob/master/fbcunn/AbstractParallel.lua
+class AbstractParallel : public Container {
+ public:
+  // explicit: a bare size_t must not convert silently into a container.
+  explicit AbstractParallel(const size_t dim);
+  // Keeps Container::add(node) visible next to the two-argument overload.
+  using Container::add;
+  virtual size_t nextGPU() const;
+  // Add the node to run on gpuID
+  virtual void add(const size_t gpuID, const NodePtr node);
+  virtual NodePtr get(const size_t index) const;
+  void asyncCopy(const ArrayPtr source, const ArrayPtr dest);
+ protected:
+  void distributeGradientWrtOutput();
+
+};
+
+// https://github.com/facebook/fbcunn/blob/master/fbcunn/DataParallel.lua
+class DataParallel : public AbstractParallel {
+ public:
+  // Constructors are not inherited and AbstractParallel declares no default
+  // constructor, so DataParallel needs its own to be constructible.
+  explicit DataParallel(const size_t dim);
+ protected:
+  void distributeInput(const ArrayPtr input);
+  void gatherGradients();
+  void combineGradients(const size_t row, const ArrayPtrVec& gradients);
+
+};
+
+// https://github.com/facebook/fbcunn/blob/master/fbcunn/ModelParallel.lua
+class ModelParallel : public AbstractParallel {
+ public:
+  explicit ModelParallel(const size_t dim);
+  using AbstractParallel::add;  // keeps the inherited add overloads visible
+  virtual size_t nextGPU() const;
+  virtual void add(const size_t gpuID, const NodePtr node);
+  virtual NodePtr get(const size_t index) const;
+  void distributeInput(const ArrayPtr input);
+
+};
+
+} // namespace afml
+
+
+#endif /* AFML_PARALLEL_HPP_ */
diff --git a/src/afml/.gitignore b/src/afml/.gitignore
new file mode 100644
index 0000000..67a9013
--- /dev/null
+++ b/src/afml/.gitignore
@@ -0,0 +1 @@
+/thrift/
diff --git a/thrift/afml.thrift b/thrift/afml.thrift
new file mode 100644
index 0000000..1e2a362
--- /dev/null
+++ b/thrift/afml.thrift
@@ -0,0 +1,22 @@
+namespace cpp afml
+namespace csharp afml
+namespace go afml
+namespace html afml
+namespace java afml
+namespace js afml
+namespace json afml
+namespace lua afml
+namespace perl afml
+namespace php afml
+namespace py afml
+namespace rb afml
+
+// In Torch7, tensors are backed by storages
+struct Storage {
+  1: list dims,
+  2: string data
+}
+
+struct NodeConfig {
+  1: string name,
+}
\ No newline at end of file
diff --git a/thrift/gen_thrift.sh b/thrift/gen_thrift.sh
new file mode 100755
index 0000000..64c6550
--- /dev/null
+++ b/thrift/gen_thrift.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+#
+# Generates the C++ Thrift sources for ${NAME}.thrift and moves them into the
+# project tree: headers to ../include/${NAME}/thrift, sources to
+# ../src/${NAME}/thrift. The sed calls rewrite the includes of the generated
+# files accordingly, e.g. to
+#   #include "afml/thrift/afml_constants.h"
+
+# Stop at the first failing command so that a failed thrift run is not
+# followed by the mv and rm steps.
+set -e
+
+# The relative paths below are resolved from the directory of this script.
+cd "$(dirname "$0")"
+
+NAME=afml
+
+thrift -r --gen cpp "${NAME}.thrift"
+sed -i "s|#include \"${NAME}_constants|#include \"${NAME}/thrift/${NAME}_constants|g" gen-cpp/*
+sed -i "s|#include \"${NAME}_types|#include \"${NAME}/thrift/${NAME}_types|g" gen-cpp/*
+# -p creates missing parents and accepts an existing directory.
+mkdir -p "../include/${NAME}/thrift"
+mv gen-cpp/*.h "../include/${NAME}/thrift"
+mkdir -p "../src/${NAME}/thrift"
+mv gen-cpp/*.cpp "../src/${NAME}/thrift"
+rm -rf gen-cpp
+
+# thrift -r --gen java afml.thrift
+# thrift -r --gen js afml.thrift
+# thrift -r --gen json afml.thrift
+# thrift -r --gen lua afml.thrift
+# thrift -r --gen py afml.thrift