5 changes: 5 additions & 0 deletions .gitignore
@@ -26,3 +26,8 @@
*.exe
*.out
*.app

.cproject
.project
include/thrift
src/thrift
6 changes: 6 additions & 0 deletions CMakeLists.txt
@@ -0,0 +1,6 @@
FIND_PACKAGE(ArrayFire)
INCLUDE_DIRECTORIES(${ArrayFire_INCLUDE_DIRS})
...

ADD_EXECUTABLE(some_executable ...)
TARGET_LINK_LIBRARIES(some_executable ${ArrayFire_LIBRARIES})
132 changes: 132 additions & 0 deletions examples/googlenet/googlenet.cpp
@@ -0,0 +1,132 @@
#include "afml/afml.hpp"

using namespace afml;

// https://github.com/soumith/imagenet-multiGPU.torch/blob/master/models/googlenet_cudnn.lua
// Note that Lua index starts from 1.
// TODO: make_shared is very frequently used. Is MS a good alias macro for make_shared?
// #define MS make_shared
NodePtr inception(const int inputSize, std::initializer_list<int>& config) {
  shared_ptr<Concat> concat(new Concat(1));
  if (config[0][0] != 0) {
    shared_ptr<Sequential> conv1(new Sequential());
    conv1->add(make_shared<Convolution>(inputSize, config[0][0], 1, 1, 1, 1))
        ->add(make_shared<ReLU>());
    concat->add(conv1);
  }

  shared_ptr<Sequential> conv3(new Sequential());
  conv3->add(make_shared<Convolution>(inputSize, config[1][0], 1, 1, 1, 1))
      ->add(make_shared<ReLU>());
  conv3->add(make_shared<Convolution>(config[1][0], config[1][1], 3, 3, 1, 1, 1, 1))
      ->add(make_shared<ReLU>());
  concat->add(conv3);

  shared_ptr<Sequential> conv3xx(new Sequential());
  conv3xx->add(make_shared<Convolution>(inputSize, config[2][0], 1, 1, 1, 1))
      ->add(make_shared<ReLU>());
  conv3xx->add(make_shared<Convolution>(config[2][0], config[2][1], 3, 3, 1, 1, 1, 1))
      ->add(make_shared<ReLU>());
  conv3xx->add(make_shared<Convolution>(config[2][1], config[2][1], 3, 3, 1, 1, 1, 1))
      ->add(make_shared<ReLU>());
  concat->add(conv3xx);

  shared_ptr<Sequential> pool(new Sequential());
  pool->add(make_shared<Padding>(3, 3, 1, 1));
  if (config[3][0] == PoolingType::MAX) {
    pool->add(make_shared<MaxPooling>(3, 3, 1, 1));
  } else if (config[3][0] == PoolingType::AVERAGE) {
    pool->add(make_shared<AveragePooling>(3, 3, 1, 1));
  } else {
    fprintf(stderr, "Unknown pooling type\n");
    exit(1);
  }

  if (config[3][1] != 0) {
    // The ReLU must be added to the branch, not to the Convolution node.
    pool->add(make_shared<Convolution>(inputSize, config[3][1], 1, 1, 1, 1))
        ->add(make_shared<ReLU>());
  }
  concat->add(pool);
  return concat;
}

NodePtr createModel(const int numGPU) {
  shared_ptr<Sequential> features(new Sequential());
  features->add(make_shared<Convolution>(3, 64, 7, 7, 2, 2, 3, 3))
      ->add(make_shared<ReLU>());
  features->add(make_shared<MaxPooling>(3, 3, 2, 2));
  features->add(make_shared<Convolution>(64, 64, 1, 1))
      ->add(make_shared<ReLU>());
  features->add(make_shared<Convolution>(64, 192, 3, 3, 1, 1, 1, 1))
      ->add(make_shared<ReLU>());
  features->add(make_shared<MaxPooling>(3, 3, 2, 2));

  // Fills the unused second slot of the 1x1 branch config.
  const int uselessPlaceholder = -1;
  features->add(inception(192, { { 64, uselessPlaceholder }, { 64, 64 },
      { 64, 96 }, { PoolingType::AVERAGE, 32 } }));
  features->add(inception(256, { { 64, uselessPlaceholder }, { 64, 96 },
      { 64, 96 }, { PoolingType::AVERAGE, 64 } }));
  features->add(inception(320, { { 0, uselessPlaceholder }, { 128, 160 },
      { 64, 96 }, { PoolingType::MAX, 0 } }));
  features->add(make_shared<Convolution>(576, 576, 2, 2, 2, 2));
  features->add(inception(576, { { 224, uselessPlaceholder }, { 64, 96 },
      { 96, 128 }, { PoolingType::AVERAGE, 128 } }));
  features->add(inception(576, { { 192, uselessPlaceholder }, { 96, 128 },
      { 96, 128 }, { PoolingType::AVERAGE, 128 } }));
  features->add(inception(576, { { 160, uselessPlaceholder }, { 128, 160 },
      { 128, 160 }, { PoolingType::AVERAGE, 96 } }));
  features->add(inception(576, { { 96, uselessPlaceholder }, { 128, 192 },
      { 160, 192 }, { PoolingType::AVERAGE, 96 } }));

  shared_ptr<Sequential> mainBranch(new Sequential());
  mainBranch->add(inception(576, { { 0, uselessPlaceholder }, { 128, 192 },
      { 192, 256 }, { PoolingType::MAX, 0 } }));
  mainBranch->add(make_shared<Convolution>(1024, 1024, 2, 2, 2, 2));
  mainBranch->add(inception(1024, { { 352 }, { 192, 320 }, { 160, 224 },
      { PoolingType::AVERAGE, 128 } }));
  mainBranch->add(inception(1024, { { 352 }, { 192, 320 }, { 192, 224 },
      { PoolingType::MAX, 128 } }));
  mainBranch->add(make_shared<AveragePooling>(7, 7, 1, 1));
  mainBranch->add(make_shared<View>(1024)->withNumInputDims(3));
  mainBranch->add(make_shared<Linear>(1024, 1000));
  mainBranch->add(make_shared<LogSoftmax>());

  shared_ptr<Sequential> auxClassifier(new Sequential());
  auxClassifier->add(make_shared<AveragePooling>(5, 5, 3, 3));
  auxClassifier->add(make_shared<Convolution>(576, 128, 1, 1, 1, 1));
  auxClassifier->add(make_shared<View>(128 * 4 * 4)->withNumInputDims(3));
  auxClassifier->add(make_shared<Linear>(128 * 4 * 4, 768));
  auxClassifier->add(make_shared<ReLU>());
  auxClassifier->add(make_shared<Linear>(768, 1000));
  auxClassifier->add(make_shared<LogSoftmax>());

  shared_ptr<Concat> splitter(new Concat(1));
  splitter->add(mainBranch)->add(auxClassifier);
  shared_ptr<Sequential> model(new Sequential());
  model->add(features)->add(splitter);

  if (numGPU > 0) {
    shared_ptr<DataParallel> dp(new DataParallel(1));
    for (int i = 0; i < numGPU; ++i) {
      // Each GPU gets its own replica of the full model.
      dp->add(i, model->clone());
    }
    return dp;
  }
  return model;
}

int main(int argc, char* argv[]) {
  if (argc < 2) {
    fprintf(stderr, "Usage: %s <numGPU>\n", argv[0]);
    return 1;
  }
  NodePtr model = createModel(atoi(argv[1]));
  printf("%s\n", model->toString().c_str());
  return 0;
}
1 change: 1 addition & 0 deletions include/afml/.gitignore
@@ -0,0 +1 @@
/thrift/
17 changes: 17 additions & 0 deletions include/afml/afml.hpp
@@ -0,0 +1,17 @@
#ifndef AFML_AFML_HPP_
#define AFML_AFML_HPP_

// Convenience umbrella header. The #includes must stay outside any
// namespace; each header opens namespace afml itself.
#include "afml/common.hpp"
#include "afml/container.hpp"
#include "afml/io.hpp"
#include "afml/node.hpp"
#include "afml/nodes.hpp"
#include "afml/parallel.hpp"

#endif /* AFML_AFML_HPP_ */
53 changes: 53 additions & 0 deletions include/afml/common.hpp
@@ -0,0 +1,53 @@
#ifndef AFML_COMMON_HPP_
#define AFML_COMMON_HPP_

#include <arrayfire.h>

#if __cplusplus < 201103L
#include <boost/make_shared.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/unordered_map.hpp>
#else
#include <initializer_list>
#include <memory>
#include <unordered_map>
#include <cstddef>
#endif

#include <string>
#include <vector>

#include "afml/thrift/afml_types.h"

namespace afml {

using namespace af;
using namespace std;

#if __cplusplus < 201103L
// Pre-C++11: pull the Boost equivalents into scope. nullptr is a C++11
// keyword, so older compilers fall back to NULL.
using boost::make_shared;
using boost::shared_ptr;
using boost::unordered_map;
#else
using std::initializer_list;
using std::make_shared;
using std::shared_ptr;  // Can CUDA device code use this?
using std::unordered_map;
#endif

#if __cplusplus >= 201103L
// unordered_map is a template, so a plain typedef will not compile; an
// alias template works in C++11. CamelCase Map also avoids clashing with
// std::map pulled in above.
template <typename K, typename V>
using Map = unordered_map<K, V>;
#endif

// Just to be consistent in camel case style.
typedef array Array;

typedef vector<Array> ArrayVec;
typedef shared_ptr<Array> ArrayPtr;
typedef vector<ArrayPtr> ArrayPtrVec;

class Node;
typedef shared_ptr<Node> NodePtr;
typedef vector<NodePtr> NodePtrVec;
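// A minimal usage sketch of the aliases above (illustrative only; the
// Map alias requires the C++11 path):
//   ArrayPtr weights = make_shared<Array>(randu(10, 10));
//   ArrayPtrVec params;
//   params.push_back(weights);
//   Map<string, NodePtr> layers;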

} // namespace afml

#endif /* AFML_COMMON_HPP_ */
62 changes: 62 additions & 0 deletions include/afml/container.hpp
@@ -0,0 +1,62 @@
#ifndef AFML_CONTAINER_HPP_
#define AFML_CONTAINER_HPP_

#include "afml/afml.hpp"

namespace afml {

// From Torch7:
// https://github.com/torch/nn/blob/master/Container.lua
// Holds multiple nodes, for easier management and for building complex
// networks.
class Container : public Node {
public:
  Container();
  virtual ~Container();

  // Returns this shared_ptr so calls can be chained: add()->add()->add().
  virtual shared_ptr<Container> add(const NodePtr node);

  // Returns the contained node at the given index.
  virtual NodePtr get(const size_t index) const;

  // Returns the number of contained nodes.
  size_t size() const;

  virtual void forward();
  virtual void backward();
  virtual string toString() const;
};

// https://github.com/torch/nn/blob/master/Concat.lua
class Concat : public Container {
public:
Concat(const size_t concatDim);
virtual ~Concat();

};

// https://github.com/torch/nn/blob/master/Sequential.lua
// Simplifies management of sequentially connected nodes.
class Sequential : public Container {
public:
  virtual ~Sequential();
  // shared_ptr does not support covariant returns, so add() keeps the
  // base-class return type; chaining add()->add() still works.
  virtual shared_ptr<Container> add(const NodePtr node);
  virtual void insert(const NodePtr node, const size_t index);
  virtual void remove(const size_t index);
  virtual string toString() const;
};
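// A minimal chaining sketch (Linear, ReLU, and LogSoftmax are the node
// types used in examples/googlenet):
//   shared_ptr<Sequential> mlp(new Sequential());
//   mlp->add(make_shared<Linear>(1024, 1000))->add(make_shared<ReLU>());
//   mlp->add(make_shared<LogSoftmax>());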

// https://github.com/torch/nn/blob/master/Parallel.lua
// Runs multiple copies of part of a model on different GPUs.
class Parallel : public Container {
public:
  Parallel(const size_t inputDim, const size_t outputDim);
  virtual ~Parallel();
  virtual void forward();
  virtual void backward();
  virtual string toString() const;
};


} // namespace afml

#endif /* AFML_CONTAINER_HPP_ */
20 changes: 20 additions & 0 deletions include/afml/io.hpp
@@ -0,0 +1,20 @@
#ifndef AFML_IO_HPP_
#define AFML_IO_HPP_

#include "afml/common.hpp"

namespace afml {

class SerDe {
public:
  // Data is defined in thrift/afml.thrift. Both methods return by value;
  // returning references here would hand out dangling temporaries.
  Data serialize(const Array& arr) const;
  Array deserialize(const Data& data) const;
};
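// A hypothetical round-trip sketch (names as declared above):
//   SerDe serde;
//   Array a = randu(3, 3);
//   Data d = serde.serialize(a);
//   Array b = serde.deserialize(d);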


} // namespace afml


#endif /* AFML_IO_HPP_ */