apache · pcmoritz · Dec 1, 2017 · holdenk · Dec 12, 2017 · holdenk
diff --git a/cpp/src/plasma/tf/make.sh b/cpp/src/plasma/tf/make.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+# Builds plasma_op.so, the TensorFlow custom op implemented in plasma_op.cc.
+# Needs a python with tensorflow importable and pkg-config metadata for plasma.
+set -e  # Stop if the include-path lookup fails instead of compiling with a bad -I.
+
+TF_INC="$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')"
+
+# "-undefined dynamic_lookup" is only understood by the macOS linker, so it is
+# added on Darwin and omitted everywhere else.
+case "$(uname -s)" in
+  Darwin) UNDEFINED_FLAGS="-undefined dynamic_lookup" ;;
+  *) UNDEFINED_FLAGS="" ;;
+esac
+
+# $UNDEFINED_FLAGS and the pkg-config output stay unquoted on purpose: each
+# expands to several separate arguments.
+g++ -std=c++11 -g -O2 -shared -fPIC plasma_op.cc -o plasma_op.so \
+  $(pkg-config --cflags --libs plasma) $UNDEFINED_FLAGS -I "$TF_INC"
diff --git a/cpp/src/plasma/tf/plasma_op.cc b/cpp/src/plasma/tf/plasma_op.cc
@@ -0,0 +1,70 @@
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+#include "arrow/io/memory.h"
+#include "arrow/ipc/reader.h"
+#include "arrow/tensor.h"
+#include "plasma/client.h"
+
+using namespace tensorflow;
+
+// Op that loads a float32 tensor from a Plasma object store.
+//   object_id: string tensor; its first element is the binary Plasma object id.
+//   output:    float32 tensor holding a copy of the stored data.
+//   socket:    path of the Plasma store socket the kernel connects to.
+REGISTER_OP("PlasmaData")
+    .Input("object_id: string")
+    .Output("output: float32")
+    .Attr("socket: string")
+    // The result depends on the contents of an external store, so the op must
+    // not be constant-folded or de-duplicated.
+    .SetIsStateful()
+    // The output shape comes from the stored tensor and is unknown until run time.
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      c->set_output(0, c->UnknownShape());
+      return Status::OK();
+    });
+
+// TODO(pcm): Make this zero-copy if possible
+
+// Kernel for the "PlasmaData" op: fetches an Arrow tensor from a Plasma store
+// and copies its float32 contents into the op's output.
+//
+// Plasma and Arrow failures are reported as op errors on the context instead
+// of aborting the process.
+//
+// NOTE(review): a single PlasmaClient is shared by every Compute() call on this
+// kernel; confirm the runtime never invokes Compute() concurrently, or guard
+// client_ with a mutex.
+class PlasmaDataOp : public OpKernel {
+ public:
+  // Reads the "socket" attr and connects to the Plasma store at that path.
+  explicit PlasmaDataOp(OpKernelConstruction* context) : OpKernel(context) {
+    std::string socket;
+    OP_REQUIRES_OK(context, context->GetAttr("socket", &socket));
+    const arrow::Status status =
+        client_.Connect(socket, "", PLASMA_DEFAULT_RELEASE_DELAY);
+    OP_REQUIRES(context, status.ok(), errors::Unavailable(status.ToString()));
+    connected_ = true;
+  }
+
+  // Disconnects only if the constructor's Connect() succeeded.
+  ~PlasmaDataOp() override {
+    if (connected_) {
+      ARROW_CHECK_OK(client_.Disconnect());
+    }
+  }
+
+  // Looks up the object whose binary id is element 0 of input 0 and writes a
+  // copy of the tensor stored in it to output 0.
+  void Compute(OpKernelContext* context) override {
+    const Tensor& input_tensor = context->input(0);
+    OP_REQUIRES(context, input_tensor.NumElements() > 0,
+                errors::InvalidArgument("object_id must contain an object id"));
+    const auto ids = input_tensor.flat<string>();
+    // from_binary() reads a fixed-size id, so reject strings of any other
+    // length before handing them over.
+    OP_REQUIRES(
+        context,
+        static_cast<int64_t>(ids(0).size()) == plasma::kUniqueIDSize,
+        errors::InvalidArgument("object_id has the wrong length: ",
+                                ids(0).size()));
+    plasma::ObjectID object_id = plasma::ObjectID::from_binary(ids(0));
+
+    // A timeout of -1 is passed, as before, to wait for the object.
+    plasma::ObjectBuffer object_buffer;
+    arrow::Status status = client_.Get(&object_id, 1, -1, &object_buffer);
+    OP_REQUIRES(context, status.ok(), errors::Internal(status.ToString()));
+
+    CopyToOutput(context, object_buffer);
+
+    // Pair every successful Get() with a Release(), including when the copy
+    // above reported an error, so the reference on the object is not leaked.
+    status = client_.Release(object_id);
+    OP_REQUIRES(context, status.ok(), errors::Internal(status.ToString()));
+  }
+
+ private:
+  // Deserializes the Arrow tensor held in `buffer` and copies its elements
+  // into a newly allocated output 0. Errors are recorded on `context`.
+  // TODO(pcm): Make this zero-copy if possible.
+  void CopyToOutput(OpKernelContext* context,
+                    const plasma::ObjectBuffer& buffer) {
+    std::shared_ptr<arrow::Tensor> result;
+    arrow::io::BufferReader reader(buffer.data, buffer.data_size);
+    const arrow::Status status = arrow::ipc::ReadTensor(0, &reader, &result);
+    OP_REQUIRES(context, status.ok(), errors::DataLoss(status.ToString()));
+
+    // The buffer is reinterpreted as a dense float array below, so anything
+    // that is not row-major float32 would produce garbage.
+    OP_REQUIRES(context, result->type()->id() == arrow::Type::FLOAT,
+                errors::InvalidArgument("plasma object is not a float32 tensor"));
+    OP_REQUIRES(context, result->is_row_major(),
+                errors::InvalidArgument("plasma tensor is not row-major"));
+
+    TensorShape shape(result->shape());
+    Tensor* output_tensor = nullptr;
+    OP_REQUIRES_OK(context, context->allocate_output(0, shape, &output_tensor));
+    auto output_flat = output_tensor->flat<float>();
+
+    // 64-bit index: the element count is an int64_t and may exceed INT_MAX.
+    const int64_t num_elements = result->size();
+    const float* data = reinterpret_cast<const float*>(result->data()->data());
+    for (int64_t i = 0; i < num_elements; ++i) {
+      output_flat(i) = data[i];
+    }
+  }
+
+  plasma::PlasmaClient client_;
+  // True once Connect() has succeeded; guards Disconnect() in the destructor.
+  bool connected_ = false;
+};
+
+// Register PlasmaDataOp as the kernel for the "PlasmaData" op on CPU devices.
+REGISTER_KERNEL_BUILDER(Name("PlasmaData").Device(DEVICE_CPU), PlasmaDataOp);
diff --git a/cpp/src/plasma/tf/test.py b/cpp/src/plasma/tf/test.py
@@ -0,0 +1,46 @@
+import numpy as np
+import pyarrow as pa
+import pyarrow.plasma as plasma
+import tensorflow as tf
+
+import time
+
+# End-to-end check of the PlasmaData op: store a float32 tensor in Plasma, read
+# it back through TensorFlow, verify the contents, and time it against a plain
+# feed_dict round trip of the same array.
+# Expects a Plasma store on /tmp/plasma and plasma_op.so in the working directory.
+plasma_module = tf.load_op_library('./plasma_op.so')
+
+client = plasma.connect("/tmp/plasma", "", 64)
+
+data = np.random.randn(10000, 4000).astype("float32")
+tensor = pa.Tensor.from_numpy(data)
+
+# Write the tensor with pa.write_tensor so the stored bytes are in the Arrow
+# tensor format that the op parses with arrow::ipc::ReadTensor. client.put()
+# would store pyarrow's generic serialization format instead.
+data_id = plasma.ObjectID(np.random.bytes(20))
+data_size = pa.get_tensor_size(tensor)
+buf = client.create(data_id, data_size)
+stream = pa.FixedSizeBufferWriter(buf)
+pa.write_tensor(tensor, stream)
+client.seal(data_id)
+
+sess = tf.Session()
+object_id = tf.placeholder(tf.string)
+load_op = plasma_module.plasma_data([object_id], socket="/tmp/plasma")
+
+# Time only the session run; printing the array is kept out of the measurement.
+start = time.time()
+loaded = sess.run(load_op, feed_dict={object_id: data_id.binary()})
+elapsed = time.time() - start
+print("XXX", loaded)
+print("b1", elapsed)
+np.testing.assert_array_equal(loaded, data)
+
+# Run the op a second time and check it returns the same data again.
+loaded = sess.run(load_op, feed_dict={object_id: data_id.binary()})
+print("XXX", loaded)
+np.testing.assert_array_equal(loaded, data)
+
+# Baseline: feed the same array straight through a float32 placeholder.
+placeholder = tf.placeholder(tf.float32, shape=(10000, 4000))
+
+start = time.time()
+d = sess.run(placeholder, feed_dict={placeholder: data})
+elapsed = time.time() - start
+print("b2", elapsed)
+
+
+print("ZZZ", d)
+
+print("YYY", data)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')

		g++ -std=c++11 -g -shared plasma_op.cc -o plasma_op.so `pkg-config --cflags --libs plasma` -undefined dynamic_lookup -fPIC -I $TF_INC -O2