Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cpp/src/plasma/tf/make.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/usr/bin/env bash
# Builds plasma_op.so, the TensorFlow custom-op library defined in plasma_op.cc.
# Requires `python` with TensorFlow importable and a pkg-config entry for plasma.

# Stop at the first failing command or unset variable instead of carrying on
# with a half-configured compiler invocation.
set -euo pipefail

# Directory containing the TensorFlow headers for the active Python environment.
TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')

# `-undefined dynamic_lookup` is an option of the Apple linker (TensorFlow
# symbols are resolved when the library is loaded); only pass it on macOS.
EXTRA_LDFLAGS=""
if [ "$(uname -s)" = "Darwin" ]; then
  EXTRA_LDFLAGS="-undefined dynamic_lookup"
fi

# The pkg-config output and EXTRA_LDFLAGS are intentionally left unquoted so
# they split into separate compiler arguments.
g++ -std=c++11 -g -O2 -fPIC -shared plasma_op.cc -o plasma_op.so \
  $(pkg-config --cflags --libs plasma) \
  $EXTRA_LDFLAGS \
  -I "$TF_INC"
70 changes: 70 additions & 0 deletions cpp/src/plasma/tf/plasma_op.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/shape_inference.h"

#include "arrow/io/memory.h"
#include "arrow/ipc/reader.h"
#include "arrow/tensor.h"
#include "plasma/client.h"

using namespace tensorflow;

REGISTER_OP("PlasmaData")
.Input("object_id: string")
.Output("output: float32")
.Attr("socket: string");

// TODO(pcm): Make this zero-copy if possible

class PlasmaDataOp : public OpKernel {
public:
explicit PlasmaDataOp(OpKernelConstruction* context) : OpKernel(context) {
std::cout << "called constructor" << std::endl;
std::string socket;
OP_REQUIRES_OK(context, context->GetAttr("socket", &socket));
// Connect to plasma
ARROW_CHECK_OK(client_.Connect(socket, "", PLASMA_DEFAULT_RELEASE_DELAY));
std::cout << "constructor finished" << std::endl;
}

void Compute(OpKernelContext* context) override {
// Grab the input tensor
const Tensor& input_tensor = context->input(0);
auto input = input_tensor.flat<string>();

// Get the object
plasma::ObjectID object_id = plasma::ObjectID::from_binary(input(0));
plasma::ObjectBuffer object_buffer;
ARROW_CHECK_OK(client_.Get(&object_id, 1, -1, &object_buffer));

// Get the tensor
std::shared_ptr<arrow::Tensor> result;
arrow::io::BufferReader reader(object_buffer.data, object_buffer.data_size);
int64_t offset;
ARROW_CHECK_OK(reader.Tell(&offset));
ARROW_CHECK_OK(arrow::ipc::ReadTensor(0, &reader, &result));

std::cout << "shape is" << result->shape()[0] << " , " << result->shape()[1]
<< std::endl;

// Create an output tensor
TensorShape shape(result->shape());
Tensor* output_tensor = NULL;
OP_REQUIRES_OK(context, context->allocate_output(0, shape, &output_tensor));
auto output_flat = output_tensor->flat<float>();

// Set all but the first element of the output tensor to 0.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this comment perhaps about an older version of the code?

const int64_t N = result->size();
std::cout << "size is " << N << std::endl;
const float* data = reinterpret_cast<const float*>(result->data()->data());
for (int i = 0; i < N; i++) {
output_flat(i) = data[i];
}
}
~PlasmaDataOp() { ARROW_CHECK_OK(client_.Disconnect()); }

private:
plasma::PlasmaClient client_;
};

REGISTER_KERNEL_BUILDER(Name("PlasmaData").Device(DEVICE_CPU), PlasmaDataOp);
46 changes: 46 additions & 0 deletions cpp/src/plasma/tf/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import numpy as np
import pyarrow as pa
import pyarrow.plasma as plasma
import tensorflow as tf

import time

import os

# Socket of an already running Plasma store. The path is defined once and can
# be overridden through the environment, so runs that execute in parallel on
# one machine can each point at their own store instead of sharing /tmp/plasma.
PLASMA_SOCKET = os.environ.get("PLASMA_STORE_SOCKET", "/tmp/plasma")

# Load the custom op library built by make.sh.
plasma_op_module = tf.load_op_library('./plasma_op.so')

client = plasma.connect(PLASMA_SOCKET, "", 64)

# Put a random float32 matrix into the store as an Arrow tensor.
data = np.random.randn(10000, 4000).astype("float32")
tensor = pa.Tensor.from_numpy(data)
data_id = client.put(tensor)

sess = tf.Session()
object_id = tf.placeholder(tf.string)
load_op = plasma_op_module.plasma_data([object_id], socket=PLASMA_SOCKET)

# First load through the op: time only the session run, then check that the
# tensor coming back is exactly the one that was stored.
start = time.time()
loaded = sess.run(load_op, feed_dict={object_id: data_id.binary()})
print("b1", time.time() - start)
print("XXX", loaded)
np.testing.assert_array_equal(loaded, data)

# Second load of the same object: the op must be reusable and give the same
# result again.
reloaded = sess.run(load_op, feed_dict={object_id: data_id.binary()})
print("XXX", reloaded)
np.testing.assert_array_equal(reloaded, data)

# Baseline for comparison: feed the same array through a plain placeholder.
placeholder = tf.placeholder(tf.float32, shape=data.shape)
start = time.time()
fed = sess.run(placeholder, feed_dict={placeholder: data})
print("b2", time.time() - start)
np.testing.assert_array_equal(fed, data)

print("ZZZ", fed)

print("YYY", data)