Skip to content

Commit

Permalink
Merge pull request #142 from sashafrey/master
Browse files Browse the repository at this point in the history
Introduce AddBatchArgs.batch_file_name field
  • Loading branch information
bigartm committed Mar 6, 2015
2 parents 3598f82 + 2e38f94 commit 1b24c48
Show file tree
Hide file tree
Showing 12 changed files with 411 additions and 131 deletions.
6 changes: 6 additions & 0 deletions docs/ref/messages.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1908,6 +1908,7 @@ Represents an argument of :c:func:`ArtmAddBatch` operation.
optional Batch batch = 1;
optional int32 timeout_milliseconds = 2;
optional bool reset_scores = 3 [default = false];
optional string batch_file_name = 4;
}

.. attribute:: AddBatchArgs.batch
Expand All @@ -1922,6 +1923,11 @@ Represents an argument of :c:func:`ArtmAddBatch` operation.

An optional flag that defines whether to reset all scores before this operation.

.. attribute:: AddBatchArgs.batch_file_name

An optional value that defines the disk location of the batch to add.
You must choose between the parameters *batch_file_name* and *batch*
(exactly one of them has to be specified, not both at the same time).

.. _InvokeIterationArgs:

Expand Down
4 changes: 3 additions & 1 deletion docs/ref/python_interface.txt
Original file line number Diff line number Diff line change
Expand Up @@ -193,10 +193,12 @@ MasterComponent
Remember that some changes of the configuration are not allowed (for example, the :attr:`MasterComponentConfig.disk_path` must not change).
Such configuration parameters must be provided in the constructor of :py:class:`MasterComponent`.

.. py:method:: AddBatch(batch, timeout = -1, reset_scores = False)
.. py:method:: AddBatch(self, batch = None, batch_filename = None, timeout = -1, reset_scores = False)

Adds an instance of :ref:`Batch` class to the processor queue.
Master component creates a copy of the *batch*, so any further changes of the *batch* object will not be picked up.
*batch_filename* is an alternative to *batch*: the name of a file with a binary-serialized batch (you must use either the *batch* or the *batch_filename* option,
but not both at the same time).

This operation waits until there is enough space in the processor queue.
It returns *True* if the wait succeeded within the timeout, otherwise returns *False*.
Expand Down
5 changes: 4 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,7 @@ install(FILES python/artm/library.py python/artm/messages_pb2.py python/artm/__i
install(FILES ../datasets/docword.kos.txt ../datasets/vocab.kos.txt DESTINATION python/examples)

FILE(GLOB PYTHON_EXAMPLES python/examples/*.py)
install(FILES ${PYTHON_EXAMPLES} DESTINATION python/examples)
install(FILES ${PYTHON_EXAMPLES} DESTINATION python/examples)

FILE(GLOB PYTHON_EXPERIMENTS python/experiments/*.py)
install(FILES ${PYTHON_EXPERIMENTS} DESTINATION python/experiments)
31 changes: 20 additions & 11 deletions src/artm/core/data_loader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -103,18 +103,27 @@ LocalDataLoader::~LocalDataLoader() {
}

bool LocalDataLoader::AddBatch(const AddBatchArgs& args) {
auto& batch = args.batch();
if (!args.has_batch() && !args.has_batch_file_name()) {
std::string message = "AddBatchArgs.batch or AddBatchArgs.batch_file_name must be specified";
BOOST_THROW_EXCEPTION(InvalidOperation(message));
}

int timeout = args.timeout_milliseconds();
MasterComponentConfig config = instance()->schema()->config();

std::shared_ptr<Batch> modified_batch;
if (config.compact_batches()) {
modified_batch = std::make_shared<Batch>(); // constructor
BatchHelpers::CompactBatch(batch, modified_batch.get());
BatchHelpers::PopulateClassId(modified_batch.get());
std::shared_ptr<Batch> batch = std::make_shared< ::artm::Batch>();
if (args.has_batch_file_name()) {
::artm::core::BatchHelpers::LoadMessage(args.batch_file_name(), batch.get());
::artm::core::BatchHelpers::PopulateClassId(batch.get());
} else {
modified_batch = std::make_shared<Batch>(batch); // copy constructor
BatchHelpers::PopulateClassId(modified_batch.get());
batch = std::make_shared<Batch>(args.batch()); // copy constructor
BatchHelpers::PopulateClassId(batch.get());
}

if (config.compact_batches()) {
std::shared_ptr<Batch> modified_batch = std::make_shared<Batch>(); // constructor
BatchHelpers::CompactBatch(*batch, modified_batch.get());
batch = modified_batch;
}

auto time_start = boost::posix_time::microsec_clock::local_time();
Expand All @@ -129,9 +138,9 @@ bool LocalDataLoader::AddBatch(const AddBatchArgs& args) {
}
}
auto pi = std::make_shared<ProcessorInput>();
pi->mutable_batch()->CopyFrom(*modified_batch);
pi->set_batch_uuid(modified_batch->id());
boost::uuids::uuid uuid = boost::lexical_cast<boost::uuids::uuid>(modified_batch->id());
pi->mutable_batch()->CopyFrom(*batch);
pi->set_batch_uuid(batch->id());
boost::uuids::uuid uuid = boost::lexical_cast<boost::uuids::uuid>(batch->id());
instance_->batch_manager()->AddAndNext(BatchManagerTask(uuid, std::string()));
instance_->processor_queue()->push(pi);

Expand Down
2 changes: 2 additions & 0 deletions src/artm/core/master_component.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,12 @@ MasterComponent::MasterComponent(int id, const MasterComponentConfig& config)
service_endpoint_(nullptr),
instance_(nullptr),
network_client_interface_(nullptr) {
LOG(INFO) << "Creating MasterComponent (id=" << master_id_ << ")...";
Reconfigure(config);
}

// Destructor: writes an INFO-level log line identifying this component by master_id_.
// The body performs no other explicit work.
MasterComponent::~MasterComponent() {
LOG(INFO) << "Disposing MasterComponent (id=" << master_id_ << ")...";
}

int MasterComponent::id() const {
Expand Down
72 changes: 65 additions & 7 deletions src/artm/messages.pb.cc

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

87 changes: 86 additions & 1 deletion src/artm/messages.pb.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions src/artm/messages.proto
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,7 @@ message AddBatchArgs {
optional Batch batch = 1;
optional int32 timeout_milliseconds = 2;
optional bool reset_scores = 3 [default = false];
optional string batch_file_name = 4;
}

message InvokeIterationArgs {
Expand Down

0 comments on commit 1b24c48

Please sign in to comment.