Skip to content

Commit

Permalink
Merge pull request #100 from sashafrey/master
Browse files Browse the repository at this point in the history
Introduce ThetaMatrix.topics_count field
  • Loading branch information
bigartm committed Jan 4, 2015
2 parents 0ba7260 + 9f8ecbb commit 0195ec0
Show file tree
Hide file tree
Showing 11 changed files with 340 additions and 164 deletions.
3 changes: 3 additions & 0 deletions docs/ref/c_interface.txt
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,9 @@ ArtmAddBatch
:return: Returns :c:macro:`ARTM_SUCCESS` value if operation succeeded,
otherwise returns one of the :ref:`error codes <error-codes>`.

This operation is only allowed when
:attr:`MasterComponentConfig.modus_operandi` is set to *Local*.

ArtmInvokeIteration
-------------------

Expand Down
8 changes: 7 additions & 1 deletion docs/ref/messages.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1429,7 +1429,7 @@ Represents a topic model.
.. attribute:: TopicModel.topics_count

A value that describes the number of topics in the topic model.
This value will match the value, defined in the model config.
This value matches :attr:`ModelConfig.topics_count`, as defined in the model config.

.. attribute:: TopicModel.token

Expand Down Expand Up @@ -1467,6 +1467,7 @@ Represents a theta matrix.
repeated int32 item_id = 2;
repeated FloatArray item_weights = 3;
repeated string topic_name = 4;
optional int32 topics_count = 5;
}

.. attribute:: ThetaMatrix.model_name
Expand All @@ -1489,6 +1490,11 @@ Represents a theta matrix.
A set of values that represent the names of the topics, included in this theta matrix.
The names correspond to :attr:`ModelConfig.topic_name`.

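.. attribute:: ThetaMatrix.topics_count

A value that describes the number of topics included in this theta matrix.
When all topics are retrieved, this value matches :attr:`ModelConfig.topics_count`, defined in the model config; when only a subset of topics is requested, it equals the number of requested topics.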

.. _CollectionParserConfig:

CollectionParserConfig
Expand Down
2 changes: 2 additions & 0 deletions src/artm/core/helpers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -273,10 +273,12 @@ bool BatchHelpers::PopulateThetaMatrixFromCacheEntry(
}
}

theta_matrix->set_topics_count(topic_indices.size());
for (int item_index = 0; item_index < cache.item_id_size(); ++item_index) {
const artm::FloatArray& item_theta = cache.theta(item_index);
if (all_topics) {
theta_matrix->add_item_weights()->CopyFrom(item_theta);
theta_matrix->set_topics_count(item_theta.value_size());
} else {
if (max_topic_index >= item_theta.value_size())
continue; // skip the item to avoid crash.
Expand Down
4 changes: 2 additions & 2 deletions src/artm/core/master_component.cc
Original file line number Diff line number Diff line change
Expand Up @@ -314,8 +314,8 @@ void MasterComponent::AddBatch(const Batch& batch) {
}

if (isInNetworkModusOperandi()) {
BatchHelpers::SaveBatch(batch, config_.get()->disk_path());
return;
BOOST_THROW_EXCEPTION(InvalidOperation(
"AddBatch() is not allowed together with MasterComponentConfig.modus_operandi=Network"));
}

BOOST_THROW_EXCEPTION(ArgumentOutOfRangeException(
Expand Down
89 changes: 65 additions & 24 deletions src/artm/messages.pb.cc

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 33 additions & 1 deletion src/artm/messages.pb.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions src/artm/messages.proto
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ message ThetaMatrix {
repeated int32 item_id = 2;
repeated FloatArray item_weights = 3;
repeated string topic_name = 4;
optional int32 topics_count = 5;
}

// Represents a configuration of a collection parser.
Expand Down
6 changes: 5 additions & 1 deletion src/artm_tests/cpp_interface_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,8 @@ void BasicTest(bool is_network_mode, bool is_proxy_mode, bool online_processing)
}

// Index doc-token matrix
if (!online_processing) master_component->AddBatch(batch);
if (!online_processing && !is_network_mode) master_component->AddBatch(batch);
else if (is_network_mode) artm::SaveBatch(batch, "00b6d631-46a6-4edf-8ef6-016c7b27d9f0.batch");

std::shared_ptr<artm::TopicModel> topic_model;
double expected_normalizer = 0;
Expand Down Expand Up @@ -224,6 +225,7 @@ void BasicTest(bool is_network_mode, bool is_proxy_mode, bool online_processing)
std::shared_ptr< ::artm::ThetaMatrix> theta_matrix = master_component->GetThetaMatrix(args);

EXPECT_EQ(theta_matrix->item_id_size(), nDocs);
EXPECT_EQ(theta_matrix->topics_count(), nTopics);
for (int item_index = 0; item_index < theta_matrix->item_id_size(); ++item_index) {
const ::artm::FloatArray& weights = theta_matrix->item_weights(item_index);
ASSERT_EQ(weights.value_size(), nTopics);
Expand All @@ -240,6 +242,7 @@ void BasicTest(bool is_network_mode, bool is_proxy_mode, bool online_processing)
args.add_topic_index(2); args.add_topic_index(3); // retrieve 2nd and 3rd topic
std::shared_ptr< ::artm::ThetaMatrix> theta_matrix23 = master_component->GetThetaMatrix(args);
EXPECT_EQ(theta_matrix23->item_id_size(), nDocs);
EXPECT_EQ(theta_matrix23->topics_count(), 2);
for (int item_index = 0; item_index < theta_matrix23->item_id_size(); ++item_index) {
const ::artm::FloatArray& weights23 = theta_matrix23->item_weights(item_index);
const ::artm::FloatArray& weights = theta_matrix->item_weights(item_index);
Expand Down Expand Up @@ -270,6 +273,7 @@ void BasicTest(bool is_network_mode, bool is_proxy_mode, bool online_processing)
if (!is_network_mode) {
std::shared_ptr< ::artm::ThetaMatrix> theta_matrix2 = master_component->GetThetaMatrix(args);
EXPECT_EQ(theta_matrix2->item_id_size(), nDocs);
EXPECT_EQ(theta_matrix2->topics_count(), nTopics);
for (int item_index = 0; item_index < theta_matrix2->item_id_size(); ++item_index) {
const ::artm::FloatArray& weights2 = theta_matrix2->item_weights(item_index);
EXPECT_EQ(weights2.value_size(), nTopics);
Expand Down

0 comments on commit 0195ec0

Please sign in to comment.