Skip to content

Commit

Permalink
Merge pull request #138 from sashafrey/master
Browse files Browse the repository at this point in the history
Improvements in ThetaSnippetScore and TopTokensScore
  • Loading branch information
bigartm committed Feb 21, 2015
2 parents 7ae26e5 + 0f38242 commit b71de44
Show file tree
Hide file tree
Showing 10 changed files with 234 additions and 129 deletions.
11 changes: 8 additions & 3 deletions docs/ref/messages.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1281,7 +1281,8 @@ Represents a configuration of a theta snippet score.
message ThetaSnippetScoreConfig {
optional string field_name = 1 [default = "@body"]; // obsolete in BigARTM v0.5.8
optional string stream_name = 2 [default = "@global"];
repeated int32 item_id = 3 [packed = true];
repeated int32 item_id = 3 [packed = true]; // obsolete in BigARTM v0.5.8
optional int32 item_count = 4 [default = 10];
}

.. attribute:: ThetaSnippetScoreConfig.field_name
Expand All @@ -1294,8 +1295,12 @@ Represents a configuration of a theta snippet score.

.. attribute:: ThetaSnippetScoreConfig.item_id

A set of values that define items for which theta snippet should be calculated.
Items are identified by the item id.
Obsolete in BigARTM v0.5.8.

.. attribute:: ThetaSnippetScoreConfig.item_count

The number of items to retrieve. ThetaSnippetScore will select the last *item_count* processed items
and return their theta vectors.


.. _ThetaSnippetScore:
Expand Down
146 changes: 94 additions & 52 deletions src/artm/messages.pb.cc

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 33 additions & 1 deletion src/artm/messages.pb.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion src/artm/messages.proto
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,8 @@ message TopTokensScore {
message ThetaSnippetScoreConfig {
optional string field_name = 1 [default = "@body"];
optional string stream_name = 2 [default = "@global"];
repeated int32 item_id = 3 [packed = true];
repeated int32 item_id = 3 [packed = true]; // obsolete in BigARTM v0.5.8
optional int32 item_count = 4 [default = 10];
}


Expand Down
37 changes: 24 additions & 13 deletions src/artm/score_sandbox/theta_snippet.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include "artm/core/exceptions.h"
#include "artm/core/topic_model.h"
#include "artm/core/protobuf_helpers.h"

namespace artm {
namespace score_sandbox {
Expand All @@ -18,16 +19,10 @@ void ThetaSnippet::AppendScore(
int topics_size = topic_model.topic_size();

ThetaSnippetScore theta_snippet_score;

for (int item_index = 0; item_index < config_.item_id_size(); item_index++) {
if (item_index == item.id()) {
theta_snippet_score.add_item_id(item_index);
auto theta_snippet_item = theta_snippet_score.add_values();
for (int topic_index = 0; topic_index < topics_size; ++topic_index) {
theta_snippet_item->add_value(theta[topic_index]);
}
break;
}
theta_snippet_score.add_item_id(item.id());
auto theta_snippet_item = theta_snippet_score.add_values();
for (int topic_index = 0; topic_index < topics_size; ++topic_index) {
theta_snippet_item->add_value(theta[topic_index]);
}

AppendScore(theta_snippet_score, score);
Expand All @@ -53,12 +48,28 @@ void ThetaSnippet::AppendScore(const Score& score, Score* target) {
BOOST_THROW_EXCEPTION(::artm::core::InternalError(error_message));
}

if (config_.item_count() <= 0 || theta_snippet_score->values_size() == 0)
return;

while (theta_snippet_target->values_size() < config_.item_count()) {
theta_snippet_target->add_item_id(-1);
artm::FloatArray* values_target = theta_snippet_target->add_values();
for (int i = 0; i < theta_snippet_score->values(0).value_size(); ++i)
values_target->add_value(0.0f);
}

for (int item_index = 0; item_index < theta_snippet_score->item_id_size(); item_index++) {
theta_snippet_target->add_item_id(theta_snippet_score->item_id().Get(item_index));
auto theta_snippet_item_target = theta_snippet_target->add_values();
int item_id = theta_snippet_score->item_id(item_index);
if (item_id < 0)
continue;

theta_snippet_target->set_item_id(item_id % config_.item_count(), item_id);
artm::FloatArray* values_target = theta_snippet_target->mutable_values(item_id % config_.item_count());
values_target->Clear();

auto theta_snippet_item_score = theta_snippet_score->values(item_index);
for (int topic_index = 0; topic_index < theta_snippet_item_score.value_size(); topic_index++) {
theta_snippet_item_target->add_value(theta_snippet_item_score.value(topic_index));
values_target->add_value(theta_snippet_item_score.value(topic_index));
}
}
}
Expand Down

0 comments on commit b71de44

Please sign in to comment.