Skip to content

Commit

Permalink
Make output scores optional
Browse files Browse the repository at this point in the history
Additional optimizations are possible when the output scores are not
required. For example, we can skip the final LogSoftMax during greedy
search.
  • Loading branch information
guillaumekln committed Apr 1, 2020
1 parent 1e5d549 commit f644971
Show file tree
Hide file tree
Showing 12 changed files with 97 additions and 34 deletions.
1 change: 1 addition & 0 deletions cli/translate.cc
Expand Up @@ -87,6 +87,7 @@ int main(int argc, char* argv[]) {
options.min_decoding_length = args["min_sent_length"].as<size_t>();
options.num_hypotheses = args["n_best"].as<size_t>();
options.use_vmap = args["use_vmap"].as<bool>();
options.return_scores = args["with_score"].as<bool>();

std::istream* in = &std::cin;
std::ostream* out = &std::cout;
Expand Down
1 change: 1 addition & 0 deletions docs/python.md
Expand Up @@ -49,6 +49,7 @@ output = translator.translate_batch(
max_decoding_length=250, # Maximum prediction length.
min_decoding_length=1, # Minimum prediction length.
use_vmap=False, # Use the vocabulary mapping file saved in this model.
return_scores=True, # Include the prediction scores in the output.
return_attention=False, # Include the attention vectors in the output.
return_alternatives=False, # Return alternatives at the first unconstrained decoding position.
sampling_topk=1, # Randomly sample predictions from the top K candidates (with beam_size=1).
Expand Down
7 changes: 4 additions & 3 deletions include/ctranslate2/decoding.h
Expand Up @@ -20,7 +20,7 @@ namespace ctranslate2 {
const dim_t min_length,
const std::vector<size_t>* output_ids_map,
std::vector<std::vector<std::vector<size_t>>>& sampled_ids,
std::vector<std::vector<float>>& scores,
std::vector<std::vector<float>>* scores = nullptr,
std::vector<std::vector<std::vector<std::vector<float>>>>* attention = nullptr,
const size_t num_hypotheses = 1) const = 0;
};
Expand All @@ -40,7 +40,7 @@ namespace ctranslate2 {
const dim_t min_length,
const std::vector<size_t>* output_ids_map,
std::vector<std::vector<std::vector<size_t>>>& sampled_ids,
std::vector<std::vector<float>>& scores,
std::vector<std::vector<float>>* scores = nullptr,
std::vector<std::vector<std::vector<std::vector<float>>>>* attention = nullptr,
const size_t num_hypotheses = 1) const override;

Expand All @@ -62,7 +62,7 @@ namespace ctranslate2 {
const dim_t min_length,
const std::vector<size_t>* output_ids_map,
std::vector<std::vector<std::vector<size_t>>>& sampled_ids,
std::vector<std::vector<float>>& scores,
std::vector<std::vector<float>>* scores = nullptr,
std::vector<std::vector<std::vector<std::vector<float>>>>* attention = nullptr,
const size_t num_hypotheses = 1) const override;
};
Expand All @@ -86,6 +86,7 @@ namespace ctranslate2 {
const dim_t min_length,
const size_t num_hypotheses,
const bool return_alternatives,
const bool return_scores,
const bool return_attention);

}
13 changes: 8 additions & 5 deletions include/ctranslate2/translation_result.h
Expand Up @@ -11,20 +11,23 @@ namespace ctranslate2 {
class GenerationResult {
public:
GenerationResult(const size_t num_hypotheses, const bool with_attention); // Empty result.
GenerationResult(std::vector<std::vector<T>> hypotheses,
std::vector<float> scores);
GenerationResult(std::vector<std::vector<T>> hypotheses);
GenerationResult(std::vector<std::vector<T>> hypotheses,
std::vector<float> scores,
std::vector<std::vector<std::vector<float>>> attention);

const std::vector<T>& output() const;
float score() const;

size_t num_hypotheses() const;

const std::vector<T>& output() const;
const std::vector<std::vector<T>>& hypotheses() const;

float score() const;
const std::vector<float>& scores() const;
void set_scores(std::vector<float> scores);
bool has_scores() const;

const std::vector<std::vector<std::vector<float>>>& attention() const;
void set_attention(std::vector<std::vector<std::vector<float>>> attention);
bool has_attention() const;

friend GenerationResult<std::string>
Expand Down
2 changes: 2 additions & 0 deletions include/ctranslate2/translator.h
Expand Up @@ -37,6 +37,8 @@ namespace ctranslate2 {
// beam_size unless return_alternatives is set).
size_t num_hypotheses = 1;

// Store scores in the TranslationResult class.
bool return_scores = true;
// Store attention vectors in the TranslationResult class.
bool return_attention = false;

Expand Down
8 changes: 8 additions & 0 deletions python/tests/test.py
Expand Up @@ -113,6 +113,14 @@ def test_return_attention():
assert len(attention) == 6 # Target length.
assert len(attention[0]) == 6 # Source length.

def test_ignore_scores():
    """With return_scores=False the hypothesis dict must not contain a score.

    The Python bindings store the hypothesis score under the "score" key
    (singular), so that is the key whose absence has to be checked. Checking
    for "scores" would pass even when a score is returned.
    """
    translator = _get_transliterator()
    output = translator.translate_batch(
        [["آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"]],
        beam_size=1,
        return_scores=False)
    # output[0][0] is the first hypothesis of the first batch entry.
    assert "score" not in output[0][0]

def test_return_alternatives():
translator = _get_transliterator()
output = translator.translate_batch(
Expand Down
8 changes: 7 additions & 1 deletion python/translator.cc
Expand Up @@ -106,6 +106,7 @@ class TranslatorWrapper
options.min_decoding_length = min_decoding_length;
options.num_hypotheses = num_hypotheses;
options.use_vmap = use_vmap;
options.return_scores = with_scores;

if (read_batch_size == 0)
read_batch_size = max_batch_size;
Expand All @@ -128,6 +129,7 @@ class TranslatorWrapper
size_t max_decoding_length,
size_t min_decoding_length,
bool use_vmap,
bool return_scores,
bool return_attention,
bool return_alternatives,
size_t sampling_topk,
Expand All @@ -154,6 +156,7 @@ class TranslatorWrapper
options.min_decoding_length = min_decoding_length;
options.num_hypotheses = num_hypotheses;
options.use_vmap = use_vmap;
options.return_scores = return_scores;
options.return_attention = return_attention;
options.return_alternatives = return_alternatives;

Expand All @@ -165,8 +168,10 @@ class TranslatorWrapper
py::list batch;
for (size_t i = 0; i < result.num_hypotheses(); ++i) {
py::dict hyp;
hyp["score"] = result.scores()[i];
hyp["tokens"] = std_vector_to_py_list(result.hypotheses()[i]);
if (result.has_scores()) {
hyp["score"] = result.scores()[i];
}
if (result.has_attention()) {
py::list attn;
for (const auto& attn_vector : result.attention()[i])
Expand Down Expand Up @@ -278,6 +283,7 @@ PYBIND11_MODULE(translator, m)
py::arg("max_decoding_length")=250,
py::arg("min_decoding_length")=1,
py::arg("use_vmap")=false,
py::arg("return_scores")=true,
py::arg("return_attention")=false,
py::arg("return_alternatives")=false,
py::arg("sampling_topk")=1,
Expand Down
56 changes: 36 additions & 20 deletions src/decoding.cc
Expand Up @@ -80,7 +80,7 @@ namespace ctranslate2 {
const dim_t min_length,
const std::vector<size_t>* output_ids_map,
std::vector<std::vector<std::vector<size_t>>>& sampled_ids,
std::vector<std::vector<float>>& scores,
std::vector<std::vector<float>>* scores,
std::vector<std::vector<std::vector<std::vector<float>>>>* attention,
const size_t num_hypotheses) const {
PROFILE("beam_search");
Expand All @@ -107,8 +107,10 @@ namespace ctranslate2 {
hypotheses.resize(batch_size);
sampled_ids.clear();
sampled_ids.resize(batch_size);
scores.clear();
scores.resize(batch_size);
if (scores) {
scores->clear();
scores->resize(batch_size);
}
if (attention) {
attention->clear();
attention->resize(batch_size);
Expand All @@ -119,7 +121,8 @@ namespace ctranslate2 {
for (dim_t i = 0; i < batch_size; ++i) {
batch_offset[i] = i;
sampled_ids[i].reserve(num_hypotheses);
scores[i].reserve(num_hypotheses);
if (scores)
(*scores)[i].reserve(num_hypotheses);
if (attention)
(*attention)[i].reserve(num_hypotheses);
}
Expand Down Expand Up @@ -258,8 +261,10 @@ namespace ctranslate2 {
for (auto& pair : hypotheses[batch_id]) {
if (sampled_ids[batch_id].size() >= num_hypotheses)
break;
scores[batch_id].push_back(-pair.first);
sampled_ids[batch_id].emplace_back(std::move(pair.second.first));
if (scores) {
(*scores)[batch_id].push_back(-pair.first);
}
if (attention) {
(*attention)[batch_id].emplace_back(std::move(pair.second.second));
}
Expand Down Expand Up @@ -331,7 +336,7 @@ namespace ctranslate2 {
const dim_t min_length,
const std::vector<size_t>* output_ids_map,
std::vector<std::vector<std::vector<size_t>>>& sampled_ids,
std::vector<std::vector<float>>& scores,
std::vector<std::vector<float>>* scores,
std::vector<std::vector<std::vector<std::vector<float>>>>* attention,
const size_t) const {
PROFILE("greedy_search");
Expand All @@ -343,8 +348,10 @@ namespace ctranslate2 {

sampled_ids.clear();
sampled_ids.resize(batch_size);
scores.clear();
scores.resize(batch_size);
if (scores) {
scores->clear();
scores->resize(batch_size);
}
if (attention) {
attention->clear();
attention->resize(batch_size);
Expand All @@ -358,7 +365,8 @@ namespace ctranslate2 {
for (dim_t i = 0; i < batch_size; ++i) {
batch_offset[i] = i;
sampled_ids[i].resize(1);
scores[i].resize(1);
if (scores)
(*scores)[i].resize(1);
if (attention)
(*attention)[i].resize(1);
}
Expand All @@ -374,7 +382,13 @@ namespace ctranslate2 {
state,
&logits,
attention ? &attention_step_device : nullptr);
ops::LogSoftMax()(logits, log_probs);

// Compute log probs only if scores should be returned.
if (scores) {
ops::LogSoftMax()(logits, log_probs);
} else {
log_probs.shallow_copy(logits);
}

// Penalize end_id, if configured.
if (step < min_length)
Expand All @@ -399,8 +413,10 @@ namespace ctranslate2 {
} else {
sample_from.at<int32_t>(i) = true_id;
sampled_ids[batch_id][0].push_back(true_id);
scores[batch_id][0] += best_probs.scalar_at<float>({i});
++count_alive;
if (scores) {
(*scores)[batch_id][0] += best_probs.scalar_at<float>({i});
}
if (attention) {
const auto* attn = attention_step.index<float>({i});
(*attention)[batch_id][0].emplace_back(attn, attn + attention_step.dim(-1));
Expand Down Expand Up @@ -482,6 +498,7 @@ namespace ctranslate2 {
const dim_t min_length,
const size_t num_hypotheses,
const bool return_alternatives,
const bool return_scores,
const bool return_attention) {
dim_t start_step = 0;

Expand Down Expand Up @@ -518,7 +535,7 @@ namespace ctranslate2 {
/*min_length=*/1,
output_ids_map,
expanded_ids,
expanded_scores,
return_scores ? &expanded_scores : nullptr,
return_attention ? &expanded_attention : nullptr,
num_hypotheses);

Expand All @@ -542,7 +559,7 @@ namespace ctranslate2 {
min_length,
output_ids_map,
sampled_ids,
scores,
return_scores ? &scores : nullptr,
return_attention ? &attention : nullptr,
return_alternatives ? 1 : num_hypotheses);

Expand Down Expand Up @@ -571,7 +588,7 @@ namespace ctranslate2 {
ids.insert(ids.begin(), prefix_ids->at(i).begin(), prefix_ids->at(i).end());

// Finalize the score.
if (!expanded_scores.empty())
if (return_scores && !expanded_scores.empty())
scores[i][h] += expanded_scores[i][h];

// Finalize the attention.
Expand All @@ -587,13 +604,12 @@ namespace ctranslate2 {
}
}

GenerationResult<size_t> result(std::move(sampled_ids[i]));
if (return_scores)
result.set_scores(std::move(scores[i]));
if (return_attention)
results.emplace_back(std::move(sampled_ids[i]),
std::move(scores[i]),
std::move(attention[i]));
else
results.emplace_back(std::move(sampled_ids[i]),
std::move(scores[i]));
result.set_attention(std::move(attention[i]));
results.emplace_back(std::move(result));
}

return results;
Expand Down
21 changes: 17 additions & 4 deletions src/translation_result.cc
Expand Up @@ -10,10 +10,8 @@ namespace ctranslate2 {
}

template <typename T>
GenerationResult<T>::GenerationResult(std::vector<std::vector<T>> hypotheses,
std::vector<float> scores)
: _hypotheses(std::move(hypotheses))
, _scores(std::move(scores)) {
GenerationResult<T>::GenerationResult(std::vector<std::vector<T>> hypotheses)
: _hypotheses(std::move(hypotheses)) {
}

template <typename T>
Expand Down Expand Up @@ -50,11 +48,26 @@ namespace ctranslate2 {
return _scores;
}

// Replaces the stored scores. The vector is taken by value and moved into
// the result, so callers can pass an rvalue to avoid a copy.
template <typename T>
void GenerationResult<T>::set_scores(std::vector<float> scores) {
  this->_scores = std::move(scores);
}

// Returns true when the stored score vector is non-empty.
template <typename T>
bool GenerationResult<T>::has_scores() const {
  const bool no_scores = _scores.empty();
  return !no_scores;
}

// Read-only access to the stored attention vectors.
template <typename T>
const std::vector<std::vector<std::vector<float>>>&
GenerationResult<T>::attention() const {
  return this->_attention;
}

// Replaces the stored attention vectors. The argument is taken by value and
// moved into the result.
template <typename T>
void GenerationResult<T>::set_attention(
    std::vector<std::vector<std::vector<float>>> attention) {
  this->_attention = std::move(attention);
}

template <typename T>
bool GenerationResult<T>::has_attention() const {
return !_attention.empty();
Expand Down
1 change: 1 addition & 0 deletions src/translator.cc
Expand Up @@ -329,6 +329,7 @@ namespace ctranslate2 {
options.min_decoding_length,
options.num_hypotheses,
options.return_alternatives,
options.return_scores,
options.return_attention);

// Convert generated ids to tokens.
Expand Down
2 changes: 1 addition & 1 deletion src/translator_pool.cc
Expand Up @@ -118,7 +118,7 @@ namespace ctranslate2 {
stats.num_tokens += hypotheses[0].size();
for (size_t n = 0; n < hypotheses.size(); ++n) {
if (with_scores)
out << scores[n] << " ||| ";
out << (result.has_scores() ? scores[n] : 0) << " ||| ";
for (size_t i = 0; i < hypotheses[n].size(); ++i) {
if (i > 0)
out << ' ';
Expand Down
11 changes: 11 additions & 0 deletions tests/translator_test.cc
Expand Up @@ -327,3 +327,14 @@ TEST(TranslatorTest, InvalidNumHypotheses) {
std::vector<std::string> input = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"};
EXPECT_THROW(translator.translate(input, options), std::invalid_argument);
}

// With return_scores disabled and beam_size 1, the result must report that it
// has no scores while the output tokens are still the expected ones.
TEST(TranslatorTest, IgnoreScore) {
  Translator model = default_translator();

  TranslationOptions opts;
  opts.beam_size = 1;
  opts.return_scores = false;

  const std::vector<std::string> source_tokens = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"};
  const std::vector<std::string> expected_tokens{"a", "t", "z", "m", "o", "n"};

  const TranslationResult translation = model.translate(source_tokens, opts);

  EXPECT_FALSE(translation.has_scores());
  EXPECT_EQ(translation.output(), expected_tokens);
}

0 comments on commit f644971

Please sign in to comment.