Skip to content

Commit

Permalink
Remove marian entities from Response
Browse files Browse the repository at this point in the history
Towards #77.

Response previously required marian-deep objects like histories and
vocabs for construction. It has been decided that this structure needs
to be exposed and should not have marian-internals. This commit is the
final nail in the coffin of Response as a marian object concept. The
construction responsibility of Response is now moved to a ResponseBuilder
class.
  • Loading branch information
Jerin Philip committed Apr 7, 2021
1 parent 828c6da commit a4be691
Show file tree
Hide file tree
Showing 3 changed files with 2 additions and 113 deletions.
98 changes: 1 addition & 97 deletions src/translator/response.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,101 +6,5 @@
#include <utility>

namespace marian {
namespace bergamot {

/// Builds a Response from the annotated source text and the decoder output.
///
/// For every history the single best hypothesis is decoded with the last
/// vocabulary in `vocabs`, appended to `target.text` (sentences separated by a
/// single space), and its alignment and quality scores are recorded. Once the
/// whole target string is assembled, the per-token byte ranges are turned into
/// string_views and registered sentence by sentence through
/// `target.addSentence(...)`.
///
/// @param source    Annotated source text; moved into the `source` member.
/// @param histories One translation history per sentence.
/// @param vocabs    Vocabularies; the last entry is used for decoding.
Response::Response(AnnotatedText &&source, Histories &&histories,
                   std::vector<Ptr<Vocab const>> &vocabs)
    : source(std::move(source)) {
  // Reserving at least as much as the source text is a reasonable way to
  // avoid reallocations. The constructor parameter `source` shadows the member
  // and has just been moved from, so the size must be read from the member.
  target.text.reserve(this->source.text.size());

  // In a first step, the decoded units (individual sentences) are compiled
  // into one large string. Only indices are stored at this stage, because
  // target.text may reallocate while sentences are being appended.
  std::vector<std::pair<size_t, size_t>> translationRanges;
  std::vector<size_t> sentenceBegins;

  size_t offset{0};
  bool first{true};

  for (auto &history : histories) {
    // TODO(jerin): Change hardcode of nBest = 1
    NBestList onebest = history->nBest(1);

    const auto &result = onebest[0];  // Expecting only one result.
    const auto &words = std::get<0>(result);
    const auto &targetVocab = vocabs.back();

    std::string decoded;
    std::vector<string_view> targetMappings;
    targetVocab->decodeWithByteRanges(words, decoded, targetMappings);

    // Sentences are joined by a single space; account for it in the offset.
    if (first) {
      first = false;
    } else {
      target.text += " ";
      ++offset;
    }

    sentenceBegins.push_back(translationRanges.size());
    target.text += decoded;

    // Translate each view into [begin, end) indices relative to target.text.
    // Guarded so that an empty decode does not call front() on an empty
    // vector.
    if (!targetMappings.empty()) {
      auto decodedStringBeginMarker = targetMappings.front().begin();
      for (const auto &sview : targetMappings) {
        size_t startIdx = offset + sview.begin() - decodedStringBeginMarker;
        translationRanges.emplace_back(startIdx, startIdx + sview.size());
      }
    }

    offset += decoded.size();

    // Alignments
    // TODO(jerinphilip): The following double conversion might not be
    // necessary. Hard alignment can directly be exported, but this would mean
    // WASM bindings for a structure deep within marian source.
    const auto &hyp = std::get<1>(result);
    auto softAlignment = hyp->tracebackAlignment();
    auto hardAlignment = data::ConvertSoftAlignToHardAlign(
        softAlignment, /*threshold=*/0.2f);  // TODO(jerinphilip): Make this a
                                             // configurable parameter.

    Alignment unified_alignment;
    for (const auto &p : hardAlignment) {
      unified_alignment.emplace_back(Point{p.srcPos, p.tgtPos, p.prob});
    }

    alignments.push_back(std::move(unified_alignment));

    // Quality scores: Sequence level is obtained as normalized path scores.
    // Word level using hypothesis traceback. These are most-likely logprobs.
    auto normalizedPathScore = std::get<2>(result);
    auto wordQualities = hyp->tracebackWordScores();
    // The last score is dropped (presumably the end-of-sentence token --
    // TODO confirm); guarded because pop_back() on an empty vector is UB.
    if (!wordQualities.empty()) {
      wordQualities.pop_back();
    }
    qualityScores.push_back(
        Quality{normalizedPathScore, std::move(wordQualities)});
  }

  // target.text is final from here on, so pointers into it are now stable.
  // Convert the stored index ranges into string_views and register them one
  // sentence at a time via the addSentence(...) API.
  const size_t numSentences = sentenceBegins.size();
  for (size_t i = 0; i < numSentences; ++i) {
    const size_t begin = sentenceBegins[i];
    const size_t end = (i + 1 == numSentences) ? translationRanges.size()
                                               : sentenceBegins[i + 1];

    std::vector<string_view> targetMappings;
    targetMappings.reserve(end - begin);
    for (size_t idx = begin; idx < end; ++idx) {
      const auto &range = translationRanges[idx];
      const char *data = &target.text[range.first];
      targetMappings.emplace_back(data, range.second - range.first);
    }

    target.addSentence(targetMappings);
  }
}
} // namespace bergamot
namespace bergamot {} // namespace bergamot
} // namespace marian
4 changes: 1 addition & 3 deletions src/translator/response.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,11 @@ struct Quality {
/// AnnotatedText provides an API to access markings of (sub)-word and
/// sentences boundaries, which are required to interpret Quality and
/// Alignment (s) at the moment.
class Response {
struct Response {

public:
/// Empty constructor, harmoniously existing for now.
Response(){};
Response(AnnotatedText &&source, Histories &&histories,
std::vector<Ptr<Vocab const>> &vocabs);

/// \cond HIDDEN_PUBLIC
// Move constructor.
Expand Down
13 changes: 0 additions & 13 deletions src/translator/response_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,19 +37,6 @@ class ResponseBuilder {
/// Callback invoked with the translation histories; forwards them to
/// replacementBuild(...) to produce the response.
///
/// @param histories Translation histories, moved into the build function.
void operator()(Histories &&histories) {
// TODO(jerinphilip) load RequestParams into options and turn build
// functions on or off.
// PART 1: Freeze Response and fix Request pipeline (legacy path, disabled):
// existingBuild(std::move(histories));

// PART 2: The replacement path is the one currently active.
replacementBuild(std::move(histories));
}

/// Legacy build path: constructs the Response through the Response
/// constructor that takes source, histories and vocabs, then hands the result
/// to promise_.
///
/// @param histories Translation histories, moved into the Response.
void existingBuild(Histories &&histories) {
// source_ and histories are moved from here; vocabs_ is only dereferenced.
Response response(std::move(source_), std::move(histories), *vocabs_);
// Fulfil the promise with the finished response.
promise_.set_value(std::move(response));
}

void replacementBuild(Histories &&histories) {
// params_ is unused, but we can try something here.
ABORT_IF(source_.numSentences() != histories.size(),
"Mismatch in source and translated sentences");
Expand Down

0 comments on commit a4be691

Please sign in to comment.