Remove marian entities from Response

Towards #77. Response previously required marian-deep objects like histories and vocabs for construction. It has been decided that this structure needs to be exposed and should not have marian-internals. This commit is the final nail in the coffin of Response as a marian object concept. The construction responsibility of Response is now moved to a ResponseBuilder class.
browsermt · Apr 7, 2021 · a4be691 · a4be691
1 parent 828c6da
commit a4be691
Show file tree

Hide file tree

Showing 3 changed files with 2 additions and 113 deletions.
diff --git a/src/translator/response.cpp b/src/translator/response.cpp
@@ -6,101 +6,5 @@
 #include <utility>
 
 namespace marian {
-namespace bergamot {
-
-Response::Response(AnnotatedText &&source, Histories &&histories,
-                   std::vector<Ptr<Vocab const>> &vocabs)
-    : source(std::move(source)) {
-  // Reserving length at least as much as source_ seems like a reasonable thing
-  // to do to avoid reallocations.
-  target.text.reserve(source.text.size());
-
-  // In a first step, the decoded units (individual senteneces) are compiled
-  // into a huge string. This is done by computing indices first and appending
-  // to the string as each sentences are decoded.
-  std::vector<std::pair<size_t, size_t>> translationRanges;
-  std::vector<size_t> sentenceBegins;
-
-  size_t offset{0};
-  bool first{true};
-
-  for (auto &history : histories) {
-    // TODO(jerin): Change hardcode of nBest = 1
-    NBestList onebest = history->nBest(1);
-
-    Result result = onebest[0]; // Expecting only one result;
-    Words words = std::get<0>(result);
-    auto targetVocab = vocabs.back();
-
-    std::string decoded;
-    std::vector<string_view> targetMappings;
-    targetVocab->decodeWithByteRanges(words, decoded, targetMappings);
-
-    if (first) {
-      first = false;
-    } else {
-      target.text += " ";
-      ++offset;
-    }
-
-    sentenceBegins.push_back(translationRanges.size());
-    target.text += decoded;
-    auto decodedStringBeginMarker = targetMappings.front().begin();
-    for (auto &sview : targetMappings) {
-      size_t startIdx = offset + sview.begin() - decodedStringBeginMarker;
-      translationRanges.emplace_back(startIdx, startIdx + sview.size());
-    }
-
-    offset += decoded.size();
-
-    // Alignments
-    // TODO(jerinphilip): The following double conversion might not be
-    // necessary. Hard alignment can directly be exported, but this would mean
-    // WASM bindings for a structure deep within marian source.
-    auto hyp = std::get<1>(result);
-    auto softAlignment = hyp->tracebackAlignment();
-    auto hardAlignment = data::ConvertSoftAlignToHardAlign(
-        softAlignment, /*threshold=*/0.2f); // TODO(jerinphilip): Make this a
-                                            // configurable parameter.
-
-    Alignment unified_alignment;
-    for (auto &p : hardAlignment) {
-      unified_alignment.emplace_back((Point){p.srcPos, p.tgtPos, p.prob});
-    }
-
-    alignments.push_back(std::move(unified_alignment));
-
-    // Quality scores: Sequence level is obtained as normalized path scores.
-    // Word level using hypothesis traceback. These are most-likely logprobs.
-    auto normalizedPathScore = std::get<2>(result);
-    auto wordQualities = hyp->tracebackWordScores();
-    wordQualities.pop_back();
-    qualityScores.push_back((Quality){normalizedPathScore, wordQualities});
-  }
-
-  // Once we have the indices in translation (which might be resized a few
-  // times) ready, we can prepare and store the string_view as annotations
-  // instead. This is accomplished by iterating over available sentences using
-  // sentenceBegin and using addSentence(...) API from Annotation.
-
-  for (size_t i = 1; i <= sentenceBegins.size(); i++) {
-    std::vector<string_view> targetMappings;
-    size_t begin = sentenceBegins[i - 1];
-    size_t safe_end = (i == sentenceBegins.size()) ? translationRanges.size()
-                                                   : sentenceBegins[i];
-
-    for (size_t idx = begin; idx < safe_end; idx++) {
-      auto &p = translationRanges[idx];
-      size_t begin_idx = p.first;
-      size_t end_idx = p.second;
-
-      const char *data = &target.text[begin_idx];
-      size_t size = end_idx - begin_idx;
-      targetMappings.emplace_back(data, size);
-    }
-
-    target.addSentence(targetMappings);
-  }
-}
-} // namespace bergamot
+namespace bergamot {} // namespace bergamot
 } // namespace marian
diff --git a/src/translator/response.h b/src/translator/response.h
@@ -40,13 +40,11 @@ struct Quality {
 /// AnnotatedText provides an API to access markings of (sub)-word and
 /// sentences boundaries, which are required to interpret Quality and
 /// Alignment (s) at the moment.
-class Response {
+struct Response {
 
 public:
   /// Empty constructor, harmoniously existing for now.
   Response(){};
-  Response(AnnotatedText &&source, Histories &&histories,
-           std::vector<Ptr<Vocab const>> &vocabs);
 
   /// \cond HIDDEN_PUBLIC
   // Move constructor.

diff --git a/src/translator/response_builder.h b/src/translator/response_builder.h
@@ -37,19 +37,6 @@ class ResponseBuilder {
   void operator()(Histories &&histories) {
     // TODO(jerinphilip) load RequestParams into options and turn build
     // functions on or off.
-    // PART 1: Freeze Response and fix Request pipeline.
-    // existingBuild(std::move(histories));
-
-    // PART 2: Uncomment below and test the other half.
-    replacementBuild(std::move(histories));
-  }
-
-  void existingBuild(Histories &&histories) {
-    Response response(std::move(source_), std::move(histories), *vocabs_);
-    promise_.set_value(std::move(response));
-  }
-
-  void replacementBuild(Histories &&histories) {
     // params_ is unused, but we can try something here.
     ABORT_IF(source_.numSentences() != histories.size(),
              "Mismatch in source and translated sentences");