-
Notifications
You must be signed in to change notification settings - Fork 37
/
response.h
83 lines (66 loc) · 3.4 KB
/
response.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#ifndef SRC_BERGAMOT_RESPONSE_H_
#define SRC_BERGAMOT_RESPONSE_H_
#include <cassert>
#include <string>
#include <vector>
#include "annotation.h"
#include "data/alignment.h"
#include "data/types.h"
#include "definitions.h"
#include "translator/beam_search.h"
namespace marian {
namespace bergamot {
/// Two-dimensional matrix of floats (a vector of float rows) used to carry
/// alignment data for a single unit of translation.
/// NOTE(review): presumably indexed as [target-token][source-token], matching
/// the layout documented for Response::alignments -- confirm against users.
using Alignment = std::vector<std::vector<float>>;
/// Response holds the AnnotatedText of the source text and of the translated
/// text, together with alignment information between source and target
/// sub-words and per-sentence quality information.
///
/// AnnotatedText provides an API to access markings of (sub-)word and
/// sentence boundaries, which are required to interpret the quality scores
/// and alignment(s) at the moment.
struct Response {
  /// SentenceQualityScore contains the quality data of a given translated sentence.
  /// It includes the confidence (proxied by log probabilities) of each decoded word
  /// (higher logprobs imply better-translated words), the ByteRanges of each term,
  /// and the logprob of the whole sentence, represented as the mean of the word scores.
  struct SentenceQualityScore {
    /// Quality score of each translated word.
    std::vector<float> wordScores;
    /// Position (byte range) of each word in the translated text.
    std::vector<ByteRange> wordByteRanges;
    /// Whole-sentence quality score (the mean of its word scores).
    float sentenceScore = 0.0f;
  };

  /// Convenience function to obtain the number of units translated. Same as
  /// `.source.numSentences()` and `.target.numSentences()`. The splitting of a
  /// text into sentences is handled internally, and this information can be
  /// used to iterate through meaningful units of translation for which
  /// alignment and quality information are available.
  ///
  /// @return the number of sentences reported by `source`.
  // Returned by plain value: a top-level `const` on a scalar return type is
  // ignored by the language and only produces -Wignored-qualifiers noise.
  size_t size() const { return source.numSentences(); }

  /// Source text and annotations of (sub-)words and sentences.
  AnnotatedText source;

  /// Translated text and annotations of (sub-)words and sentences.
  AnnotatedText target;

  /// Logprob of each word and of the total sequence (sentence),
  /// normalized by length, for each sentence processed by the translator.
  /// Indices correspond to ranges accessible through the respective Annotation
  /// on source or target.
  std::vector<SentenceQualityScore> qualityScores;

  /// Alignments between source and target. This is a collection of dense matrices providing
  ///   P[t][s] = p(source-token s | target-token t)
  /// with one alignment matrix for each sentence. Each element has the same
  /// type as the namespace-level `Alignment` alias.
  std::vector<std::vector<std::vector<float>>> alignments;

  /// Returns the source sentence (in terms of byte range) corresponding to sentenceIdx.
  ///
  /// @param [in] sentenceIdx: The index representing the sentence where 0 <= sentenceIdx < Response::size()
  ByteRange getSourceSentenceAsByteRange(size_t sentenceIdx) const { return source.sentenceAsByteRange(sentenceIdx); }

  /// Returns the translated sentence (in terms of byte range) corresponding to sentenceIdx.
  ///
  /// @param [in] sentenceIdx: The index representing the sentence where 0 <= sentenceIdx < Response::size()
  ByteRange getTargetSentenceAsByteRange(size_t sentenceIdx) const { return target.sentenceAsByteRange(sentenceIdx); }

  /// Returns a reference to the text held by `source`. The reference refers to
  /// a member of this Response, so it must not be used after the Response is
  /// destroyed.
  const std::string &getOriginalText() const { return source.text; }

  /// Returns a reference to the text held by `target`. The reference refers to
  /// a member of this Response, so it must not be used after the Response is
  /// destroyed.
  const std::string &getTranslatedText() const { return target.text; }
};
/// Produces a collection of Alignment matrices from two Responses.
///
/// Only the declaration is visible in this header; the definition lives in
/// another translation unit.
/// NOTE(review): presumably intended for chained (pivot) translation, where
/// `first.target` was used as `second.source`, yielding one matrix per
/// sentence that relates `first.source` to `second.target` -- confirm against
/// the definition before relying on this.
///
/// @param [in] first: the first Response (read-only).
/// @param [in] second: the second Response (read-only).
/// @return a vector of Alignment matrices, returned by value.
std::vector<Alignment> remapAlignments(const Response &first, const Response &second);
} // namespace bergamot
} // namespace marian
#endif // SRC_BERGAMOT_RESPONSE_H_