Skip to content
This repository has been archived by the owner on Dec 13, 2021. It is now read-only.

Commit

Permalink
Simplified StructuredTranslation to use derivations instead of hyperg…
Browse files Browse the repository at this point in the history
…raphs, now using in KBestExtractor

The StructuredTranslation object is a great idea. I rewrote it here to do the following:

- It now compiles. I'm not sure why it was tucked under $JOSHUA/joshua-6, but I only just noticed this, and when I brought it in, it didn't work.
-  I rewrote it to be based on a single (k-best) derivation, instead of knowing about the whole hypergraph. We should also build a more general object that knows about all the StructuredTranslation objects (maybe with some renaming).
-  I changed it to have an option to only compute each of the items (e.g., features) if it was requested. The non-lazy version remains the default.
-  KBestExtractor now uses these. This is the first step to making a proper API. My thinking is that a large object (maybe Translation?) will contain the k-best extractor and can return StructuredTranslation objects as requested (again, we may want to jiggle the names a bit)
  • Loading branch information
mjpost committed Apr 23, 2016
1 parent bc83a1a commit 8243195
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 93 deletions.
144 changes: 78 additions & 66 deletions src/joshua/decoder/StructuredTranslation.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@
import java.util.Map;

import joshua.decoder.ff.FeatureFunction;
import joshua.decoder.ff.FeatureVector;
import joshua.decoder.hypergraph.HyperGraph;
import joshua.decoder.hypergraph.KBestExtractor.DerivationState;
import joshua.decoder.io.DeNormalize;
import joshua.decoder.hypergraph.ViterbiFeatureVectorWalkerFunction;
import joshua.decoder.hypergraph.ViterbiOutputStringWalkerFunction;
import joshua.decoder.hypergraph.WalkerFunction;
Expand All @@ -30,77 +33,51 @@
public class StructuredTranslation {

private final Sentence sourceSentence;
private final List<FeatureFunction> featureFunctions;
private final DerivationState derivationRoot;
private final JoshuaConfiguration joshuaConfiguration;

private final String translationString;
private final List<String> translationTokens;
private final float translationScore;
private List<List<Integer>> translationWordAlignments;
private Map<String,Float> translationFeatures;
private final float extractionTime;
private String translationString = null;
private List<String> translationTokens = null;
private String translationWordAlignments = null;
private FeatureVector translationFeatures = null;
private float extractionTime = 0.0f;
private float translationScore = 0.0f;

/* If we need to replay the features, this will get set to true, so that it's only done once */
private boolean featuresReplayed = false;

This comment has been minimized.

Copy link
@fhieber

fhieber Apr 25, 2016

Contributor

this could be solved using the Suppliers pattern as already done in the PackedGrammar class

This comment has been minimized.

Copy link
@mjpost

mjpost Apr 25, 2016

Author Contributor

I wasn't familiar with that, will read up on it.


public StructuredTranslation(final Sentence sourceSentence,
final HyperGraph hypergraph,
final List<FeatureFunction> featureFunctions) {

final long startTime = System.currentTimeMillis();

this.sourceSentence = sourceSentence;
this.featureFunctions = featureFunctions;
this.translationString = extractViterbiString(hypergraph);
this.translationTokens = extractTranslationTokens();
this.translationScore = extractTranslationScore(hypergraph);
this.translationFeatures = extractViterbiFeatures(hypergraph);
this.translationWordAlignments = extractViterbiWordAlignment(hypergraph);
this.extractionTime = (System.currentTimeMillis() - startTime) / 1000.0f;
}

private Map<String,Float> extractViterbiFeatures(final HyperGraph hypergraph) {
if (hypergraph == null) {
return emptyMap();
} else {
ViterbiFeatureVectorWalkerFunction viterbiFeatureVectorWalker = new ViterbiFeatureVectorWalkerFunction(featureFunctions, sourceSentence);
walk(hypergraph.goalNode, viterbiFeatureVectorWalker);
return new HashMap<String,Float>(viterbiFeatureVectorWalker.getFeaturesMap());
}
}
final DerivationState derivationRoot,
JoshuaConfiguration config) {

private List<List<Integer>> extractViterbiWordAlignment(final HyperGraph hypergraph) {
if (hypergraph == null) {
return emptyList();
} else {
final WordAlignmentExtractor wordAlignmentWalker = new WordAlignmentExtractor();
walk(hypergraph.goalNode, wordAlignmentWalker);
return wordAlignmentWalker.getFinalWordAlignments();
}
}

private float extractTranslationScore(final HyperGraph hypergraph) {
if (hypergraph == null) {
return 0;
} else {
return hypergraph.goalNode.getScore();
}
}

private String extractViterbiString(final HyperGraph hypergraph) {
if (hypergraph == null) {
return sourceSentence.source();
} else {
final WalkerFunction viterbiOutputStringWalker = new ViterbiOutputStringWalkerFunction();
walk(hypergraph.goalNode, viterbiOutputStringWalker);
return viterbiOutputStringWalker.toString();
}
this(sourceSentence, derivationRoot, config, true);
}


private List<String> extractTranslationTokens() {
if (translationString.isEmpty()) {
return emptyList();
} else {
return asList(translationString.split("\\s+"));
/**
 * Builds a structured translation around a single derivation.
 *
 * @param sourceSentence the sentence that was translated
 * @param derivationRoot root of the derivation the results are read from
 * @param config decoder configuration, stored on this object
 * @param now if {@code true}, the translation string, tokens, features and word alignments
 *        are all computed immediately; if {@code false}, each one is computed the first
 *        time its getter is called
 */
public StructuredTranslation(final Sentence sourceSentence,
    final DerivationState derivationRoot,
    JoshuaConfiguration config,
    boolean now) {

  final long startTime = System.currentTimeMillis();

  this.sourceSentence = sourceSentence;
  this.derivationRoot = derivationRoot;
  this.joshuaConfiguration = config;

  if (now) {
    getTranslationString();
    getTranslationTokens();
    getTranslationFeatures();
    getTranslationWordAlignments();
  }

  // The score is computed in both eager and lazy mode, so it is requested exactly once
  // here rather than a second time inside the eager branch.
  this.translationScore = getTranslationScore();

  this.extractionTime = (System.currentTimeMillis() - startTime) / 1000.0f;
}


// Getters to use upstream

public Sentence getSourceSentence() {
Expand All @@ -112,25 +89,60 @@ public int getSentenceId() {
}

public String getTranslationString() {
return translationString;
if (this.translationString == null) {
if (derivationRoot == null) {
this.translationString = sourceSentence.source();
} else {
this.translationString = derivationRoot.getHypothesis();
}
}
return this.translationString;
}

/**
 * Returns the translation split on runs of whitespace. An empty translation string gives
 * an empty list. The list is built on the first call and reused afterwards.
 */
public List<String> getTranslationTokens() {
  if (this.translationTokens != null) {
    return this.translationTokens;
  }

  final String text = getTranslationString();
  if (!text.isEmpty()) {
    this.translationTokens = asList(text.split("\\s+"));
  } else {
    this.translationTokens = emptyList();
  }
  return this.translationTokens;
}

/**
 * Returns the model cost of the derivation, or {@code 0.0f} when there is no derivation.
 * The value is re-read from the derivation and stored on every call.
 */
public float getTranslationScore() {
  this.translationScore = (derivationRoot != null) ? derivationRoot.getModelCost() : 0.0f;
  return this.translationScore;
}

/**
* Returns a list of target to source alignments.
*/
public List<List<Integer>> getTranslationWordAlignments() {
return translationWordAlignments;
/**
 * Returns the word alignments of this translation as a string, as rendered by
 * {@code WordAlignmentExtractor.toString()}. With no derivation the result is the empty
 * string. The value is computed on the first call and reused afterwards.
 */
public String getTranslationWordAlignments() {
  if (this.translationWordAlignments != null) {
    return this.translationWordAlignments;
  }

  if (derivationRoot == null) {
    this.translationWordAlignments = "";
    return this.translationWordAlignments;
  }

  // Walk the derivation with the alignment visitor and keep its string rendering.
  WordAlignmentExtractor extractor = new WordAlignmentExtractor();
  derivationRoot.visit(extractor);
  this.translationWordAlignments = extractor.toString();
  return this.translationWordAlignments;
}

public Map<String,Float> getTranslationFeatures() {
/**
 * Returns the feature values of this translation. They are replayed from the derivation
 * on the first call and the result is reused afterwards.
 *
 * @return the replayed features, or an empty {@link FeatureVector} when there is no
 *         derivation
 */
public FeatureVector getTranslationFeatures() {
  if (this.translationFeatures == null) {
    if (derivationRoot == null) {
      // Nothing to replay without a derivation: fall back to an empty vector, the same
      // way the other getters fall back to defaults, instead of throwing a
      // NullPointerException.
      this.translationFeatures = new FeatureVector();
    } else {
      this.translationFeatures = derivationRoot.replayFeatures();
    }
  }

  return this.translationFeatures;
}

Expand Down
47 changes: 20 additions & 27 deletions src/joshua/decoder/hypergraph/KBestExtractor.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import joshua.corpus.Vocabulary;
import joshua.decoder.BLEU;
import joshua.decoder.JoshuaConfiguration;
import joshua.decoder.StructuredTranslation;
import joshua.decoder.chart_parser.ComputeNodeResult;
import joshua.decoder.ff.FeatureFunction;
import joshua.decoder.ff.FeatureVector;
Expand Down Expand Up @@ -167,33 +168,25 @@ public String getKthHyp(HGNode node, int k) {
// Determine the k-best hypotheses at each HGNode
VirtualNode virtualNode = getVirtualNode(node);
DerivationState derivationState = virtualNode.lazyKBestExtractOnNode(this, k);

// DerivationState derivationState = getKthDerivation(node, k);
if (derivationState != null) {
// ==== read the kbest from each hgnode and convert to output format
FeatureVector features = new FeatureVector();

/*
* To save space, the decoder only stores the model cost, no the individual feature values. If
* you want to output them, you have to replay them.
*/
String hypothesis = null;
if (joshuaConfiguration.outputFormat.contains("%f")
|| joshuaConfiguration.outputFormat.contains("%d"))
features = derivationState.replayFeatures();

hypothesis = derivationState.getHypothesis()
StructuredTranslation translation = new StructuredTranslation(
sentence, derivationState, joshuaConfiguration);

String hypothesis = translation.getTranslationString()
.replaceAll("-lsb-", "[")
.replaceAll("-rsb-", "]")
.replaceAll("-pipe-", "|");


outputString = joshuaConfiguration.outputFormat
.replace("%k", Integer.toString(k))
.replace("%s", hypothesis)
.replace("%S", DeNormalize.processSingleLine(hypothesis))
.replace("%i", Integer.toString(sentence.id()))
.replace("%f", joshuaConfiguration.moses ? features.mosesString() : features.toString())
.replace("%c", String.format("%.3f", derivationState.cost));
.replace("%f", joshuaConfiguration.moses ? translation.getTranslationFeatures().mosesString() : translation.getTranslationFeatures().toString())
.replace("%c", String.format("%.3f", translation.getTranslationScore()));

if (joshuaConfiguration.outputFormat.contains("%t")) {
outputString = outputString.replace("%t", derivationState.getTree());
Expand Down Expand Up @@ -250,11 +243,11 @@ public void lazyKBestExtractOnHG(HyperGraph hg, int topN, BufferedWriter out) th
return;

for (int k = 1; k <= topN; k++) {
String hypStr = getKthHyp(hg.goalNode, k);
if (null == hypStr)
String translation = getKthHyp(hg.goalNode, k);
if (null == translation)
break;

out.write(hypStr);
out.write(translation);
out.write("\n");
out.flush();
}
Expand Down Expand Up @@ -704,11 +697,11 @@ public int hashCode() {
/**
* Visits every state in the derivation in a depth-first order.
*/
private DerivationVisitor visit(DerivationVisitor visitor) {
/**
 * Runs the visitor over this derivation, starting at indent level 0, and returns the
 * result of the two-argument {@code visit}.
 */
public DerivationVisitor visit(DerivationVisitor visitor) {
  final int startIndent = 0;
  return visit(visitor, startIndent);
}

private DerivationVisitor visit(DerivationVisitor visitor, int indent) {
public DerivationVisitor visit(DerivationVisitor visitor, int indent) {

visitor.before(this, indent);

Expand All @@ -733,25 +726,25 @@ private DerivationVisitor visit(DerivationVisitor visitor, int indent) {
return visitor;
}

private String getHypothesis() {
/** Returns the hypothesis string for the default side ({@code defaultSide}). */
public String getHypothesis() {
  final String hypothesis = getHypothesis(defaultSide);
  return hypothesis;
}

private String getTree() {
/** Visits this derivation with a {@code TreeExtractor} and returns its string form. */
public String getTree() {
  final DerivationVisitor treeVisitor = visit(new TreeExtractor());
  return treeVisitor.toString();
}

private String getHypothesis(Side side) {
/**
 * Visits this derivation with a {@code HypothesisExtractor} for the given side and
 * returns its string form.
 */
public String getHypothesis(Side side) {
  final DerivationVisitor hypothesisVisitor = visit(new HypothesisExtractor(side));
  return hypothesisVisitor.toString();
}

private FeatureVector replayFeatures() {
/**
 * Visits this derivation with a fresh {@code FeatureReplayer} and returns the features
 * it collected.
 */
public FeatureVector replayFeatures() {
  final FeatureReplayer replayer = new FeatureReplayer();
  visit(replayer);
  final FeatureVector replayed = replayer.getFeatures();
  return replayed;
}

private String getDerivation() {
/** Visits this derivation with a {@code DerivationExtractor} and returns its string form. */
public String getDerivation() {
  final DerivationVisitor derivationVisitor = visit(new DerivationExtractor());
  return derivationVisitor.toString();
}

Expand Down Expand Up @@ -811,7 +804,7 @@ public interface DerivationVisitor {
*/
void after(DerivationState state, int level);
}

/**
* Extracts the hypothesis from the leaves of the tree using the generic (depth-first) visitor.
* Since we're using the visitor, we can't just print out the words as we see them. We have to
Expand Down Expand Up @@ -878,7 +871,7 @@ public String toString() {
return outputs.pop().replaceAll("<s> ", "").replace(" </s>", "");
}
}

/**
* Assembles a Penn treebank format tree for a given derivation.
*/
Expand Down

0 comments on commit 8243195

Please sign in to comment.