Skip to content

Commit

Permalink
adding list of layout tokens for the caption of figures
Browse files Browse the repository at this point in the history
  • Loading branch information
lfoppiano committed Nov 9, 2018
1 parent 1b2338a commit 6736736
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 3 deletions.
10 changes: 10 additions & 0 deletions grobid-core/src/main/java/org/grobid/core/data/Figure.java
Expand Up @@ -48,9 +48,11 @@ public boolean apply(GraphicObject graphicObject) {
}
};
// Text buffers for the parts of a figure. They are declared as null here;
// NOTE(review): presumably they are initialised before use (e.g. in a constructor
// not visible in this excerpt) — confirm, since the append/get methods dereference them.
protected StringBuilder caption = null;
// Layout tokens making up the caption, filled by appendCaptionLayoutTokens(...).
// Unlike the buffers above, this list is created eagerly and is never null by default.
protected List<LayoutToken> captionLayoutTokens = new ArrayList<>();
protected StringBuilder header = null;
protected StringBuilder content = null;
protected StringBuilder label = null;

protected String id = null;
protected URI uri = null;
// -1 marks the start position as not set.
protected int start = -1; // start position in the full text tokenization
Expand Down Expand Up @@ -90,10 +92,18 @@ public void appendCaption(String cap) {
caption.append(cap);
}

/**
 * Appends the given layout tokens, in order, to the caption's layout-token list.
 *
 * @param layoutTokens tokens to add; a {@code null} or empty list is ignored
 */
public void appendCaptionLayoutTokens(List<LayoutToken> layoutTokens) {
    // Collection.addAll(null) throws NullPointerException; treat a missing
    // list as "nothing to append" instead of failing the whole extraction.
    if (layoutTokens == null || layoutTokens.isEmpty()) {
        return;
    }
    captionLayoutTokens.addAll(layoutTokens);
}

/**
 * Returns the caption text accumulated so far.
 *
 * NOTE(review): the {@code caption} field is declared as {@code null}; this method
 * assumes it has been initialised beforehand (initialisation is not visible in this
 * excerpt) and throws {@link NullPointerException} otherwise.
 *
 * @return the current content of the {@code caption} buffer as a string
 */
public String getCaption() {
    final String captionText = this.caption.toString();
    return captionText;
}

/**
 * Returns the layout tokens collected for the caption.
 *
 * @return the backing list itself (not a copy), so modifications made by the
 *         caller are reflected in this figure
 */
public List<LayoutToken> getCaptionLayoutTokens() {
    return this.captionLayoutTokens;
}

/**
 * Appends a text fragment to the label buffer.
 *
 * @param lab text to append to the end of the {@code label} buffer
 */
public void appendLabel(String lab) {
    this.label.append(lab);
}
Expand Down
Expand Up @@ -57,8 +57,10 @@ public Figure processing(List<LayoutToken> tokenizationFigure, String featureVec
private Figure getExtractionResult(List<LayoutToken> tokenizations, String result) {
TaggingTokenClusteror clusteror = new TaggingTokenClusteror(GrobidModels.FIGURE, result, tokenizations);
List<TaggingTokenCluster> clusters = clusteror.cluster();
//System.out.println(result + "\n");
Figure figure = new Figure();
figure.setLayoutTokens(tokenizations);

for (TaggingTokenCluster cluster : clusters) {
if (cluster == null) {
continue;
Expand All @@ -70,6 +72,7 @@ private Figure getExtractionResult(List<LayoutToken> tokenizations, String resul
String clusterContent = LayoutTokensUtil.normalizeText(LayoutTokensUtil.toText(cluster.concatTokens()));
if (clusterLabel.equals(FIG_DESC)) {
figure.appendCaption(clusterContent);
figure.appendCaptionLayoutTokens(cluster.concatTokens());
} else if (clusterLabel.equals(FIG_HEAD)) {
figure.appendHeader(clusterContent);
} else if (clusterLabel.equals(FIG_LABEL)) {
Expand All @@ -92,7 +95,7 @@ private Figure getExtractionResult(List<LayoutToken> tokenizations, String resul
*/
public org.grobid.core.utilities.Pair<String, String> createTrainingData(List<LayoutToken> tokenizations,
String featureVector, String id) {
//System.out.println(tokenizations.toString() + "\n" );
//System.out.println(tokenizations.toString() + "\n" );
String res = null;
try {
res = label(featureVector);
Expand All @@ -102,7 +105,7 @@ public org.grobid.core.utilities.Pair<String, String> createTrainingData(List<La
if (res == null) {
return new Pair<>(null, featureVector);
}
//System.out.println(res + "\n" );
//System.out.println(res + "\n" );
List<Pair<String, String>> labeled = GenericTaggerUtils.getTokensAndLabels(res);
StringBuilder sb = new StringBuilder();

Expand Down

0 comments on commit 6736736

Please sign in to comment.