Skip to content


Form check box extraction with XWPFWordExtractor #7

wants to merge 2 commits into from

2 participants


For testing purposes, we compare automatically filled Word forms, at least by extracting the documents' text. It would therefore be great if XWPFWordExtractor could also extract the values of form check boxes as text, so that their state can be compared on a text basis.

This pull request implements the described feature. Form check boxes in *.docx files will be extracted as |_| for a disabled (unchecked) check box and |X| for an enabled (checked) check box.

The Apache Software Foundation member

Hi, this has been applied in r1636990 and should appear in the nightly builds and in POI 3.11beta3 and later.


Nice, thanks!


Btw: Will the GitHub mirror be updated from time to time? What is the policy?

The Apache Software Foundation member

Ah ok, I just found it on the trunk branch. Thanks.

@may-bee may-bee closed this
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
This page is out of date. Refresh to see the latest.
23 src/ooxml/java/org/apache/poi/xwpf/usermodel/
@@ -45,12 +45,16 @@ Licensed to the Apache Software Foundation (ASF) under one or more
import org.openxmlformats.schemas.drawingml.x2006.main.CTShapeProperties;
import org.openxmlformats.schemas.drawingml.x2006.main.CTTransform2D;
import org.openxmlformats.schemas.drawingml.x2006.main.STShapeType;
+import org.openxmlformats.schemas.drawingml.x2006.picture.CTPicture;
+import org.openxmlformats.schemas.drawingml.x2006.picture.CTPictureNonVisual;
import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTAnchor;
import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTInline;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBr;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTColor;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDrawing;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTEmpty;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFFCheckBox;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFldChar;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFonts;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFtnEdnRef;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHpsMeasure;
@@ -64,13 +68,12 @@ Licensed to the Apache Software Foundation (ASF) under one or more
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTVerticalAlignRun;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STBrClear;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STBrType;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.STFldCharType;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STOnOff;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STUnderline;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STVerticalAlignRun;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
-import org.openxmlformats.schemas.drawingml.x2006.picture.CTPicture;
-import org.openxmlformats.schemas.drawingml.x2006.picture.CTPictureNonVisual;
* XWPFRun object defines a region of text with a common set of properties
@@ -805,6 +808,22 @@ public String toString() {
text.append(((CTText) o).getStringValue());
+ // Complex type evaluation (currently only for extraction of check boxes)
+ if(o instanceof CTFldChar) {
+ CTFldChar ctfldChar = ((CTFldChar)o);
+ if(ctfldChar.getFldCharType() == STFldCharType.BEGIN) {
+ if(ctfldChar.getFfData() != null) {
+ for(CTFFCheckBox checkBox : ctfldChar.getFfData().getCheckBoxList()) {
+ if(checkBox.getDefault().getVal() == STOnOff.X_1) {
+ text.append("|X|");
+ } else {
+ text.append("|_|");
+ }
+ }
+ }
+ }
+ }
if (o instanceof CTPTab) {
11 src/ooxml/testcases/org/apache/poi/xwpf/extractor/
@@ -363,4 +363,15 @@ public void testBug55733() throws Exception {
+ public void testFetchCheckboxes() throws IOException {
+ XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("checkboxes.docx");
+ XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
+ assertEquals("This is a small test for checkboxes \nunchecked: |_| \n"+
+ "Or checked: |X|\n\n\n\n\n"+
+ "Test a checkbox within a textbox: |_| -> |X|\n\n\nIn Table:\n"+
+ "|_|\t|X|\n\n\nIn Sequence:\n|X||_||X|\n", extractor.getText());
+ extractor.close();
+ }
BIN test-data/document/checkboxes.docx
Binary file not shown.
Something went wrong with that request. Please try again.