Skip to content

Commit

Permalink
Bugzilla 53205 - Fix some parsing errors and encoding issues in HDGF
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1365638 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
Yegor Kozlov committed Jul 25, 2012
1 parent 3132801 commit e971e30
Show file tree
Hide file tree
Showing 8 changed files with 132 additions and 70 deletions.
1 change: 1 addition & 0 deletions src/documentation/content/xdocs/status.xml
Expand Up @@ -34,6 +34,7 @@

<changes>
<release version="3.9-beta1" date="2012-??-??">
<action dev="poi-developers" type="fix">53205 - Fix some parsing errors and encoding issues in HDGF </action>
<action dev="poi-developers" type="add">53204 - Improved performance of PageSettingsBlock in HSSF </action>
<action dev="poi-developers" type="add">53500 - Getter for repeating rows and columns</action>
<action dev="poi-developers" type="fix">53369 - Fixed tests failing on JDK 1.7</action>
Expand Down
126 changes: 66 additions & 60 deletions src/scratchpad/src/org/apache/poi/hdgf/chunks/Chunk.java
Expand Up @@ -161,70 +161,76 @@ protected void processCommands() {
continue;
}

// Process
switch(type) {
// Types 0->7 = a flat at bit 0->7
case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
int val = contents[offset] & (1<<type);
command.value = Boolean.valueOf(val > 0);
break;
case 8:
command.value = Byte.valueOf(contents[offset]);
break;
case 9:
command.value = new Double(
LittleEndian.getDouble(contents, offset)
);
break;
case 12:
// A Little Endian String
// Starts 8 bytes into the data segment
// Ends at end of data, or 00 00

// Ensure we have enough data
if(contents.length < 8) {
command.value = "";
try {
// Process
switch(type) {
// Types 0->7 = a flat at bit 0->7
case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
int val = contents[offset] & (1<<type);
command.value = Boolean.valueOf(val > 0);
break;
}

// Find the end point
int startsAt = 8;
int endsAt = startsAt;
for(int j=startsAt; j<contents.length-1 && endsAt == startsAt; j++) {
if(contents[j] == 0 && contents[j+1] == 0) {
endsAt = j;
case 8:
command.value = Byte.valueOf(contents[offset]);
break;
case 9:
command.value = new Double(
LittleEndian.getDouble(contents, offset)
);
break;
case 12:
// A Little Endian String
// Starts 8 bytes into the data segment
// Ends at end of data, or 00 00

// Ensure we have enough data
if(contents.length < 8) {
command.value = "";
break;
}
}
if(endsAt == startsAt) {
endsAt = contents.length;
}

int strLen = (endsAt-startsAt) / 2;
command.value = StringUtil.getFromUnicodeLE(contents, startsAt, strLen);
break;
case 25:
command.value = Short.valueOf(
LittleEndian.getShort(contents, offset)
);
break;
case 26:
command.value = Integer.valueOf(
LittleEndian.getInt(contents, offset)
);
break;

// Types 11 and 21 hold the offset to the blocks
case 11: case 21:
if(offset < contents.length - 3) {
int bOffset = (int)LittleEndian.getUInt(contents, offset);
BlockOffsetCommand bcmd = (BlockOffsetCommand)command;
bcmd.setOffset(bOffset);
}
break;
// Find the end point
int startsAt = 8;
int endsAt = startsAt;
for(int j=startsAt; j<contents.length-1 && endsAt == startsAt; j++) {
if(contents[j] == 0 && contents[j+1] == 0) {
endsAt = j;
}
}
if(endsAt == startsAt) {
endsAt = contents.length;
}

default:
logger.log(POILogger.INFO,
"Command of type " + type + " not processed!");
int strLen = endsAt - startsAt;
command.value = new String(contents, startsAt, strLen, header.getChunkCharset().name());
break;
case 25:
command.value = Short.valueOf(
LittleEndian.getShort(contents, offset)
);
break;
case 26:
command.value = Integer.valueOf(
LittleEndian.getInt(contents, offset)
);
break;

// Types 11 and 21 hold the offset to the blocks
case 11: case 21:
if(offset < contents.length - 3) {
int bOffset = (int)LittleEndian.getUInt(contents, offset);
BlockOffsetCommand bcmd = (BlockOffsetCommand)command;
bcmd.setOffset(bOffset);
}
break;

default:
logger.log(POILogger.INFO,
"Command of type " + type + " not processed!");
}
}
catch (Exception e) {
logger.log(POILogger.ERROR, "Unexpected error processing command, ignoring and continuing. Command: " +
command, e);
}

// Add to the array
Expand Down
Expand Up @@ -19,6 +19,8 @@ Licensed to the Apache Software Foundation (ASF) under one or more

import org.apache.poi.util.LittleEndian;

import java.nio.charset.Charset;

/**
* A chunk header
*/
Expand Down Expand Up @@ -80,6 +82,7 @@ public static int getHeaderSize(int documentVersion) {
public abstract int getSizeInBytes();
public abstract boolean hasTrailer();
public abstract boolean hasSeparator();
public abstract Charset getChunkCharset();

/**
* Returns the ID/IX of the chunk
Expand Down
Expand Up @@ -17,6 +17,8 @@ Licensed to the Apache Software Foundation (ASF) under one or more

package org.apache.poi.hdgf.chunks;

import java.nio.charset.Charset;

/**
* A chunk header from v11+
*/
Expand All @@ -42,4 +44,9 @@ public boolean hasSeparator() {

return false;
}

/**
 * Returns the character set reported for chunks that carry this
 * (v11+) header.
 *
 * @return the charset registered under the name "UTF-16LE"
 */
@Override
public Charset getChunkCharset() {
    final String charsetName = "UTF-16LE";
    return Charset.forName(charsetName);
}
}
Expand Up @@ -17,6 +17,8 @@ Licensed to the Apache Software Foundation (ASF) under one or more

package org.apache.poi.hdgf.chunks;

import java.nio.charset.Charset;

/**
* A chunk header from v4 or v5
*/
Expand Down Expand Up @@ -54,4 +56,9 @@ public boolean hasSeparator() {
// V4 and V5 never has separators
return false;
}

/**
 * Returns the character set reported for chunks that carry this
 * (v4 or v5) header.
 *
 * @return the charset registered under the name "ASCII"
 */
@Override
public Charset getChunkCharset() {
    final String charsetName = "ASCII";
    return Charset.forName(charsetName);
}
}
Expand Up @@ -17,6 +17,8 @@ Licensed to the Apache Software Foundation (ASF) under one or more

package org.apache.poi.hdgf.chunks;

import java.nio.charset.Charset;

/**
* A chunk header from v6
*/
Expand Down Expand Up @@ -59,4 +61,9 @@ public boolean hasSeparator() {
// V6 never has separators
return false;
}

/**
 * Returns the character set reported for chunks that carry this
 * (v6) header.
 *
 * @return the charset registered under the name "ASCII"
 */
@Override
public Charset getChunkCharset() {
    final String charsetName = "ASCII";
    return Charset.forName(charsetName);
}
}
26 changes: 16 additions & 10 deletions src/scratchpad/src/org/apache/poi/hdgf/streams/ChunkStream.java
Expand Up @@ -52,19 +52,25 @@ public void findChunks() {

int pos = 0;
byte[] contents = getStore().getContents();
while(pos < contents.length) {
// Ensure we have enough data to create a chunk from
int headerSize = ChunkHeader.getHeaderSize(chunkFactory.getVersion());
if(pos+headerSize <= contents.length) {
Chunk chunk = chunkFactory.createChunk(contents, pos);
chunksA.add(chunk);
try {
while(pos < contents.length) {
// Ensure we have enough data to create a chunk from
int headerSize = ChunkHeader.getHeaderSize(chunkFactory.getVersion());
if(pos+headerSize <= contents.length) {
Chunk chunk = chunkFactory.createChunk(contents, pos);
chunksA.add(chunk);

pos += chunk.getOnDiskSize();
} else {
System.err.println("Needed " + headerSize + " bytes to create the next chunk header, but only found " + (contents.length-pos) + " bytes, ignoring rest of data");
pos = contents.length;
pos += chunk.getOnDiskSize();
} else {
System.err.println("Needed " + headerSize + " bytes to create the next chunk header, but only found " + (contents.length-pos) + " bytes, ignoring rest of data");
pos = contents.length;
}
}
}
catch (Exception e)
{
System.err.println("Failed to create chunk at " + pos + ", ignoring rest of data." + e);
}

chunks = chunksA.toArray(new Chunk[chunksA.size()]);
}
Expand Down
25 changes: 25 additions & 0 deletions src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFCore.java
Expand Up @@ -17,6 +17,7 @@ Licensed to the Apache Software Foundation (ASF) under one or more

package org.apache.poi.hdgf;

import org.apache.poi.hdgf.extractor.VisioTextExtractor;
import org.apache.poi.hdgf.streams.PointerContainingStream;
import org.apache.poi.hdgf.streams.TrailerStream;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
Expand Down Expand Up @@ -88,4 +89,28 @@ public void DISABLEDtestAIOOB() throws Exception {
HDGFDiagram hdgf = new HDGFDiagram(fs);
assertNotNull(hdgf);
}

/**
 * Opens the "v6-non-utf16le.vsd" sample, checks that a diagram can be
 * built from it, and compares the extracted text (with NUL characters
 * removed and surrounding whitespace trimmed) against the expected value.
 *
 * @throws Exception if the sample cannot be opened or parsed
 */
public void testV6NonUtf16LE() throws Exception {
    fs = new POIFSFileSystem(_dgTests.openResourceAsStream("v6-non-utf16le.vsd"));
    HDGFDiagram diagram = new HDGFDiagram(fs);
    assertNotNull(diagram);

    // Strip NUL characters before comparing, then trim the ends.
    String rawText = new VisioTextExtractor(diagram).getText();
    String cleaned = rawText.replace("\u0000", "").trim();

    assertEquals("Table\n\n\nPropertySheet\n\n\n\nPropertySheetField", cleaned);
}

/**
 * Opens the "Test_Visio-Some_Random_Text.vsd" sample, checks that a
 * diagram can be built from it, and compares the trimmed extracted text
 * against the expected value.
 *
 * @throws Exception if the sample cannot be opened or parsed
 */
public void testUtf16LE() throws Exception {
    fs = new POIFSFileSystem(_dgTests.openResourceAsStream("Test_Visio-Some_Random_Text.vsd"));
    HDGFDiagram diagram = new HDGFDiagram(fs);
    assertNotNull(diagram);

    String rawText = new VisioTextExtractor(diagram).getText();
    String trimmed = rawText.trim();

    assertEquals("text\nView\nTest View\nI am a test view\nSome random text, on a page", trimmed);
}
}

0 comments on commit e971e30

Please sign in to comment.