Permalink
Browse files

Tag as 3.0.1-RC3

git-svn-id: https://svn.apache.org/repos/asf/poi/tags/REL_3_0_1_RC3@551531 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information...
1 parent cd3e480 commit 2949f09894a38b9d5a91af57039224a9f9e46cdd @Gagravarr Gagravarr committed Jun 28, 2007
Showing with 522 additions and 18 deletions.
  1. +12 −1 legal/NOTICE
  2. +1 −0 src/documentation/content/xdocs/book.xml
  3. +4 −1 src/documentation/content/xdocs/changes.xml
  4. +34 −0 src/documentation/content/xdocs/hdgf/book.xml
  5. +98 −0 src/documentation/content/xdocs/hdgf/index.xml
  6. +1 −1 src/documentation/content/xdocs/hslf/book.xml
  7. +2 −2 src/documentation/content/xdocs/hslf/index.xml
  8. +1 −1 src/documentation/content/xdocs/hssf/how-to.xml
  9. +1 −1 src/documentation/content/xdocs/hwpf/index.xml
  10. +2 −2 src/documentation/content/xdocs/hwpf/quick-guide.xml
  11. +12 −1 src/documentation/content/xdocs/index.xml
  12. +4 −1 src/documentation/content/xdocs/status.xml
  13. +30 −3 src/scratchpad/src/org/apache/poi/hdgf/chunks/Chunk.java
  14. +8 −1 src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkFactory.java
  15. +4 −0 src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV11.java
  16. +4 −0 src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkSeparator.java
  17. +4 −0 src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkTrailer.java
  18. +8 −0 src/scratchpad/src/org/apache/poi/hdgf/dev/VSDDumper.java
  19. +114 −0 src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java
  20. +5 −0 src/scratchpad/src/org/apache/poi/hdgf/streams/ChunkStream.java
  21. +1 −1 src/scratchpad/src/org/apache/poi/hdgf/streams/Stream.java
  22. +5 −2 src/scratchpad/src/org/apache/poi/hdgf/streams/StringsStream.java
  23. +107 −0 src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java
  24. +60 −0 src/scratchpad/testcases/org/apache/poi/hdgf/streams/TestStreamComplex.java
View
@@ -1,5 +1,16 @@
-Apache Jakarta POI
+Apache POI
Copyright 2001-2007 The Apache Software Foundation
This product includes software developed by
The Apache Software Foundation (http://www.apache.org/).
+
+
+Unit testing support is provided by JUnit, under the
+Common Public License Version 1.0:
+ http://www.opensource.org/licenses/cpl.php
+See http://www.junit.org/
+
+Small parts of the POI component HDGF are based on VSDump,
+and are under the GNU General Public Licence version 3 (GPL v3):
+ http://gplv3.fsf.org/
+See http://www.gnome.ru/projects/vsdump_en.html
@@ -39,6 +39,7 @@
<menu-item label="HWPF" href="hwpf/index.html"/>
<menu-item label="HPSF" href="hpsf/index.html"/>
<menu-item label="HSLF" href="hslf/index.html"/>
+ <menu-item label="HDGF" href="hdgf/index.html"/>
<menu-item label="POI-Ruby" href="poi-ruby.html"/>
<menu-item label="POI-Utils" href="utils/index.html"/>
<menu-item label="Download" href="ext:download"/>
@@ -35,7 +35,7 @@
<person id="YK" name="Yegor Kozlov" email="yegor@apache.org"/>
</devs>
- <release version="3.0.1-FINAL" date="2007-06-15">
+ <release version="3.0.1-FINAL" date="2007-07-05">
<action dev="POI-DEVELOPERS" type="fix">Administrative updates to the Maven POMs, and the release artifact build process</action>
<action dev="POI-DEVELOPERS" type="fix">23951 - [PATCH] Fix for HSSF setSheetOrder and tab names</action>
<action dev="POI-DEVELOPERS" type="fix">42524 - [PATCH] Better HSLF support for problem shape groups</action>
@@ -44,6 +44,9 @@
<action dev="POI-DEVELOPERS" type="add">Additional HSLF support for Title and Slide Master Sheets</action>
<action dev="POI-DEVELOPERS" type="fix">42474 - [PATCH] Improved HSLF note to slide matching, and a NPE</action>
<action dev="POI-DEVELOPERS" type="fix">42481 - [PATCH] Tweak some HSLF exceptions, to make it clearer what you're catching</action>
+ <action dev="POI-DEVELOPERS" type="fix">42667 - [PATCH] Fix for HSLF writing of files with tables</action>
+ <action dev="POI-DEVELOPERS" type="add">Improved way of detecting HSSF cells that contain dates, isADateFormat</action>
+ <action dev="POI-DEVELOPERS" type="add">Initial, read-only support for Visio documents, as HDGF</action>
</release>
<release version="3.0-FINAL" date="2007-05-18">
@@ -0,0 +1,34 @@
+<?xml version="1.0"?>
+<!--
+ ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ ====================================================================
+-->
+<!DOCTYPE book PUBLIC "-//APACHE//DTD Cocoon Documentation Book V1.0//EN" "../dtd/book-cocoon-v10.dtd">
+
+<book software="POI Project"
+ title="HDGF"
+ copyright="@year@ POI Project">
+
+ <menu label="Apache POI">
+ <menu-item label="Top" href="../index.html"/>
+ </menu>
+
+ <menu label="HDGF">
+ <menu-item label="Overview" href="index.html"/>
+ </menu>
+
+</book>
@@ -0,0 +1,98 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ ====================================================================
+-->
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V1.1//EN" "../dtd/document-v11.dtd">
+
+<document>
+ <header>
+ <title>POI-HDGF - Java API To Access Microsoft Visio Format Files</title>
+ <subtitle>Overview</subtitle>
+ <authors>
+ <person name="Nick Burch" email="nick at apache dot org"/>
+ </authors>
+ </header>
+
+ <body>
+ <section>
+ <title>Overview</title>
+
+ <p>HDGF is the POI Project's pure Java implementation of the Visio file format.</p>
+ <p>Currently, HDGF provides a low-level, read-only api for
+ accessing Visio documents. It also provides a
+ <link href="http://svn.apache.org/repos/asf/poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/extractor/">way</link>
+ to extract the textual content from a file.
+ </p>
+ <p>At this time, there is no <em>usermodel</em> api or similar,
+ only low level access to the streams, chunks and chunk commands.
+ Users are advised to check the unit tests to see how everything
+ works. They are also well advised to read the documentation
+ supplied with
+ <link href="http://www.gnome.ru/projects/vsdump_en.html">vsdump</link>
+ to get a feel for how Visio files are structured.</p>
+ <p>To get a feel for the contents of a file, and to track down
+ where data of interest is stored, HDGF comes with
+ <link href="http://svn.apache.org/repos/asf/poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/dev/">VSDDumper</link>
+ to print out the contents of the file. Users should also make
+ use of
+ <link href="http://www.gnome.ru/projects/vsdump_en.html">vsdump</link>
+ to probe the structure of files.</p>
+ <note>
+ This code currently lives in the
+ <link href="http://svn.apache.org/viewcvs.cgi/poi/trunk/src/scratchpad/">scratchpad area</link>
+ of the POI SVN repository.
+ Ensure that you have the scratchpad jar or the scratchpad
+ build area in your
+ classpath before experimenting with this code.
+ </note>
+
+ <section>
+ <title>Steps required for write support</title>
+ <p>Currently, HDGF is only able to read Visio files; it is
+ not able to write them back out again. We believe the
+ following are the steps that would need to be taken to
+ implement it.</p>
+ <ol>
+ <li>Re-write the decompression support in LZW4HDGF to be
+ less opaque, and also under the ASL.</li>
+ <li>Add compression support to the new LZW4HDGF.</li>
+ <li>Have HDGF just write back the raw bytes it read in, and
+ have a test to ensure the file is un-changed.</li>
+ <li>Have HDGF generate the bytes to write out from the
+ Stream stores, using the compressed data as appropriate,
+ without re-compressing. Plus test to ensure file is
+ un-changed.</li>
+ <li>Have HDGF generate the bytes to write out from the
+ Stream stores, re-compressing any streams that were
+ decompressed. Plus test to ensure file is un-changed.</li>
+ <li>Have HDGF re-generate the offsets in pointers for the
+ locations of the streams. Plus test to ensure file is
+ un-changed.</li>
+ <li>Have HDGF re-generate the bytes for all the chunks, from
+ the chunk commands. Tests to ensure the chunks are
+ serialized properly, and then that the file is un-changed.</li>
+ <li>Alter the data of one command, but keep it the same
+ length, and check visio can open the file when written
+ out.</li>
+ <li>Alter the data of one command, to a new length, and
+ check that visio can open the file when written out.</li>
+ </ol>
+ </section>
+ </section>
+ </body>
+</document>
@@ -20,7 +20,7 @@
<!DOCTYPE book PUBLIC "-//APACHE//DTD Cocoon Documentation Book V1.0//EN" "../dtd/book-cocoon-v10.dtd">
<book software="POI Project"
- title="HSSF"
+ title="HSLF"
copyright="@year@ POI Project">
<menu label="Apache POI">
@@ -34,12 +34,12 @@
<title>Overview</title>
<p>HSLF is the POI Project's pure Java implementation of the Powerpoint file format.</p>
- <p>HSSF provides a way to read powerpoint presentations, and extract text from it.
+ <p>HSLF provides a way to read powerpoint presentations, and extract text from it.
It also provides some (currently limited) edit capabilities.
</p>
<note>
This code currently lives in the
- <link href="http://svn.apache.org/viewcvs.cgi/jakarta/poi/trunk/src/scratchpad/">scratchpad area</link>
+ <link href="http://svn.apache.org/viewcvs.cgi/poi/trunk/src/scratchpad/">scratchpad area</link>
of the POI SVN repository.
Ensure that you have the scratchpad jar or the scratchpad
build area in your
@@ -460,7 +460,7 @@ some of the rows or cells. It can be found at
<code>/src/scratchpad/examples/src/org/apache/poi/hssf/eventusermodel/examples/XLS2CSVmra.java</code>,
and may be called on the command line, or from within your own code.
The latest version is always available from
-<link href="http://svn.apache.org/repos/asf/jakarta/poi/trunk/src/scratchpad/examples/src/org/apache/poi/hssf/eventusermodel/examples/">subversion</link>.
+<link href="http://svn.apache.org/repos/asf/poi/trunk/src/scratchpad/examples/src/org/apache/poi/hssf/eventusermodel/examples/">subversion</link>.
</p>
<p>
<em>This code is currently in the scratchpad section, so you will either
@@ -38,7 +38,7 @@
to pure Java.</p>
<p>HWPF is still in early development. It is in the <link
- href="http://svn.apache.org/viewcvs.cgi/jakarta/poi/trunk/src/scratchpad/">
+ href="http://svn.apache.org/viewcvs.cgi/poi/trunk/src/scratchpad/">
scratchpad section of the SVN.</link> You will need to ensure you
either have a recent SVN checkout, or a recent SVN nightly build
(including the scratchpad jar!)</p>
@@ -30,7 +30,7 @@
<body>
<p>HWPF is still in early development. It is in the <link
- href="http://svn.apache.org/viewcvs.cgi/jakarta/poi/trunk/src/scratchpad/">
+ href="http://svn.apache.org/viewcvs.cgi/poi/trunk/src/scratchpad/">
scratchpad section of the SVN.</link> You will need to ensure you
either have a recent SVN checkout, or a recent SVN nightly build
(including the scratchpad jar!)</p>
@@ -68,7 +68,7 @@ can then get text and other properties.
<section><title>Further Examples</title>
<p>For now, the best source of additional examples is in the unit
tests. <link
- href="http://svn.apache.org/viewvc/jakarta/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/">
+ href="http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/">
Browse the HWPF unit tests.</link>
</p>
</section>
@@ -38,6 +38,10 @@
<link href="http://www.apache.org/dyn/closer.cgi/poi/release/">download</link>
the source and binaries from your
<link href="http://www.apache.org/dyn/closer.cgi/poi/release/">local mirror</link>.</p>
+ <p>We would also like to confirm that version 3.0 of Apache POI does
+ <em>not</em> contain any viruses. Users of broken virus checkers
+ which do detect a 94 byte file, sci_cec.db, as containing one are
+ advised to contact their vendor for a fix.</p>
</section>
<section><title>Purpose</title>
@@ -107,12 +111,19 @@
development. Jump in!</p>
</section>
<section><title>HSLF for PowerPoint Documents</title>
- <p>HWSL is our port of the Microsoft PowerPoint 97(-2003) file format to pure
+ <p>HSLF is our port of the Microsoft PowerPoint 97(-2003) file format to pure
Java. It supports read and write capabilities of some, but not yet all
of the core records. Please see <link
href="./hslf/index.html">the HSLF project page for more
information</link>.</p>
</section>
+ <section><title>HDGF for Visio Documents</title>
+ <p>HDGF is our port of the Microsoft Visio 97(-2003) file format to pure
+ Java. It currently only supports reading at a very low level, and
+ simple text extraction. Please see <link
+ href="./hdgf/index.html">the HDGF project page for more
+ information</link>.</p>
+ </section>
<section><title>HPSF for Document Properties</title>
<p>HPSF is our port of the OLE 2 property set format to pure
Java. Property sets are mostly used to store a document's properties
@@ -32,7 +32,7 @@
</developers>
<changes>
- <release version="3.0.1-FINAL" date="2007-06-15">
+ <release version="3.0.1-FINAL" date="2007-07-05">
<action dev="POI-DEVELOPERS" type="fix">Administrative updates to the Maven POMs, and the release artifact build process</action>
<action dev="POI-DEVELOPERS" type="fix">23951 - [PATCH] Fix for HSSF setSheetOrder and tab names</action>
<action dev="POI-DEVELOPERS" type="fix">42524 - [PATCH] Better HSLF support for problem shape groups</action>
@@ -41,6 +41,9 @@
<action dev="POI-DEVELOPERS" type="add">Additional HSLF support for Title and Slide Master Sheets</action>
<action dev="POI-DEVELOPERS" type="fix">42474 - [PATCH] Improved HSLF note to slide matching, and a NPE</action>
<action dev="POI-DEVELOPERS" type="fix">42481 - [PATCH] Tweak some HSLF exceptions, to make it clearer what you're catching</action>
+ <action dev="POI-DEVELOPERS" type="fix">42667 - [PATCH] Fix for HSLF writing of files with tables</action>
+ <action dev="POI-DEVELOPERS" type="add">Improved way of detecting HSSF cells that contain dates, isADateFormat</action>
+ <action dev="POI-DEVELOPERS" type="add">Initial, read-only support for Visio documents, as HDGF</action>
</release>
<release version="3.0-FINAL" date="2007-05-18">
@@ -20,6 +20,9 @@ Licensed to the Apache Software Foundation (ASF) under one or more
import org.apache.poi.hdgf.chunks.ChunkFactory.CommandDefinition;
import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+import org.apache.poi.util.StringUtil;
/**
* Base of all chunks, which hold data, flags etc
@@ -44,6 +47,9 @@ Licensed to the Apache Software Foundation (ASF) under one or more
/** The name of the chunk, as found from the commandDefinitions */
private String name;
+ /** For logging warnings about the structure of the file */
+ private POILogger logger = POILogFactory.getLogger(Chunk.class);
+
public Chunk(ChunkHeader header, ChunkTrailer trailer, ChunkSeparator separator, byte[] contents) {
this.header = header;
this.trailer = trailer;
@@ -148,7 +154,9 @@ protected void processCommands() {
// Check we seem to have enough data
if(offset >= contents.length) {
- System.err.println("Command offset " + offset + " past end of data at " + contents.length);
+ logger.log(POILogger.WARN,
+ "Command offset " + offset + " past end of data at " + contents.length
+ );
continue;
}
@@ -167,9 +175,27 @@ protected void processCommands() {
LittleEndian.getDouble(contents, offset)
);
break;
+ case 12:
+ // A Little Endian String
+ // Starts 8 bytes into the data segment
+ // Ends at end of data, or 00 00
+ int startsAt = 8;
+ int endsAt = startsAt;
+ for(int j=startsAt; j<contents.length-1 && endsAt == startsAt; j++) {
+ if(contents[j] == 0 && contents[j+1] == 0) {
+ endsAt = j;
+ }
+ }
+ if(endsAt == startsAt) {
+ endsAt = contents.length;
+ }
+
+ int strLen = (endsAt-startsAt) / 2;
+ command.value = StringUtil.getFromUnicodeLE(contents, startsAt, strLen);
+ break;
case 25:
command.value = new Short(
- LittleEndian.getShort(contents, offset)
+ LittleEndian.getShort(contents, offset)
);
break;
case 26:
@@ -188,7 +214,8 @@ protected void processCommands() {
break;
default:
- //System.err.println("Warning - Command of type " + type + " not processed!");
+ logger.log(POILogger.INFO,
+ "Command of type " + type + " not processed!");
}
// Add to the array
@@ -24,6 +24,9 @@ Licensed to the Apache Software Foundation (ASF) under one or more
import java.util.Hashtable;
import java.util.StringTokenizer;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+
/**
* Factory class to create the appropriate chunks, which
* needs the version of the file to process the chunk header
@@ -42,6 +45,9 @@ Licensed to the Apache Software Foundation (ASF) under one or more
private static String chunkTableName =
"/org/apache/poi/hdgf/chunks/chunks_parse_cmds.tbl";
+ /** For logging problems we spot with the file */
+ private POILogger logger = POILogFactory.getLogger(ChunkFactory.class);
+
public ChunkFactory(int version) throws IOException {
this.version = version;
@@ -107,7 +113,8 @@ public Chunk createChunk(byte[] data, int offset) {
// Check we have enough data, and tweak the header size
// as required
if(endOfDataPos > data.length) {
- System.err.println("Header called for " + header.getLength() +" bytes, but that would take us passed the end of the data!");
+ logger.log(POILogger.WARN,
+ "Header called for " + header.getLength() +" bytes, but that would take us passed the end of the data!");
endOfDataPos = data.length;
header.length = data.length - offset - header.getSizeInBytes();
Oops, something went wrong.

0 comments on commit 2949f09

Please sign in to comment.