diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java b/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java index 3a7ca03fc21..95a567bb5b5 100644 --- a/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java +++ b/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java @@ -249,7 +249,25 @@ private static String getWorkbookDirEntryName(DirectoryNode directory) { public HSSFWorkbook(DirectoryNode directory, POIFSFileSystem fs, boolean preserveNodes) throws IOException { - super(directory, fs); + this(directory, preserveNodes); /* fs is not used by this overload; loading goes through the directory-only constructor */ + } + /** + * Given a specific directory within a POI filesystem, + * read in its Workbook and populate the high and + * low level models. If you're reading in a workbook...start here. + * + * @param directory the POI filesystem directory to process from + * @param preserveNodes whether to preserve other nodes, such as + * macros. This takes more memory, so only say yes if you + * need to. If set, will store all of the POIFSFileSystem + * in memory + * @see org.apache.poi.poifs.filesystem.POIFSFileSystem + * @exception IOException if the stream cannot be read + */ + public HSSFWorkbook(DirectoryNode directory, boolean preserveNodes) + throws IOException + { + super(directory); String workbookName = getWorkbookDirEntryName(directory); this.preserveNodes = preserveNodes; diff --git a/src/java/org/apache/poi/poifs/filesystem/NPOIFSDocument.java b/src/java/org/apache/poi/poifs/filesystem/NPOIFSDocument.java index 09536d4ade5..3a2c2860233 100644 --- a/src/java/org/apache/poi/poifs/filesystem/NPOIFSDocument.java +++ b/src/java/org/apache/poi/poifs/filesystem/NPOIFSDocument.java @@ -51,7 +51,7 @@ public NPOIFSDocument(DocumentProperty property, NPOIFSFileSystem filesystem) this._property = property; this._filesystem = filesystem; - if(property.getSize() <= POIFSConstants.BIG_BLOCK_MINIMUM_DOCUMENT_SIZE) { + if(property.getSize() < POIFSConstants.BIG_BLOCK_MINIMUM_DOCUMENT_SIZE) { _stream = new 
NPOIFSStream(_filesystem.getMiniStore(), property.getStartBlock()); _block_size = _filesystem.getMiniStore().getBlockStoreBlockSize(); } else { diff --git a/src/java/org/apache/poi/poifs/filesystem/NPOIFSFileSystem.java b/src/java/org/apache/poi/poifs/filesystem/NPOIFSFileSystem.java index e671d870bb8..8a7b6833153 100644 --- a/src/java/org/apache/poi/poifs/filesystem/NPOIFSFileSystem.java +++ b/src/java/org/apache/poi/poifs/filesystem/NPOIFSFileSystem.java @@ -107,7 +107,7 @@ public NPOIFSFileSystem() /** * Creates a POIFSFileSystem from a File. This uses less memory than - * creating from an InputStream. + * creating from an InputStream. The File will be opened read-only * * Note that with this constructor, you will need to call {@link #close()} * when you're done to have the underlying file closed, as the file is @@ -119,22 +119,78 @@ public NPOIFSFileSystem() */ public NPOIFSFileSystem(File file) throws IOException + { + this(file, true); + } + + /** + * Creates a POIFSFileSystem from a File. This uses less memory than + * creating from an InputStream. + * + * Note that with this constructor, you will need to call {@link #close()} + * when you're done to have the underlying file closed, as the file is + * kept open during normal operation to read the data out. + * + * @param file the File from which to read the data + * @param readOnly whether to open the File read-only ("r") rather than read-write ("rw") + * + * @exception IOException on errors reading, or on invalid data + */ + public NPOIFSFileSystem(File file, boolean readOnly) + throws IOException + { + this( + (new RandomAccessFile(file, readOnly? "r" : "rw")).getChannel(), + true + ); + } + + /** + * Creates a POIFSFileSystem from an open FileChannel. This uses + * less memory than creating from an InputStream. + * + * Note that with this constructor, you will need to call {@link #close()} + * when you're done to have the underlying Channel closed, as the channel is + * kept open during normal operation to read the data out. 
+ * + * @param channel the FileChannel from which to read the data + * + * @exception IOException on errors reading, or on invalid data + */ + public NPOIFSFileSystem(FileChannel channel) + throws IOException + { + this(channel, false); + } + + private NPOIFSFileSystem(FileChannel channel, boolean closeChannelOnError) + throws IOException { this(); - - // Open the underlying channel - FileChannel channel = (new RandomAccessFile(file, "r")).getChannel(); - - // Get the header - ByteBuffer headerBuffer = ByteBuffer.allocate(POIFSConstants.SMALLER_BIG_BLOCK_SIZE); - IOUtils.readFully(channel, headerBuffer); - - // Have the header processed - _header = new HeaderBlock(headerBuffer); - - // Now process the various entries - _data = new FileBackedDataSource(channel); - readCoreContents(); + + try { + // Get the header + ByteBuffer headerBuffer = ByteBuffer.allocate(POIFSConstants.SMALLER_BIG_BLOCK_SIZE); + IOUtils.readFully(channel, headerBuffer); + + // Have the header processed + _header = new HeaderBlock(headerBuffer); + + // Now process the various entries + _data = new FileBackedDataSource(channel); + readCoreContents(); + } catch(IOException e) { + if(closeChannelOnError) { + channel.close(); + } + throw e; + } catch(RuntimeException e) { + // Unchecked failures must not leak a channel that we opened ourselves + if(closeChannelOnError) { + channel.close(); + } + throw e; + } } /** @@ -435,7 +491,7 @@ protected int getFreeBlock() throws IOException { // Oh joy, we need a new XBAT too... 
xbat = createBAT(offset+1, false); xbat.setValueAt(0, offset); - bat.setValueAt(offset+1, POIFSConstants.DIFAT_SECTOR_BLOCK); + bat.setValueAt(1, POIFSConstants.DIFAT_SECTOR_BLOCK); // Will go one place higher as XBAT added in offset++; diff --git a/src/scratchpad/src/org/apache/poi/hslf/EncryptedSlideShow.java b/src/scratchpad/src/org/apache/poi/hslf/EncryptedSlideShow.java index 15625fd2a85..5170436c01d 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/EncryptedSlideShow.java +++ b/src/scratchpad/src/org/apache/poi/hslf/EncryptedSlideShow.java @@ -54,9 +54,8 @@ public class EncryptedSlideShow public static boolean checkIfEncrypted(HSLFSlideShow hss) { // Easy way to check - contains a stream // "EncryptedSummary" - POIFSFileSystem fs = hss.getPOIFSFileSystem(); try { - fs.getRoot().getEntry("EncryptedSummary"); + hss.getPOIFSDirectory().getEntry("EncryptedSummary"); /* looked up in the slide show's own directory rather than the filesystem root; the returned entry is discarded */ return true; } catch(FileNotFoundException fnfe) { // Doesn't have encrypted properties diff --git a/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java b/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java index c024b47eee0..a710ec93387 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java +++ b/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java @@ -78,6 +78,9 @@ public final class HSLFSlideShow extends POIDocument { protected POIFSFileSystem getPOIFSFileSystem() { return directory.getFileSystem(); } + protected DirectoryNode getPOIFSDirectory() { /* plain accessor: returns the directory field as-is */ + return directory; + } /** * Constructs a Powerpoint document from fileName. 
Parses the document diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java index 4186071bc8a..f1e1d8a9a29 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java @@ -163,14 +163,28 @@ public HWPFDocument(POIFSFileSystem pfilesystem) throws IOException * in a POIFSFileSystem, probably not the default. * Used typically to open embeded documents. * + * @param directory The Directory that contains the Word document. * @param pfilesystem The POIFSFileSystem that contains the Word document. * @throws IOException If there is an unexpected IOException from the passed * in POIFSFileSystem. */ public HWPFDocument(DirectoryNode directory, POIFSFileSystem pfilesystem) throws IOException + { + this(directory); /* pfilesystem is not consulted; only the directory is passed on */ + } + /** + * This constructor loads a Word document from a specific point + * in a POIFSFileSystem, probably not the default. + * Used typically to open embedded documents. + * + * @param directory The Directory that contains the Word document. + * @throws IOException If there is an unexpected IOException from the passed + * in directory. + */ + public HWPFDocument(DirectoryNode directory) throws IOException { // Sort out the hpsf properties - super(directory, pfilesystem); + super(directory); readProperties(); // read in the main stream. 
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java index deaffc79afb..64981e94e73 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java @@ -28,6 +28,7 @@ import org.apache.poi.hwpf.usermodel.HeaderStories; import org.apache.poi.hwpf.usermodel.Paragraph; import org.apache.poi.hwpf.usermodel.Range; +import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.POIFSFileSystem; /** @@ -59,6 +60,15 @@ public WordExtractor(POIFSFileSystem fs) throws IOException { this.fs = fs; } + /** + * Create a new Word Extractor + * @param dir DirectoryNode containing the word file + * @throws IOException if the HWPFDocument cannot be loaded from the directory + */ + public WordExtractor(DirectoryNode dir) throws IOException { + this(new HWPFDocument(dir)); + } + /** * Create a new Word Extractor * @param doc The HWPFDocument to extract from diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java index a1b78752f81..a32232ae661 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java @@ -16,12 +16,14 @@ */ package org.apache.poi.hwpf.extractor; +import java.io.File; import java.io.FileInputStream; import junit.framework.TestCase; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.poifs.filesystem.DirectoryNode; +import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem; /** @@ -225,4 +227,31 @@ public void testWithFooter() throws Exception { text.indexOf("The footer, with") > -1 ); } + + /** + * Tests that we can work with both {@link POIFSFileSystem} + * and {@link NPOIFSFileSystem} + */ + public void testDifferentPOIFS() 
throws Exception { + String dirname = System.getProperty("HWPF.testdata.path"); + File f = new File(dirname, "test2.doc"); + + // Open the two filesystems (NOTE(review): neither is closed before the test returns) + DirectoryNode[] files = new DirectoryNode[2]; + files[0] = (new POIFSFileSystem(new FileInputStream(f))).getRoot(); + files[1] = (new NPOIFSFileSystem(f)).getRoot(); + + // Open directly from each DirectoryNode + for(DirectoryNode dir : files) { + WordExtractor extractor = new WordExtractor(dir); + assertEquals(p_text1_block, extractor.getText()); + } + + // Open via a HWPFDocument built from each DirectoryNode + for(DirectoryNode dir : files) { + HWPFDocument doc = new HWPFDocument(dir); + WordExtractor extractor = new WordExtractor(doc); + assertEquals(p_text1_block, extractor.getText()); + } + } } diff --git a/src/testcases/org/apache/poi/hssf/HSSFTestDataSamples.java b/src/testcases/org/apache/poi/hssf/HSSFTestDataSamples.java index 0e64637f972..bf9113a3dee 100644 --- a/src/testcases/org/apache/poi/hssf/HSSFTestDataSamples.java +++ b/src/testcases/org/apache/poi/hssf/HSSFTestDataSamples.java @@ -48,7 +48,14 @@ public final class HSSFTestDataSamples { * @return an open InputStream for the specified sample file */ public static InputStream openSampleFileStream(String sampleFileName) { - + File f = getSampeFile(sampleFileName); + try { + return new FileInputStream(f); + } catch (FileNotFoundException e) { + throw new RuntimeException(e); + } + } + public static File getSampeFile(String sampleFileName) { /* NOTE(review): "Sampe" looks like a typo for "Sample"; left alone because renaming would change the public method name */ if(!_isInitialised) { try { initialise(); @@ -56,16 +63,6 @@ public static InputStream openSampleFileStream(String sampleFileName) { _isInitialised = true; } } - if (_sampleDataIsAvaliableOnClassPath) { - InputStream result = openClasspathResource(sampleFileName); - if(result == null) { - throw new RuntimeException("specified test sample file '" + sampleFileName - + "' not found on the classpath"); - } -// System.out.println("opening cp: " + sampleFileName); - // wrap to avoid temp warning method about auto-closing input stream - return new 
NonSeekableInputStream(result); - } if (_resolvedDataDir == null) { throw new RuntimeException("Must set system property '" + TEST_DATA_DIR_SYS_PROPERTY_NAME @@ -78,11 +75,7 @@ public static InputStream openSampleFileStream(String sampleFileName) { + "' not found in data dir '" + _resolvedDataDir.getAbsolutePath() + "'"); } // System.out.println("opening " + f.getAbsolutePath()); - try { - return new FileInputStream(f); - } catch (FileNotFoundException e) { - throw new RuntimeException(e); - } + return f; /* the File itself is returned; openSampleFileStream wraps it in a FileInputStream */ } private static void initialise() { diff --git a/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFWorkbook.java b/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFWorkbook.java index 4357c57e3b2..cd734a66da1 100644 --- a/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFWorkbook.java +++ b/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFWorkbook.java @@ -32,6 +32,9 @@ Licensed to the Apache Software Foundation (ASF) under one or more import org.apache.poi.hssf.record.RecordFormatException; import org.apache.poi.hssf.record.RecordInputStream; import org.apache.poi.hssf.record.formula.Area3DPtg; +import org.apache.poi.poifs.filesystem.DirectoryNode; +import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.util.LittleEndian; import org.apache.poi.util.TempFile; /** @@ -550,4 +553,31 @@ public void testFindBuiltInNameRecord() { nr = wb.getWorkbook().getNameRecord(2); assertEquals("Sheet2!E:F,Sheet2!$A$9:$IV$12", nr.getAreaReference(wb)); // E:F,9:12 } + + /** + * Tests that we can work with both {@link POIFSFileSystem} + * and {@link NPOIFSFileSystem}, using the Simple.xls sample + */ + public void testDifferentPOIFS() throws Exception { + // Open the two filesystems: one from a stream, one from the File + DirectoryNode[] files = new DirectoryNode[2]; + files[0] = (new POIFSFileSystem(HSSFTestDataSamples.openSampleFileStream("Simple.xls"))).getRoot(); + files[1] = (new 
NPOIFSFileSystem(HSSFTestDataSamples.getSampeFile("Simple.xls"))).getRoot(); + + // Open without preserving nodes (preserveNodes = false) + for(DirectoryNode dir : files) { + HSSFWorkbook workbook = new HSSFWorkbook(dir, false); + HSSFSheet sheet = workbook.getSheetAt(0); + HSSFCell cell = sheet.getRow(0).getCell(0); + assertEquals("replaceMe", cell .getRichStringCellValue().getString()); + } + + // Now re-check with preserving nodes (preserveNodes = true) + for(DirectoryNode dir : files) { + HSSFWorkbook workbook = new HSSFWorkbook(dir, true); + HSSFSheet sheet = workbook.getSheetAt(0); + HSSFCell cell = sheet.getRow(0).getCell(0); + assertEquals("replaceMe", cell .getRichStringCellValue().getString()); + } + } }