Skip to content

Commit

Permalink
Tweak HSLF and HWPF to work well with NPOIFS, and add unit tests for …
Browse files Browse the repository at this point in the history
…this

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1054191 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
Gagravarr committed Jan 1, 2011
1 parent 981b7cb commit 2d583f4
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 38 deletions.
56 changes: 27 additions & 29 deletions src/scratchpad/src/org/apache/poi/hslf/EncryptedSlideShow.java
Expand Up @@ -25,7 +25,6 @@ Licensed to the Apache Software Foundation (ASF) under one or more
import org.apache.poi.hslf.record.PersistPtrHolder;
import org.apache.poi.hslf.record.Record;
import org.apache.poi.hslf.record.UserEditAtom;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

/**
* This class provides helper functions for determining if a
Expand All @@ -39,34 +38,33 @@ Licensed to the Apache Software Foundation (ASF) under one or more

public final class EncryptedSlideShow
{
/**
* Check to see if a HSLFSlideShow represents an encrypted
* PowerPoint document, or not.
* <p>Two checks are made in order: first for an "EncryptedSummary"
* entry in the root of the file system, then for a
* DocumentEncryptionAtom returned by fetchDocumentEncryptionAtom.
* @param hss The HSLFSlideShow to check
* @return true if encrypted, otherwise false
*/
public static boolean checkIfEncrypted(HSLFSlideShow hss) {
// Easy way to check - contains a stream
// "EncryptedSummary"
POIFSFileSystem fs = hss.getPOIFSFileSystem();
try {
fs.getRoot().getEntry("EncryptedSummary");
return true;
} catch(FileNotFoundException fnfe) {
// Doesn't have encrypted properties, so fall through
// to the second check below
}

// If they encrypted the document but not the properties,
// it's harder.
// We need to see what the last record pointed to by the
// first PersistPtrHolder is - if it's a
// DocumentEncryptionAtom, then the file is encrypted
DocumentEncryptionAtom dea = fetchDocumentEncryptionAtom(hss);
if(dea != null) {
return true;
}
return false;
}
/**
 * Reports whether the supplied HSLFSlideShow is an encrypted
 * PowerPoint document.
 * <p>The cheap test is tried first: an entry named
 * "EncryptedSummary" in the slideshow's directory. Only when that
 * entry is absent is the DocumentEncryptionAtom looked up.
 *
 * @param hss The HSLFSlideShow to check
 * @return true if encrypted, otherwise false
 */
public static boolean checkIfEncrypted(HSLFSlideShow hss) {
    // Quick test: encrypted properties live in an
    // "EncryptedSummary" stream
    boolean hasEncryptedSummary;
    try {
        hss.getPOIFSDirectory().getEntry("EncryptedSummary");
        hasEncryptedSummary = true;
    } catch (FileNotFoundException notPresent) {
        // No encrypted properties stream in this document
        hasEncryptedSummary = false;
    }
    if (hasEncryptedSummary) {
        return true;
    }

    // The document may be encrypted while its properties are not.
    // In that case the last record referenced by the first
    // PersistPtrHolder is a DocumentEncryptionAtom, so the file is
    // encrypted exactly when one can be fetched.
    return fetchDocumentEncryptionAtom(hss) != null;
}

/**
* Return the DocumentEncryptionAtom for a HSLFSlideShow, or
Expand Down
8 changes: 8 additions & 0 deletions src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java
Expand Up @@ -84,6 +84,14 @@ protected POIFSFileSystem getPOIFSFileSystem() {
return directory.getFileSystem();
}

/**
* Returns the directory in the underlying POIFSFileSystem for the
* document that is open.
* @return the directory field held by this object
*/
protected DirectoryNode getPOIFSDirectory() {
return directory;
}

/**
* Constructs a Powerpoint document from fileName. Parses the document
* and places all the important stuff into data structures.
Expand Down
Expand Up @@ -17,20 +17,20 @@ Licensed to the Apache Software Foundation (ASF) under one or more

package org.apache.poi.hslf.extractor;

import java.io.FileInputStream;
import java.io.InputStream;
import java.util.List;

import junit.framework.TestCase;

import org.apache.poi.POIDataSamples;
import org.apache.poi.hslf.HSLFSlideShow;
import org.apache.poi.hslf.model.OLEShape;
import org.apache.poi.hslf.usermodel.SlideShow;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.POIDataSamples;

import junit.framework.TestCase;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

/**
* Tests that the extractor correctly gets the text out of our sample file
Expand All @@ -40,8 +40,13 @@ Licensed to the Apache Software Foundation (ASF) under one or more
public final class TestExtractor extends TestCase {
/** Extractor primed on the 2 page basic test data */
private PowerPointExtractor ppe;
private static final String expectText = "This is a test title\nThis is a test subtitle\nThis is on page 1\nThis is the title on page 2\nThis is page two\nIt has several blocks of text\nNone of them have formatting\n";

/** Extractor primed on the 1 page but text-box'd test data */
private PowerPointExtractor ppe2;
private static final String expectText2 = "Hello, World!!!\nI am just a poor boy\nThis is Times New Roman\nPlain Text \n";


/** Where our embedded files live */
//private String pdirname;
private static POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
Expand All @@ -55,16 +60,14 @@ protected void setUp() throws Exception {
/**
* Checks the text returned by getText() on both extractors
* (ppe and ppe2) against the expected strings, using
* ensureTwoStringsTheSame for each comparison.
*/
public void testReadSheetText() {
// Basic 2 page example
String sheetText = ppe.getText();
String expectText = "This is a test title\nThis is a test subtitle\nThis is on page 1\nThis is the title on page 2\nThis is page two\nIt has several blocks of text\nNone of them have formatting\n";

ensureTwoStringsTheSame(expectText, sheetText);


// 1 page example with text boxes
sheetText = ppe2.getText();
expectText = "Hello, World!!!\nI am just a poor boy\nThis is Times New Roman\nPlain Text \n";

ensureTwoStringsTheSame(expectText, sheetText);
ensureTwoStringsTheSame(expectText2, sheetText);
}

public void testReadNoteText() {
Expand Down Expand Up @@ -273,4 +276,28 @@ public void testMasterText() throws Exception {
assertTrue(text.contains("Master Header Text"));
}


/**
 * Checks that text extraction gives the same result whether the
 * sample file is opened through a {@link POIFSFileSystem} or a
 * {@link NPOIFSFileSystem}.
 */
public void testDifferentPOIFS() throws Exception {
    // Root directories of the same sample file, one per filesystem flavour
    DirectoryNode[] roots = {
        new POIFSFileSystem(slTests.openResourceAsStream("basic_test_ppt_file.ppt")).getRoot(),
        new NPOIFSFileSystem(slTests.getFile("basic_test_ppt_file.ppt")).getRoot()
    };

    // Hand each directory straight to the extractor
    for (DirectoryNode root : roots) {
        String extracted = new PowerPointExtractor(root, null).getText();
        assertEquals(expectText, extracted);
    }

    // Go through an HSLFSlideShow built on each directory
    for (DirectoryNode root : roots) {
        HSLFSlideShow show = new HSLFSlideShow(root);
        String extracted = new PowerPointExtractor(show).getText();
        assertEquals(expectText, extracted);
    }
}
}
Expand Up @@ -24,6 +24,7 @@ Licensed to the Apache Software Foundation (ASF) under one or more
import org.apache.poi.hwpf.HWPFTestDataSamples;
import org.apache.poi.hwpf.OldWordFileFormatException;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

/**
Expand Down Expand Up @@ -314,4 +315,30 @@ public void testFirstParagraphFix() throws Exception {

assertTrue(text.startsWith("\u041f\u0440\u0438\u043b\u043e\u0436\u0435\u043d\u0438\u0435"));
}

/**
 * Checks that text extraction gives the same result whether the
 * sample file is opened through a {@link POIFSFileSystem} or a
 * {@link NPOIFSFileSystem}.
 */
public void testDifferentPOIFS() throws Exception {
    POIDataSamples samples = POIDataSamples.getDocumentInstance();

    // Root directories of the same sample file, one per filesystem flavour
    DirectoryNode[] roots = {
        new POIFSFileSystem(samples.openResourceAsStream("test2.doc")).getRoot(),
        new NPOIFSFileSystem(samples.getFile("test2.doc")).getRoot()
    };

    // Hand each directory straight to the extractor
    for (DirectoryNode root : roots) {
        String extracted = new WordExtractor(root, null).getText();
        assertEquals(p_text1_block, extracted);
    }

    // Go through a HWPFDocument built on each directory
    for (DirectoryNode root : roots) {
        HWPFDocument document = new HWPFDocument(root);
        String extracted = new WordExtractor(document).getText();
        assertEquals(p_text1_block, extracted);
    }
}
}

0 comments on commit 2d583f4

Please sign in to comment.