-
Notifications
You must be signed in to change notification settings - Fork 736
/
HSLFSlideShow.java
586 lines (511 loc) · 20.7 KB
/
HSLFSlideShow.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.List;
import org.apache.poi.POIDocument;
import org.apache.poi.hslf.exceptions.CorruptPowerPointFileException;
import org.apache.poi.hslf.exceptions.EncryptedPowerPointFileException;
import org.apache.poi.hslf.exceptions.HSLFException;
import org.apache.poi.hslf.record.CurrentUserAtom;
import org.apache.poi.hslf.record.ExOleObjStg;
import org.apache.poi.hslf.record.PersistPtrHolder;
import org.apache.poi.hslf.record.PersistRecord;
import org.apache.poi.hslf.record.PositionDependentRecord;
import org.apache.poi.hslf.record.Record;
import org.apache.poi.hslf.record.UserEditAtom;
import org.apache.poi.hslf.usermodel.ObjectData;
import org.apache.poi.hslf.usermodel.PictureData;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
/**
* This class contains the main functionality for the Powerpoint file
* "reader". It is only a very basic class for now
*
* @author Nick Burch
*/
public final class HSLFSlideShow extends POIDocument {
// For logging
private POILogger logger = POILogFactory.getLogger(this.getClass());
// Holds metadata on where things are in our document
private CurrentUserAtom currentUser;
// Low level contents of the file
private byte[] _docstream;
// Low level contents
private Record[] _records;
// Raw Pictures contained in the pictures stream
private List<PictureData> _pictures;
// Embedded objects stored in storage records in the document stream, lazily populated.
private ObjectData[] _objects;
/**
* Returns the underlying POIFSFileSystem for the document
* that is open.
*/
protected POIFSFileSystem getPOIFSFileSystem() {
return directory.getFileSystem();
}
/**
* Returns the directory in the underlying POIFSFileSystem for the
* document that is open.
*/
protected DirectoryNode getPOIFSDirectory() {
return directory;
}
/**
* Constructs a Powerpoint document from fileName. Parses the document
* and places all the important stuff into data structures.
*
* @param fileName The name of the file to read.
* @throws IOException if there is a problem while parsing the document.
*/
public HSLFSlideShow(String fileName) throws IOException
{
this(new FileInputStream(fileName));
}
/**
* Constructs a Powerpoint document from an input stream. Parses the
* document and places all the important stuff into data structures.
*
* @param inputStream the source of the data
* @throws IOException if there is a problem while parsing the document.
*/
public HSLFSlideShow(InputStream inputStream) throws IOException {
//do Ole stuff
this(new POIFSFileSystem(inputStream));
}
/**
* Constructs a Powerpoint document from a POIFS Filesystem. Parses the
* document and places all the important stuff into data structures.
*
* @param filesystem the POIFS FileSystem to read from
* @throws IOException if there is a problem while parsing the document.
*/
public HSLFSlideShow(POIFSFileSystem filesystem) throws IOException
{
this(filesystem.getRoot());
}
/**
* Constructs a Powerpoint document from a specific point in a
* POIFS Filesystem. Parses the document and places all the
* important stuff into data structures.
*
* @param dir the POIFS directory to read from
* @param filesystem the POIFS FileSystem to read from
* @throws IOException if there is a problem while parsing the document.
*/
public HSLFSlideShow(DirectoryNode dir, POIFSFileSystem filesystem) throws IOException
{
this(dir);
}
/**
* Constructs a Powerpoint document from a specific point in a
* POIFS Filesystem. Parses the document and places all the
* important stuff into data structures.
*
* @param dir the POIFS directory to read from
* @throws IOException if there is a problem while parsing the document.
*/
public HSLFSlideShow(DirectoryNode dir) throws IOException
{
super(dir);
// First up, grab the "Current User" stream
// We need this before we can detect Encrypted Documents
readCurrentUserStream();
// Next up, grab the data that makes up the
// PowerPoint stream
readPowerPointStream();
// Check to see if we have an encrypted document,
// bailing out if we do
boolean encrypted = EncryptedSlideShow.checkIfEncrypted(this);
if(encrypted) {
throw new EncryptedPowerPointFileException("Encrypted PowerPoint files are not supported");
}
// Now, build records based on the PowerPoint stream
buildRecords();
// Look for any other streams
readOtherStreams();
// Look for Picture Streams:
readPictures();
}
/**
* Constructs a new, empty, Powerpoint document.
*/
public static final HSLFSlideShow create() {
InputStream is = HSLFSlideShow.class.getResourceAsStream("data/empty.ppt");
if (is == null) {
throw new RuntimeException("Missing resource 'empty.ppt'");
}
try {
return new HSLFSlideShow(is);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
/**
* Extracts the main PowerPoint document stream from the
* POI file, ready to be passed
*
* @throws IOException
*/
private void readPowerPointStream() throws IOException
{
// Get the main document stream
DocumentEntry docProps =
(DocumentEntry)directory.getEntry("PowerPoint Document");
// Grab the document stream
_docstream = new byte[docProps.getSize()];
directory.createDocumentInputStream("PowerPoint Document").read(_docstream);
}
/**
* Builds the list of records, based on the contents
* of the PowerPoint stream
*/
private void buildRecords()
{
// The format of records in a powerpoint file are:
// <little endian 2 byte "info">
// <little endian 2 byte "type">
// <little endian 4 byte "length">
// If it has a zero length, following it will be another record
// <xx xx yy yy 00 00 00 00> <xx xx yy yy zz zz zz zz>
// If it has a length, depending on its type it may have children or data
// If it has children, these will follow straight away
// <xx xx yy yy zz zz zz zz <xx xx yy yy zz zz zz zz>>
// If it has data, this will come straigh after, and run for the length
// <xx xx yy yy zz zz zz zz dd dd dd dd dd dd dd>
// All lengths given exclude the 8 byte record header
// (Data records are known as Atoms)
// Document should start with:
// 0F 00 E8 03 ## ## ## ##
// (type 1000 = document, info 00 0f is normal, rest is document length)
// 01 00 E9 03 28 00 00 00
// (type 1001 = document atom, info 00 01 normal, 28 bytes long)
// 80 16 00 00 E0 10 00 00 xx xx xx xx xx xx xx xx
// 05 00 00 00 0A 00 00 00 xx xx xx
// (the contents of the document atom, not sure what it means yet)
// (records then follow)
// When parsing a document, look to see if you know about that type
// of the current record. If you know it's a type that has children,
// process the record's data area looking for more records
// If you know about the type and it doesn't have children, either do
// something with the data (eg TextRun) or skip over it
// If you don't know about the type, play safe and skip over it (using
// its length to know where the next record will start)
//
_records = read(_docstream, (int)currentUser.getCurrentEditOffset());
}
private Record[] read(byte[] docstream, int usrOffset){
ArrayList<Integer> lst = new ArrayList<Integer>();
HashMap<Integer,Integer> offset2id = new HashMap<Integer,Integer>();
while (usrOffset != 0){
UserEditAtom usr = (UserEditAtom) Record.buildRecordAtOffset(docstream, usrOffset);
lst.add(Integer.valueOf(usrOffset));
int psrOffset = usr.getPersistPointersOffset();
PersistPtrHolder ptr = (PersistPtrHolder)Record.buildRecordAtOffset(docstream, psrOffset);
lst.add(Integer.valueOf(psrOffset));
Hashtable<Integer,Integer> entries = ptr.getSlideLocationsLookup();
for(Integer id : entries.keySet()) {
Integer offset = entries.get(id);
lst.add(offset);
offset2id.put(offset, id);
}
usrOffset = usr.getLastUserEditAtomOffset();
}
//sort found records by offset.
//(it is not necessary but SlideShow.findMostRecentCoreRecords() expects them sorted)
Integer a[] = lst.toArray(new Integer[lst.size()]);
Arrays.sort(a);
Record[] rec = new Record[lst.size()];
for (int i = 0; i < a.length; i++) {
Integer offset = a[i];
rec[i] = Record.buildRecordAtOffset(docstream, offset.intValue());
if(rec[i] instanceof PersistRecord) {
PersistRecord psr = (PersistRecord)rec[i];
Integer id = offset2id.get(offset);
psr.setPersistId(id.intValue());
}
}
return rec;
}
/**
* Find the "Current User" stream, and load it
*/
private void readCurrentUserStream() {
try {
currentUser = new CurrentUserAtom(directory);
} catch(IOException ie) {
logger.log(POILogger.ERROR, "Error finding Current User Atom:\n" + ie);
currentUser = new CurrentUserAtom();
}
}
/**
* Find any other streams from the filesystem, and load them
*/
private void readOtherStreams() {
// Currently, there aren't any
}
/**
* Find and read in pictures contained in this presentation
*/
private void readPictures() throws IOException {
_pictures = new ArrayList<PictureData>();
byte[] pictstream;
try {
DocumentEntry entry = (DocumentEntry)directory.getEntry("Pictures");
pictstream = new byte[entry.getSize()];
DocumentInputStream is = directory.createDocumentInputStream("Pictures");
is.read(pictstream);
} catch (FileNotFoundException e){
// Silently catch exceptions if the presentation doesn't
// contain pictures - will use a null set instead
return;
}
int pos = 0;
// An empty picture record (length 0) will take up 8 bytes
while (pos <= (pictstream.length-8)) {
int offset = pos;
// Image signature
int signature = LittleEndian.getUShort(pictstream, pos);
pos += LittleEndian.SHORT_SIZE;
// Image type + 0xF018
int type = LittleEndian.getUShort(pictstream, pos);
pos += LittleEndian.SHORT_SIZE;
// Image size (excluding the 8 byte header)
int imgsize = LittleEndian.getInt(pictstream, pos);
pos += LittleEndian.INT_SIZE;
// The image size must be 0 or greater
// (0 is allowed, but odd, since we do wind on by the header each
// time, so we won't get stuck)
if(imgsize < 0) {
throw new CorruptPowerPointFileException("The file contains a picture, at position " + _pictures.size() + ", which has a negatively sized data length, so we can't trust any of the picture data");
}
// If they type (including the bonus 0xF018) is 0, skip it
if(type == 0) {
logger.log(POILogger.ERROR, "Problem reading picture: Invalid image type 0, on picture with length " + imgsize + ".\nYou document will probably become corrupted if you save it!");
logger.log(POILogger.ERROR, "" + pos);
} else {
// Build the PictureData object from the data
try {
PictureData pict = PictureData.create(type - 0xF018);
// Copy the data, ready to pass to PictureData
byte[] imgdata = new byte[imgsize];
System.arraycopy(pictstream, pos, imgdata, 0, imgdata.length);
pict.setRawData(imgdata);
pict.setOffset(offset);
_pictures.add(pict);
} catch(IllegalArgumentException e) {
logger.log(POILogger.ERROR, "Problem reading picture: " + e + "\nYou document will probably become corrupted if you save it!");
}
}
pos += imgsize;
}
}
/**
* Writes out the slideshow file the is represented by an instance
* of this class.
* It will write out the common OLE2 streams. If you require all
* streams to be written out, pass in preserveNodes
* @param out The OutputStream to write to.
* @throws IOException If there is an unexpected IOException from
* the passed in OutputStream
*/
public void write(OutputStream out) throws IOException {
// Write out, but only the common streams
write(out,false);
}
/**
* Writes out the slideshow file the is represented by an instance
* of this class.
* If you require all streams to be written out (eg Marcos, embeded
* documents), then set preserveNodes to true
* @param out The OutputStream to write to.
* @param preserveNodes Should all OLE2 streams be written back out, or only the common ones?
* @throws IOException If there is an unexpected IOException from
* the passed in OutputStream
*/
public void write(OutputStream out, boolean preserveNodes) throws IOException {
// Get a new Filesystem to write into
POIFSFileSystem outFS = new POIFSFileSystem();
// The list of entries we've written out
List<String> writtenEntries = new ArrayList<String>(1);
// Write out the Property Streams
writeProperties(outFS, writtenEntries);
// For position dependent records, hold where they were and now are
// As we go along, update, and hand over, to any Position Dependent
// records we happen across
Hashtable<Integer,Integer> oldToNewPositions = new Hashtable<Integer,Integer>();
// First pass - figure out where all the position dependent
// records are going to end up, in the new scheme
// (Annoyingly, some powerpoing files have PersistPtrHolders
// that reference slides after the PersistPtrHolder)
ByteArrayOutputStream baos = new ByteArrayOutputStream();
for(int i=0; i<_records.length; i++) {
if(_records[i] instanceof PositionDependentRecord) {
PositionDependentRecord pdr = (PositionDependentRecord)_records[i];
int oldPos = pdr.getLastOnDiskOffset();
int newPos = baos.size();
pdr.setLastOnDiskOffset(newPos);
oldToNewPositions.put(Integer.valueOf(oldPos),Integer.valueOf(newPos));
//System.out.println(oldPos + " -> " + newPos);
}
// Dummy write out, so the position winds on properly
_records[i].writeOut(baos);
}
// No go back through, actually writing ourselves out
baos.reset();
for(int i=0; i<_records.length; i++) {
// For now, we're only handling PositionDependentRecord's that
// happen at the top level.
// In future, we'll need the handle them everywhere, but that's
// a bit trickier
if(_records[i] instanceof PositionDependentRecord) {
// We've already figured out their new location, and
// told them that
// Tell them of the positions of the other records though
PositionDependentRecord pdr = (PositionDependentRecord)_records[i];
pdr.updateOtherRecordReferences(oldToNewPositions);
}
// Whatever happens, write out that record tree
_records[i].writeOut(baos);
}
// Update our cached copy of the bytes that make up the PPT stream
_docstream = baos.toByteArray();
// Write the PPT stream into the POIFS layer
ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
outFS.createDocument(bais,"PowerPoint Document");
writtenEntries.add("PowerPoint Document");
// Update and write out the Current User atom
int oldLastUserEditAtomPos = (int)currentUser.getCurrentEditOffset();
Integer newLastUserEditAtomPos = (Integer)oldToNewPositions.get(Integer.valueOf(oldLastUserEditAtomPos));
if(newLastUserEditAtomPos == null) {
throw new HSLFException("Couldn't find the new location of the UserEditAtom that used to be at " + oldLastUserEditAtomPos);
}
currentUser.setCurrentEditOffset(newLastUserEditAtomPos.intValue());
currentUser.writeToFS(outFS);
writtenEntries.add("Current User");
// Write any pictures, into another stream
if (_pictures.size() > 0) {
ByteArrayOutputStream pict = new ByteArrayOutputStream();
for (PictureData p : _pictures) {
p.write(pict);
}
outFS.createDocument(
new ByteArrayInputStream(pict.toByteArray()), "Pictures"
);
writtenEntries.add("Pictures");
}
// If requested, write out any other streams we spot
if(preserveNodes) {
copyNodes(directory.getFileSystem(), outFS, writtenEntries);
}
// Send the POIFSFileSystem object out to the underlying stream
outFS.writeFilesystem(out);
}
/* ******************* adding methods follow ********************* */
/**
* Adds a new root level record, at the end, but before the last
* PersistPtrIncrementalBlock.
*/
public synchronized int appendRootLevelRecord(Record newRecord) {
int addedAt = -1;
Record[] r = new Record[_records.length+1];
boolean added = false;
for(int i=(_records.length-1); i>=0; i--) {
if(added) {
// Just copy over
r[i] = _records[i];
} else {
r[(i+1)] = _records[i];
if(_records[i] instanceof PersistPtrHolder) {
r[i] = newRecord;
added = true;
addedAt = i;
}
}
}
_records = r;
return addedAt;
}
/**
* Add a new picture to this presentation.
*
* @return offset of this picture in the Pictures stream
*/
public int addPicture(PictureData img) {
int offset = 0;
if(_pictures.size() > 0){
PictureData prev = _pictures.get(_pictures.size() - 1);
offset = prev.getOffset() + prev.getRawData().length + 8;
}
img.setOffset(offset);
_pictures.add(img);
return offset;
}
/* ******************* fetching methods follow ********************* */
/**
* Returns an array of all the records found in the slideshow
*/
public Record[] getRecords() { return _records; }
/**
* Returns an array of the bytes of the file. Only correct after a
* call to open or write - at all other times might be wrong!
*/
public byte[] getUnderlyingBytes() { return _docstream; }
/**
* Fetch the Current User Atom of the document
*/
public CurrentUserAtom getCurrentUserAtom() { return currentUser; }
/**
* Return array of pictures contained in this presentation
*
* @return array with the read pictures or <code>null</code> if the
* presentation doesn't contain pictures.
*/
public PictureData[] getPictures() {
return _pictures.toArray(new PictureData[_pictures.size()]);
}
/**
* Gets embedded object data from the slide show.
*
* @return the embedded objects.
*/
public ObjectData[] getEmbeddedObjects() {
if (_objects == null) {
List<ObjectData> objects = new ArrayList<ObjectData>();
for (int i = 0; i < _records.length; i++) {
if (_records[i] instanceof ExOleObjStg) {
objects.add(new ObjectData((ExOleObjStg) _records[i]));
}
}
_objects = objects.toArray(new ObjectData[objects.size()]);
}
return _objects;
}
}