-
Notifications
You must be signed in to change notification settings - Fork 3.3k
/
HFile.java
914 lines (800 loc) · 33.8 KB
/
HFile.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.io.hfile;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.SequenceInputStream;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.atomic.LongAdder;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.protobuf.ProtobufMagic;
import org.apache.hadoop.hbase.regionserver.CellSink;
import org.apache.hadoop.hbase.regionserver.ShipperListener;
import org.apache.hadoop.hbase.shaded.com.google.protobuf.UnsafeByteOperations;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.BytesBytesPair;
import org.apache.hadoop.hbase.shaded.protobuf.generated.HFileProtos;
import org.apache.hadoop.hbase.util.BloomFilterWriter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.io.Writable;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
/**
* File format for hbase.
* A file of sorted key/value pairs. Both keys and values are byte arrays.
* <p>
* The memory footprint of a HFile includes the following (below is taken from the
* <a
* href=https://issues.apache.org/jira/browse/HADOOP-3315>TFile</a> documentation
* but applies also to HFile):
* <ul>
* <li>Some constant overhead of reading or writing a compressed block.
* <ul>
* <li>Each compressed block requires one compression/decompression codec for
* I/O.
* <li>Temporary space to buffer the key.
* <li>Temporary space to buffer the value.
* </ul>
* <li>HFile index, which is proportional to the total number of Data Blocks.
* The total amount of memory needed to hold the index can be estimated as
* (56+AvgKeySize)*NumBlocks.
* </ul>
* Suggestions on performance optimization.
* <ul>
* <li>Minimum block size. We recommend a setting of minimum block size between
* 8KB to 1MB for general usage. Larger block size is preferred if files are
* primarily for sequential access. However, it would lead to inefficient random
* access (because there are more data to decompress). Smaller blocks are good
* for random access, but require more memory to hold the block index, and may
* be slower to create (because we must flush the compressor stream at the
* conclusion of each data block, which leads to an FS I/O flush). Further, due
* to the internal caching in Compression codec, the smallest possible block
* size would be around 20KB-30KB.
* <li>The current implementation does not offer true multi-threading for
* reading. The implementation uses FSDataInputStream seek()+read(), which is
* shown to be much faster than positioned-read call in single thread mode.
* However, it also means that if multiple threads attempt to access the same
* HFile (using multiple scanners) simultaneously, the actual I/O is carried out
* sequentially even if they access different DFS blocks (Reexamine! pread seems
* to be 10% faster than seek+read in my testing -- stack).
* <li>Compression codec. Use "none" if the data is not very compressable (by
* compressable, I mean a compression ratio at least 2:1). Generally, use "lzo"
* as the starting point for experimenting. "gz" overs slightly better
* compression ratio over "lzo" but requires 4x CPU to compress and 2x CPU to
* decompress, comparing to "lzo".
* </ul>
*
* For more on the background behind HFile, see <a
* href=https://issues.apache.org/jira/browse/HBASE-61>HBASE-61</a>.
* <p>
* File is made of data blocks followed by meta data blocks (if any), a fileinfo
* block, data block index, meta data block index, and a fixed size trailer
* which records the offsets at which file changes content type.
* <pre><data blocks><meta blocks><fileinfo><
* data index><meta index><trailer></pre>
* Each block has a bit of magic at its start. Block are comprised of
* key/values. In data blocks, they are both byte arrays. Metadata blocks are
* a String key and a byte array value. An empty file looks like this:
* <pre><fileinfo><trailer></pre>. That is, there are not data nor meta
* blocks present.
* <p>
* TODO: Do scanners need to be able to take a start and end row?
* TODO: Should BlockIndex know the name of its file? Should it have a Path
* that points at its file say for the case where an index lives apart from
* an HFile instance?
*/
@InterfaceAudience.Private
public class HFile {
// LOG is being used in HFileBlock and CheckSumUtil
static final Log LOG = LogFactory.getLog(HFile.class);
/**
* Maximum length of key in HFile.
*/
public final static int MAXIMUM_KEY_LENGTH = Integer.MAX_VALUE;
/**
* Default compression: none.
*/
public final static Compression.Algorithm DEFAULT_COMPRESSION_ALGORITHM =
Compression.Algorithm.NONE;
/** Minimum supported HFile format version */
public static final int MIN_FORMAT_VERSION = 2;
/** Maximum supported HFile format version
*/
public static final int MAX_FORMAT_VERSION = 3;
/**
* Minimum HFile format version with support for persisting cell tags
*/
public static final int MIN_FORMAT_VERSION_WITH_TAGS = 3;
/** Default compression name: none. */
public final static String DEFAULT_COMPRESSION =
DEFAULT_COMPRESSION_ALGORITHM.getName();
/** Meta data block name for bloom filter bits. */
public static final String BLOOM_FILTER_DATA_KEY = "BLOOM_FILTER_DATA";
/**
* We assume that HFile path ends with
* ROOT_DIR/TABLE_NAME/REGION_NAME/CF_NAME/HFILE, so it has at least this
* many levels of nesting. This is needed for identifying table and CF name
* from an HFile path.
*/
public final static int MIN_NUM_HFILE_PATH_LEVELS = 5;
/**
* The number of bytes per checksum.
*/
public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024;
// For measuring number of checksum failures
static final LongAdder CHECKSUM_FAILURES = new LongAdder();
// For tests. Gets incremented when we read a block whether from HDFS or from Cache.
public static final LongAdder DATABLOCK_READ_COUNT = new LongAdder();
/**
* Number of checksum verification failures. It also
* clears the counter.
*/
public static final long getChecksumFailuresCount() {
return CHECKSUM_FAILURES.sumThenReset();
}
/** API required to write an {@link HFile} */
public interface Writer extends Closeable, CellSink, ShipperListener {
/** Max memstore (mvcc) timestamp in FileInfo */
public static final byte [] MAX_MEMSTORE_TS_KEY = Bytes.toBytes("MAX_MEMSTORE_TS_KEY");
/** Add an element to the file info map. */
void appendFileInfo(byte[] key, byte[] value) throws IOException;
/** @return the path to this {@link HFile} */
Path getPath();
/**
* Adds an inline block writer such as a multi-level block index writer or
* a compound Bloom filter writer.
*/
void addInlineBlockWriter(InlineBlockWriter bloomWriter);
// The below three methods take Writables. We'd like to undo Writables but undoing the below would be pretty
// painful. Could take a byte [] or a Message but we want to be backward compatible around hfiles so would need
// to map between Message and Writable or byte [] and current Writable serialization. This would be a bit of work
// to little gain. Thats my thinking at moment. St.Ack 20121129
void appendMetaBlock(String bloomFilterMetaKey, Writable metaWriter);
/**
* Store general Bloom filter in the file. This does not deal with Bloom filter
* internals but is necessary, since Bloom filters are stored differently
* in HFile version 1 and version 2.
*/
void addGeneralBloomFilter(BloomFilterWriter bfw);
/**
* Store delete family Bloom filter in the file, which is only supported in
* HFile V2.
*/
void addDeleteFamilyBloomFilter(BloomFilterWriter bfw) throws IOException;
/**
* Return the file context for the HFile this writer belongs to
*/
HFileContext getFileContext();
}
/**
* This variety of ways to construct writers is used throughout the code, and
* we want to be able to swap writer implementations.
*/
public static class WriterFactory {
protected final Configuration conf;
protected final CacheConfig cacheConf;
protected FileSystem fs;
protected Path path;
protected FSDataOutputStream ostream;
protected CellComparator comparator =
CellComparator.COMPARATOR;
protected InetSocketAddress[] favoredNodes;
private HFileContext fileContext;
protected boolean shouldDropBehind = false;
WriterFactory(Configuration conf, CacheConfig cacheConf) {
this.conf = conf;
this.cacheConf = cacheConf;
}
public WriterFactory withPath(FileSystem fs, Path path) {
Preconditions.checkNotNull(fs);
Preconditions.checkNotNull(path);
this.fs = fs;
this.path = path;
return this;
}
public WriterFactory withOutputStream(FSDataOutputStream ostream) {
Preconditions.checkNotNull(ostream);
this.ostream = ostream;
return this;
}
public WriterFactory withComparator(CellComparator comparator) {
Preconditions.checkNotNull(comparator);
this.comparator = comparator;
return this;
}
public WriterFactory withFavoredNodes(InetSocketAddress[] favoredNodes) {
// Deliberately not checking for null here.
this.favoredNodes = favoredNodes;
return this;
}
public WriterFactory withFileContext(HFileContext fileContext) {
this.fileContext = fileContext;
return this;
}
public WriterFactory withShouldDropCacheBehind(boolean shouldDropBehind) {
this.shouldDropBehind = shouldDropBehind;
return this;
}
public Writer create() throws IOException {
if ((path != null ? 1 : 0) + (ostream != null ? 1 : 0) != 1) {
throw new AssertionError("Please specify exactly one of " +
"filesystem/path or path");
}
if (path != null) {
ostream = HFileWriterImpl.createOutputStream(conf, fs, path, favoredNodes);
try {
ostream.setDropBehind(shouldDropBehind && cacheConf.shouldDropBehindCompaction());
} catch (UnsupportedOperationException uoe) {
if (LOG.isTraceEnabled()) LOG.trace("Unable to set drop behind on " + path, uoe);
else if (LOG.isDebugEnabled()) LOG.debug("Unable to set drop behind on " + path);
}
}
return new HFileWriterImpl(conf, cacheConf, path, ostream, comparator, fileContext);
}
}
/** The configuration key for HFile version to use for new files */
public static final String FORMAT_VERSION_KEY = "hfile.format.version";
public static int getFormatVersion(Configuration conf) {
int version = conf.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);
checkFormatVersion(version);
return version;
}
/**
* Returns the factory to be used to create {@link HFile} writers.
* Disables block cache access for all writers created through the
* returned factory.
*/
public static final WriterFactory getWriterFactoryNoCache(Configuration
conf) {
Configuration tempConf = new Configuration(conf);
tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
return HFile.getWriterFactory(conf, new CacheConfig(tempConf));
}
/**
* Returns the factory to be used to create {@link HFile} writers
*/
public static final WriterFactory getWriterFactory(Configuration conf,
CacheConfig cacheConf) {
int version = getFormatVersion(conf);
switch (version) {
case 2:
throw new IllegalArgumentException("This should never happen. " +
"Did you change hfile.format.version to read v2? This version of the software writes v3" +
" hfiles only (but it can read v2 files without having to update hfile.format.version " +
"in hbase-site.xml)");
case 3:
return new HFile.WriterFactory(conf, cacheConf);
default:
throw new IllegalArgumentException("Cannot create writer for HFile " +
"format version " + version);
}
}
/**
* An abstraction used by the block index.
* Implementations will check cache for any asked-for block and return cached block if found.
* Otherwise, after reading from fs, will try and put block into cache before returning.
*/
public interface CachingBlockReader {
/**
* Read in a file block.
* @param offset offset to read.
* @param onDiskBlockSize size of the block
* @param cacheBlock
* @param pread
* @param isCompaction is this block being read as part of a compaction
* @param expectedBlockType the block type we are expecting to read with this read operation,
* or null to read whatever block type is available and avoid checking (that might reduce
* caching efficiency of encoded data blocks)
* @param expectedDataBlockEncoding the data block encoding the caller is expecting data blocks
* to be in, or null to not perform this check and return the block irrespective of the
* encoding. This check only applies to data blocks and can be set to null when the caller is
* expecting to read a non-data block and has set expectedBlockType accordingly.
* @return Block wrapped in a ByteBuffer.
* @throws IOException
*/
HFileBlock readBlock(long offset, long onDiskBlockSize,
boolean cacheBlock, final boolean pread, final boolean isCompaction,
final boolean updateCacheMetrics, BlockType expectedBlockType,
DataBlockEncoding expectedDataBlockEncoding)
throws IOException;
/**
* Return the given block back to the cache, if it was obtained from cache.
* @param block Block to be returned.
*/
void returnBlock(HFileBlock block);
}
/** An interface used by clients to open and iterate an {@link HFile}. */
public interface Reader extends Closeable, CachingBlockReader {
/**
* Returns this reader's "name". Usually the last component of the path.
* Needs to be constant as the file is being moved to support caching on
* write.
*/
String getName();
CellComparator getComparator();
HFileScanner getScanner(boolean cacheBlocks, final boolean pread, final boolean isCompaction);
HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException;
Map<byte[], byte[]> loadFileInfo() throws IOException;
Cell getLastKey();
Cell midkey() throws IOException;
long length();
long getEntries();
Cell getFirstKey();
long indexSize();
byte[] getFirstRowKey();
byte[] getLastRowKey();
FixedFileTrailer getTrailer();
HFileBlockIndex.BlockIndexReader getDataBlockIndexReader();
HFileScanner getScanner(boolean cacheBlocks, boolean pread);
Compression.Algorithm getCompressionAlgorithm();
/**
* Retrieves general Bloom filter metadata as appropriate for each
* {@link HFile} version.
* Knows nothing about how that metadata is structured.
*/
DataInput getGeneralBloomFilterMetadata() throws IOException;
/**
* Retrieves delete family Bloom filter metadata as appropriate for each
* {@link HFile} version.
* Knows nothing about how that metadata is structured.
*/
DataInput getDeleteBloomFilterMetadata() throws IOException;
Path getPath();
/** Close method with optional evictOnClose */
void close(boolean evictOnClose) throws IOException;
DataBlockEncoding getDataBlockEncoding();
boolean hasMVCCInfo();
/**
* Return the file context of the HFile this reader belongs to
*/
HFileContext getFileContext();
boolean isPrimaryReplicaReader();
void setPrimaryReplicaReader(boolean isPrimaryReplicaReader);
boolean shouldIncludeMemstoreTS();
boolean isDecodeMemstoreTS();
DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction);
@VisibleForTesting
HFileBlock.FSReader getUncachedBlockReader();
@VisibleForTesting
boolean prefetchComplete();
}
/**
* Method returns the reader given the specified arguments.
* TODO This is a bad abstraction. See HBASE-6635.
*
* @param path hfile's path
* @param fsdis stream of path's file
* @param size max size of the trailer.
* @param cacheConf Cache configuation values, cannot be null.
* @param hfs
* @return an appropriate instance of HFileReader
* @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
*/
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="SF_SWITCH_FALLTHROUGH",
justification="Intentional")
private static Reader pickReaderVersion(Path path, FSDataInputStreamWrapper fsdis,
long size, CacheConfig cacheConf, HFileSystem hfs, Configuration conf) throws IOException {
FixedFileTrailer trailer = null;
try {
boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();
assert !isHBaseChecksum; // Initially we must read with FS checksum.
trailer = FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
switch (trailer.getMajorVersion()) {
case 2:
LOG.debug("Opening HFile v2 with v3 reader");
// Fall through. FindBugs: SF_SWITCH_FALLTHROUGH
case 3 :
return new HFileReaderImpl(path, trailer, fsdis, size, cacheConf, hfs, conf);
default:
throw new IllegalArgumentException("Invalid HFile version " + trailer.getMajorVersion());
}
} catch (Throwable t) {
try {
fsdis.close();
} catch (Throwable t2) {
LOG.warn("Error closing fsdis FSDataInputStreamWrapper", t2);
}
throw new CorruptHFileException("Problem reading HFile Trailer from file " + path, t);
}
}
/**
* @param fs A file system
* @param path Path to HFile
* @param fsdis a stream of path's file
* @param size max size of the trailer.
* @param cacheConf Cache configuration for hfile's contents
* @param conf Configuration
* @return A version specific Hfile Reader
* @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
*/
@SuppressWarnings("resource")
public static Reader createReader(FileSystem fs, Path path,
FSDataInputStreamWrapper fsdis, long size, CacheConfig cacheConf, Configuration conf)
throws IOException {
HFileSystem hfs = null;
// If the fs is not an instance of HFileSystem, then create an
// instance of HFileSystem that wraps over the specified fs.
// In this case, we will not be able to avoid checksumming inside
// the filesystem.
if (!(fs instanceof HFileSystem)) {
hfs = new HFileSystem(fs);
} else {
hfs = (HFileSystem)fs;
}
return pickReaderVersion(path, fsdis, size, cacheConf, hfs, conf);
}
/**
*
* @param fs filesystem
* @param path Path to file to read
* @param cacheConf This must not be null. @see {@link org.apache.hadoop.hbase.io.hfile.CacheConfig#CacheConfig(Configuration)}
* @return an active Reader instance
* @throws IOException Will throw a CorruptHFileException (DoNotRetryIOException subtype) if hfile is corrupt/invalid.
*/
public static Reader createReader(
FileSystem fs, Path path, CacheConfig cacheConf, Configuration conf) throws IOException {
Preconditions.checkNotNull(cacheConf, "Cannot create Reader with null CacheConf");
FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fs, path);
return pickReaderVersion(path, stream, fs.getFileStatus(path).getLen(),
cacheConf, stream.getHfs(), conf);
}
/**
* This factory method is used only by unit tests
*/
static Reader createReaderFromStream(Path path,
FSDataInputStream fsdis, long size, CacheConfig cacheConf, Configuration conf)
throws IOException {
FSDataInputStreamWrapper wrapper = new FSDataInputStreamWrapper(fsdis);
return pickReaderVersion(path, wrapper, size, cacheConf, null, conf);
}
/**
* Returns true if the specified file has a valid HFile Trailer.
* @param fs filesystem
* @param path Path to file to verify
* @return true if the file has a valid HFile Trailer, otherwise false
* @throws IOException if failed to read from the underlying stream
*/
public static boolean isHFileFormat(final FileSystem fs, final Path path) throws IOException {
return isHFileFormat(fs, fs.getFileStatus(path));
}
/**
* Returns true if the specified file has a valid HFile Trailer.
* @param fs filesystem
* @param fileStatus the file to verify
* @return true if the file has a valid HFile Trailer, otherwise false
* @throws IOException if failed to read from the underlying stream
*/
public static boolean isHFileFormat(final FileSystem fs, final FileStatus fileStatus)
throws IOException {
final Path path = fileStatus.getPath();
final long size = fileStatus.getLen();
FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, path);
try {
boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();
assert !isHBaseChecksum; // Initially we must read with FS checksum.
FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
return true;
} catch (IllegalArgumentException e) {
return false;
} catch (IOException e) {
throw e;
} finally {
try {
fsdis.close();
} catch (Throwable t) {
LOG.warn("Error closing fsdis FSDataInputStreamWrapper: " + path, t);
}
}
}
/**
* Metadata for this file. Conjured by the writer. Read in by the reader.
*/
public static class FileInfo implements SortedMap<byte[], byte[]> {
static final String RESERVED_PREFIX = "hfile.";
static final byte[] RESERVED_PREFIX_BYTES = Bytes.toBytes(RESERVED_PREFIX);
static final byte [] LASTKEY = Bytes.toBytes(RESERVED_PREFIX + "LASTKEY");
static final byte [] AVG_KEY_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_KEY_LEN");
static final byte [] AVG_VALUE_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_VALUE_LEN");
static final byte [] CREATE_TIME_TS = Bytes.toBytes(RESERVED_PREFIX + "CREATE_TIME_TS");
static final byte [] COMPARATOR = Bytes.toBytes(RESERVED_PREFIX + "COMPARATOR");
static final byte [] TAGS_COMPRESSED = Bytes.toBytes(RESERVED_PREFIX + "TAGS_COMPRESSED");
public static final byte [] MAX_TAGS_LEN = Bytes.toBytes(RESERVED_PREFIX + "MAX_TAGS_LEN");
private final SortedMap<byte [], byte []> map = new TreeMap<byte [], byte []>(Bytes.BYTES_COMPARATOR);
public FileInfo() {
super();
}
/**
* Append the given key/value pair to the file info, optionally checking the
* key prefix.
*
* @param k key to add
* @param v value to add
* @param checkPrefix whether to check that the provided key does not start
* with the reserved prefix
* @return this file info object
* @throws IOException if the key or value is invalid
*/
public FileInfo append(final byte[] k, final byte[] v,
final boolean checkPrefix) throws IOException {
if (k == null || v == null) {
throw new NullPointerException("Key nor value may be null");
}
if (checkPrefix && isReservedFileInfoKey(k)) {
throw new IOException("Keys with a " + FileInfo.RESERVED_PREFIX
+ " are reserved");
}
put(k, v);
return this;
}
public void clear() {
this.map.clear();
}
public Comparator<? super byte[]> comparator() {
return map.comparator();
}
public boolean containsKey(Object key) {
return map.containsKey(key);
}
public boolean containsValue(Object value) {
return map.containsValue(value);
}
public Set<java.util.Map.Entry<byte[], byte[]>> entrySet() {
return map.entrySet();
}
public boolean equals(Object o) {
return map.equals(o);
}
public byte[] firstKey() {
return map.firstKey();
}
public byte[] get(Object key) {
return map.get(key);
}
public int hashCode() {
return map.hashCode();
}
public SortedMap<byte[], byte[]> headMap(byte[] toKey) {
return this.map.headMap(toKey);
}
public boolean isEmpty() {
return map.isEmpty();
}
public Set<byte[]> keySet() {
return map.keySet();
}
public byte[] lastKey() {
return map.lastKey();
}
public byte[] put(byte[] key, byte[] value) {
return this.map.put(key, value);
}
public void putAll(Map<? extends byte[], ? extends byte[]> m) {
this.map.putAll(m);
}
public byte[] remove(Object key) {
return this.map.remove(key);
}
public int size() {
return map.size();
}
public SortedMap<byte[], byte[]> subMap(byte[] fromKey, byte[] toKey) {
return this.map.subMap(fromKey, toKey);
}
public SortedMap<byte[], byte[]> tailMap(byte[] fromKey) {
return this.map.tailMap(fromKey);
}
public Collection<byte[]> values() {
return map.values();
}
/**
* Write out this instance on the passed in <code>out</code> stream.
* We write it as a protobuf.
* @param out
* @throws IOException
* @see #read(DataInputStream)
*/
void write(final DataOutputStream out) throws IOException {
HFileProtos.FileInfoProto.Builder builder = HFileProtos.FileInfoProto.newBuilder();
for (Map.Entry<byte [], byte[]> e: this.map.entrySet()) {
HBaseProtos.BytesBytesPair.Builder bbpBuilder = HBaseProtos.BytesBytesPair.newBuilder();
bbpBuilder.setFirst(UnsafeByteOperations.unsafeWrap(e.getKey()));
bbpBuilder.setSecond(UnsafeByteOperations.unsafeWrap(e.getValue()));
builder.addMapEntry(bbpBuilder.build());
}
out.write(ProtobufMagic.PB_MAGIC);
builder.build().writeDelimitedTo(out);
}
/**
* Populate this instance with what we find on the passed in <code>in</code> stream.
* Can deserialize protobuf of old Writables format.
* @param in
* @throws IOException
* @see #write(DataOutputStream)
*/
void read(final DataInputStream in) throws IOException {
// This code is tested over in TestHFileReaderV1 where we read an old hfile w/ this new code.
int pblen = ProtobufUtil.lengthOfPBMagic();
byte [] pbuf = new byte[pblen];
if (in.markSupported()) in.mark(pblen);
int read = in.read(pbuf);
if (read != pblen) throw new IOException("read=" + read + ", wanted=" + pblen);
if (ProtobufUtil.isPBMagicPrefix(pbuf)) {
parsePB(HFileProtos.FileInfoProto.parseDelimitedFrom(in));
} else {
if (in.markSupported()) {
in.reset();
parseWritable(in);
} else {
// We cannot use BufferedInputStream, it consumes more than we read from the underlying IS
ByteArrayInputStream bais = new ByteArrayInputStream(pbuf);
SequenceInputStream sis = new SequenceInputStream(bais, in); // Concatenate input streams
// TODO: Am I leaking anything here wrapping the passed in stream? We are not calling close on the wrapped
// streams but they should be let go after we leave this context? I see that we keep a reference to the
// passed in inputstream but since we no longer have a reference to this after we leave, we should be ok.
parseWritable(new DataInputStream(sis));
}
}
}
/** Now parse the old Writable format. It was a list of Map entries. Each map entry was a key and a value of
* a byte []. The old map format had a byte before each entry that held a code which was short for the key or
* value type. We know it was a byte [] so in below we just read and dump it.
* @throws IOException
*/
void parseWritable(final DataInputStream in) throws IOException {
// First clear the map. Otherwise we will just accumulate entries every time this method is called.
this.map.clear();
// Read the number of entries in the map
int entries = in.readInt();
// Then read each key/value pair
for (int i = 0; i < entries; i++) {
byte [] key = Bytes.readByteArray(in);
// We used to read a byte that encoded the class type. Read and ignore it because it is always byte [] in hfile
in.readByte();
byte [] value = Bytes.readByteArray(in);
this.map.put(key, value);
}
}
/**
* Fill our map with content of the pb we read off disk
* @param fip protobuf message to read
*/
void parsePB(final HFileProtos.FileInfoProto fip) {
this.map.clear();
for (BytesBytesPair pair: fip.getMapEntryList()) {
this.map.put(pair.getFirst().toByteArray(), pair.getSecond().toByteArray());
}
}
}
/** Return true if the given file info key is reserved for internal use. */
public static boolean isReservedFileInfoKey(byte[] key) {
return Bytes.startsWith(key, FileInfo.RESERVED_PREFIX_BYTES);
}
/**
* Get names of supported compression algorithms. The names are acceptable by
* HFile.Writer.
*
* @return Array of strings, each represents a supported compression
* algorithm. Currently, the following compression algorithms are
* supported.
* <ul>
* <li>"none" - No compression.
* <li>"gz" - GZIP compression.
* </ul>
*/
public static String[] getSupportedCompressionAlgorithms() {
return Compression.getSupportedAlgorithms();
}
// Utility methods.
/*
* @param l Long to convert to an int.
* @return <code>l</code> cast as an int.
*/
static int longToInt(final long l) {
// Expecting the size() of a block not exceeding 4GB. Assuming the
// size() will wrap to negative integer if it exceeds 2GB (From tfile).
return (int)(l & 0x00000000ffffffffL);
}
/**
* Returns all HFiles belonging to the given region directory. Could return an
* empty list.
*
* @param fs The file system reference.
* @param regionDir The region directory to scan.
* @return The list of files found.
* @throws IOException When scanning the files fails.
*/
static List<Path> getStoreFiles(FileSystem fs, Path regionDir)
throws IOException {
List<Path> regionHFiles = new ArrayList<Path>();
PathFilter dirFilter = new FSUtils.DirFilter(fs);
FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter);
for(FileStatus dir : familyDirs) {
FileStatus[] files = fs.listStatus(dir.getPath());
for (FileStatus file : files) {
if (!file.isDirectory() &&
(!file.getPath().toString().contains(HConstants.HREGION_OLDLOGDIR_NAME)) &&
(!file.getPath().toString().contains(HConstants.RECOVERED_EDITS_DIR))) {
regionHFiles.add(file.getPath());
}
}
}
return regionHFiles;
}
/**
* Checks the given {@link HFile} format version, and throws an exception if
* invalid. Note that if the version number comes from an input file and has
* not been verified, the caller needs to re-throw an {@link IOException} to
* indicate that this is not a software error, but corrupted input.
*
* @param version an HFile version
* @throws IllegalArgumentException if the version is invalid
*/
public static void checkFormatVersion(int version)
throws IllegalArgumentException {
if (version < MIN_FORMAT_VERSION || version > MAX_FORMAT_VERSION) {
throw new IllegalArgumentException("Invalid HFile version: " + version
+ " (expected to be " + "between " + MIN_FORMAT_VERSION + " and "
+ MAX_FORMAT_VERSION + ")");
}
}
public static void checkHFileVersion(final Configuration c) {
int version = c.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);
if (version < MAX_FORMAT_VERSION || version > MAX_FORMAT_VERSION) {
throw new IllegalArgumentException("The setting for " + FORMAT_VERSION_KEY +
" (in your hbase-*.xml files) is " + version + " which does not match " +
MAX_FORMAT_VERSION +
"; are you running with a configuration from an older or newer hbase install (an " +
"incompatible hbase-default.xml or hbase-site.xml on your CLASSPATH)?");
}
}
public static void main(String[] args) throws Exception {
// delegate to preserve old behavior
HFilePrettyPrinter.main(args);
}
}