/
GenericDataFile.java
106 lines (97 loc) · 3.3 KB
/
GenericDataFile.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.Set;
import org.apache.avro.Schema;
import org.apache.iceberg.avro.AvroSchemaUtil;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
import org.apache.iceberg.types.Types;
/**
 * Package-private {@link DataFile} implementation that stores its state in {@link BaseFile}.
 *
 * <p>Instances built through the metrics-based constructor are always tagged with {@link
 * FileContent#DATA}.
 */
class GenericDataFile extends BaseFile<DataFile> implements DataFile {

  /** Constructor for Java serialization. */
  GenericDataFile() {}

  /**
   * Used by Avro reflection to instantiate this class when reading manifest files.
   *
   * @param avroSchema the Avro schema handed through to {@link BaseFile}.
   */
  GenericDataFile(Schema avroSchema) {
    super(avroSchema);
  }

  /**
   * Creates a data file entry from explicit field values.
   *
   * <p>The record count and all column-level statistics are read from {@code metrics}, so {@code
   * metrics} must not be {@code null}. The file content is fixed to {@link FileContent#DATA}.
   *
   * @param specId id of the partition spec, passed through to {@link BaseFile}.
   * @param filePath location of the file.
   * @param format format of the file.
   * @param partition partition data of the file.
   * @param fileSizeInBytes size of the file in bytes.
   * @param metrics source of the record count, column sizes, value / null-value / NaN-value
   *     counts and lower / upper bounds.
   * @param keyMetadata key metadata, passed through to {@link BaseFile}.
   * @param splitOffsets split offsets, passed through to {@link BaseFile}.
   * @param equalityFieldIds equality field ids, passed through to {@link BaseFile}.
   * @param sortOrderId sort order id, passed through to {@link BaseFile}.
   */
  GenericDataFile(
      int specId,
      String filePath,
      FileFormat format,
      PartitionData partition,
      long fileSizeInBytes,
      Metrics metrics,
      ByteBuffer keyMetadata,
      List<Long> splitOffsets,
      int[] equalityFieldIds,
      Integer sortOrderId) {
    // Note the argument order expected by BaseFile: keyMetadata goes last even though it is
    // declared before splitOffsets in this constructor's parameter list.
    super(
        specId,
        FileContent.DATA,
        filePath,
        format,
        partition,
        fileSizeInBytes,
        metrics.recordCount(),
        metrics.columnSizes(),
        metrics.valueCounts(),
        metrics.nullValueCounts(),
        metrics.nanValueCounts(),
        metrics.lowerBounds(),
        metrics.upperBounds(),
        splitOffsets,
        equalityFieldIds,
        sortOrderId,
        keyMetadata);
  }

  /**
   * Copy constructor; all copying is delegated to {@link BaseFile}.
   *
   * @param toCopy a generic data file to copy.
   * @param fullCopy whether to copy all fields or to drop column-level stats.
   * @param statsToKeep a set of column ids to keep stats. If empty or <code>null</code> then every
   *     column stat is kept.
   */
  private GenericDataFile(GenericDataFile toCopy, boolean fullCopy, Set<Integer> statsToKeep) {
    super(toCopy, fullCopy, statsToKeep);
  }

  /** Returns a full copy of this file, made with an empty set of stats column ids. */
  @Override
  public DataFile copy() {
    return new GenericDataFile(this, true /* full copy */, ImmutableSet.of());
  }

  /** Returns a copy of this file made with the full-copy flag turned off. */
  @Override
  public DataFile copyWithoutStats() {
    return new GenericDataFile(this, false /* drop stats */, ImmutableSet.of());
  }

  /**
   * Returns a full copy of this file, forwarding the given column ids to the copy constructor.
   *
   * @param statsToKeep column ids whose stats should be kept.
   */
  @Override
  public DataFile copyWithStats(Set<Integer> statsToKeep) {
    return new GenericDataFile(this, true, statsToKeep);
  }

  /**
   * Builds the Avro schema for this file from the given partition struct.
   *
   * @param partitionStruct struct type of the partition tuple.
   * @return the schema converted from {@link DataFile#getType(Types.StructType)}.
   */
  @Override
  protected Schema getAvroSchema(Types.StructType partitionStruct) {
    Types.StructType dataFileStruct = DataFile.getType(partitionStruct);

    // Associate each struct type with the name of the Java class that represents it.
    ImmutableMap<Types.StructType, String> recordNames =
        ImmutableMap.of(
            dataFileStruct, GenericDataFile.class.getName(),
            partitionStruct, PartitionData.class.getName());

    return AvroSchemaUtil.convert(dataFileStruct, recordNames);
  }
}