PARQUET-1729: Avoid AutoBoxing in EncodingStats #717

Merged · 4 commits · Jan 10, 2020
EncodingStats.java:

@@ -25,6 +25,7 @@
 import java.util.LinkedHashMap;
 import java.util.Map;
 import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
 
 import static org.apache.parquet.column.Encoding.PLAIN_DICTIONARY;
 import static org.apache.parquet.column.Encoding.RLE_DICTIONARY;
@@ -36,12 +37,12 @@
  * convenience methods for those checks, like {@link #hasNonDictionaryEncodedPages()}.
  */
 public class EncodingStats {
-  final Map<Encoding, Integer> dictStats;
-  final Map<Encoding, Integer> dataStats;
+  final Map<Encoding, Number> dictStats;
+  final Map<Encoding, Number> dataStats;
   private final boolean usesV2Pages;
 
-  private EncodingStats(Map<Encoding, Integer> dictStats,
-                        Map<Encoding, Integer> dataStats,
+  private EncodingStats(Map<Encoding, Number> dictStats,
+                        Map<Encoding, Number> dataStats,
                         boolean usesV2Pages) {
     this.dictStats = dictStats;
     this.dataStats = dataStats;
Expand All @@ -57,13 +58,13 @@ public Set<Encoding> getDataEncodings() {
}

public int getNumDictionaryPagesEncodedAs(Encoding enc) {
final Integer i = dictStats.get(enc);
return (i == null) ? 0 : i.intValue();
final Number pageCount = dictStats.get(enc);
return (pageCount == null) ? 0 : pageCount.intValue();
}

public int getNumDataPagesEncodedAs(Encoding enc) {
final Integer i = dataStats.get(enc);
return (i == null) ? 0 : i.intValue();
final Number pageCount = dataStats.get(enc);
return (pageCount == null) ? 0 : pageCount.intValue();
}

public boolean hasDictionaryPages() {
@@ -103,8 +104,8 @@ public boolean usesV2Pages() {
    * Used to build {@link EncodingStats} from metadata or to accumulate stats as pages are written.
    */
   public static class Builder {
-    private final Map<Encoding, Integer> dictStats = new LinkedHashMap<>();
-    private final Map<Encoding, Integer> dataStats = new LinkedHashMap<>();
+    private final Map<Encoding, AtomicInteger> dictStats = new LinkedHashMap<>();
+    private final Map<Encoding, AtomicInteger> dataStats = new LinkedHashMap<>();
     private boolean usesV2Pages = false;
 
     public Builder clear() {
@@ -124,8 +125,8 @@ public Builder addDictEncoding(Encoding encoding) {
     }
 
     public Builder addDictEncoding(Encoding encoding, int numPages) {
-      Integer pages = dictStats.get(encoding);
-      dictStats.put(encoding, numPages + (pages != null ? pages : 0));
+      dictStats.computeIfAbsent(encoding, enc -> new AtomicInteger(0))
+          .addAndGet(numPages);
       return this;
     }
 
@@ -141,8 +142,8 @@ public Builder addDataEncoding(Encoding encoding) {
     }
 
     public Builder addDataEncoding(Encoding encoding, int numPages) {
-      Integer pages = dataStats.get(encoding);
-      dataStats.put(encoding, numPages + (pages != null ? pages : 0));
+      dataStats.computeIfAbsent(encoding, enc -> new AtomicInteger(0))
+          .addAndGet(numPages);
       return this;
     }
 
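The Builder change above swaps boxed Integer counters for mutable AtomicInteger cells: computeIfAbsent allocates one counter per encoding, and every later addAndGet increments it in place, so the per-call unbox, add, and re-box cycle of the old code goes away. Below is a minimal sketch of the same pattern; the Kind enum and PageCounter class are hypothetical stand-ins for Encoding and EncodingStats.Builder, not Parquet code.

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

// Sketch only: Kind and PageCounter are hypothetical stand-ins for
// Encoding and EncodingStats.Builder from the diff above.
public class PageCounter {
  enum Kind { PLAIN, RLE_DICTIONARY }

  private final Map<Kind, AtomicInteger> stats = new LinkedHashMap<>();

  // One AtomicInteger is allocated per key; subsequent calls mutate it
  // in place, with no unboxing or re-boxing of the running total.
  public void add(Kind kind, int numPages) {
    stats.computeIfAbsent(kind, k -> new AtomicInteger(0)).addAndGet(numPages);
  }

  // Reading through Number covers both mutable AtomicInteger values
  // (while accumulating) and plain boxed Integers.
  public int get(Kind kind) {
    final Number count = stats.get(kind);
    return (count == null) ? 0 : count.intValue();
  }

  public static void main(String[] args) {
    PageCounter counter = new PageCounter();
    counter.add(Kind.PLAIN, 1);
    counter.add(Kind.PLAIN, 3);
    System.out.println(counter.get(Kind.PLAIN));          // prints 4
    System.out.println(counter.get(Kind.RLE_DICTIONARY)); // prints 0
  }
}

Typing the immutable maps as Map<Encoding, Number> rather than Map<Encoding, AtomicInteger> lets EncodingStats hold whichever value type it was built with while the getters stay unchanged.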
Test for EncodingStats:

@@ -21,9 +21,6 @@
 
 import org.junit.Test;
 
-import java.util.HashMap;
-import java.util.Map;
-
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
@@ -39,28 +36,31 @@ public void testReusedBuilder() {
     builder.addDataEncoding(Encoding.DELTA_BYTE_ARRAY);
     EncodingStats stats1 = builder.build();
 
-    Map<Encoding, Integer> expectedDictStats1 = new HashMap<>();
-    expectedDictStats1.put(Encoding.PLAIN, 1);
-    Map<Encoding, Integer> expectedDataStats1 = new HashMap<>();
-    expectedDataStats1.put(Encoding.RLE_DICTIONARY, 3);
-    expectedDataStats1.put(Encoding.DELTA_BYTE_ARRAY, 2);
-
     builder.clear();
     builder.addDataEncoding(Encoding.PLAIN);
     builder.addDataEncoding(Encoding.PLAIN);
     builder.addDataEncoding(Encoding.PLAIN);
     builder.addDataEncoding(Encoding.PLAIN);
     EncodingStats stats2 = builder.build();
 
-    Map<Encoding, Integer> expectedDictStats2 = new HashMap<>();
-    Map<Encoding, Integer> expectedDataStats2 = new HashMap<>();
-    expectedDataStats2.put(Encoding.PLAIN, 4);
-
-    assertEquals("Dictionary stats should be correct", expectedDictStats2, stats2.dictStats);
-    assertEquals("Data stats should be correct", expectedDataStats2, stats2.dataStats);
-
-    assertEquals("Dictionary stats should be correct after reuse", expectedDictStats1, stats1.dictStats);
-    assertEquals("Data stats should be correct after reuse", expectedDataStats1, stats1.dataStats);
+    assertEquals("Dictionary stats should be correct", 0,
+        stats2.dictStats.size());
+    assertEquals("Data stats size should be correct", 1,
+        stats2.dataStats.size());
+    assertEquals("Data stats content should be correct", 4,
+        stats2.dataStats.get(Encoding.PLAIN).intValue());
+
+    assertEquals("Dictionary stats size should be correct after reuse",
+        1, stats1.dictStats.size());
+    assertEquals("Dictionary stats content should be correct", 1,
+        stats1.dictStats.get(Encoding.PLAIN).intValue());
+
+    assertEquals("Data stats size should be correct after reuse", 2,
+        stats1.dataStats.size());
+    assertEquals("Data stats content should be correct", 3,
+        stats1.dataStats.get(Encoding.RLE_DICTIONARY).intValue());
+    assertEquals("Data stats content should be correct", 2,
+        stats1.dataStats.get(Encoding.DELTA_BYTE_ARRAY).intValue());
   }
 
   @Test
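A likely reason the rewritten test checks map sizes and per-key intValue() rather than comparing whole maps: AtomicInteger does not override Object.equals, so a map holding AtomicInteger values is never equal to a HashMap of boxed Integers, even when every count matches. A small demonstration follows; the class name is hypothetical.

import java.util.concurrent.atomic.AtomicInteger;

public class AtomicEqualsDemo {
  public static void main(String[] args) {
    AtomicInteger count = new AtomicInteger(1);
    // AtomicInteger inherits identity equality from Object:
    System.out.println(count.equals(new AtomicInteger(1))); // false
    // Integer.equals requires its argument to be an Integer:
    System.out.println(Integer.valueOf(1).equals(count));   // false
    // Comparing through intValue() works for any Number:
    System.out.println(count.intValue() == 1);              // true
  }
}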