Skip to content

Commit 51dc912

Browse files
jkosh44 authored and keith-turner committed
fixes #946 Added mem efficient col buffer for GCiter (#952)
1 parent d123d42 commit 51dc912

File tree

3 files changed

+193
-29
lines changed

3 files changed

+193
-29
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,138 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more contributor license
3+
* agreements. See the NOTICE file distributed with this work for additional information regarding
4+
* copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
5+
* "License"); you may not use this file except in compliance with the License. You may obtain a
6+
* copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software distributed under the License
11+
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
12+
* or implied. See the License for the specific language governing permissions and limitations under
13+
* the License.
14+
*/
15+
16+
package org.apache.fluo.accumulo.iterators;
17+
18+
import java.lang.IllegalArgumentException;
19+
import java.util.ArrayList;
20+
import java.util.Arrays;
21+
import java.util.function.LongPredicate;
22+
23+
import org.apache.accumulo.core.data.Key;
24+
import org.apache.accumulo.core.data.PartialKey;
25+
import org.apache.accumulo.core.data.Value;
26+
27+
/**
28+
* This class buffers Keys that all have the same row+column. Internally
29+
* it only stores one Key, a list of timestamps and a list of values. At iteration
30+
* time it materializes each Key+Value.
31+
*/
32+
class ColumnBuffer {
33+
34+
private Key key;
35+
private ArrayList<Long> timeStamps;
36+
private ArrayList<byte[]> values;
37+
38+
public ColumnBuffer() {
39+
40+
this.key = null;
41+
this.timeStamps = new ArrayList<>();
42+
this.values = new ArrayList<>();
43+
}
44+
45+
/**
46+
* @param timestamp Timestamp to be added to buffer
47+
* @param v Value to be added to buffer
48+
*/
49+
private void add(long timestamp, byte[] v) {
50+
51+
timeStamps.add(timestamp);
52+
values.add(v);
53+
}
54+
55+
/**
56+
* When empty, the first key added sets the row+column. After this all keys
57+
* added must have the same row+column.
58+
*
59+
* @param k Key to be added to buffer
60+
* @param v Value to be added to buffer
61+
*/
62+
public void add(Key k, byte[] vByte) throws IllegalArgumentException {
63+
vByte = Arrays.copyOf(vByte, vByte.length);
64+
65+
if (key == null) {
66+
key = new Key(k);
67+
add(k.getTimestamp(), vByte);
68+
} else if (key.equals(k, PartialKey.ROW_COLFAM_COLQUAL_COLVIS)) {
69+
add(k.getTimestamp(), vByte);
70+
} else {
71+
throw new IllegalArgumentException();
72+
}
73+
}
74+
75+
/**
76+
* When empty, the first key added sets the row+column. After this all keys
77+
* added must have the same row+column.
78+
*
79+
* @param k Key to be added to buffer
80+
* @param v Value to be added to buffer
81+
*/
82+
public void add(Key k, Value v) throws IllegalArgumentException {
83+
add(k, v.get());
84+
}
85+
86+
/**
87+
* Clears the dest ColumnBuffer and inserts all entries in dest where the timestamp passes
88+
* the timestampTest.
89+
*
90+
* @param dest Destination ColumnBuffer
91+
* @param timestampTest Test to determine which timestamps get added to dest
92+
*/
93+
public void copyTo(ColumnBuffer dest, LongPredicate timestampTest) {
94+
dest.clear();
95+
96+
if (key != null) {
97+
dest.key = new Key(key);
98+
}
99+
100+
for (int i = 0; i < timeStamps.size(); i++) {
101+
long time = timeStamps.get(i);
102+
if (timestampTest.test(time)) {
103+
dest.add(time, values.get(i));
104+
}
105+
}
106+
}
107+
108+
public void clear() {
109+
timeStamps.clear();
110+
values.clear();
111+
key = null;
112+
}
113+
114+
/**
115+
* @return the size of the current buffer
116+
*/
117+
public int size() {
118+
return timeStamps.size();
119+
}
120+
121+
/**
122+
* @param pos Position of the Key that will be retrieved
123+
* @return The key at a given position
124+
*/
125+
public Key getKey(int pos) {
126+
Key tmpKey = new Key(key);
127+
tmpKey.setTimestamp(timeStamps.get(pos));
128+
return tmpKey;
129+
}
130+
131+
/**
132+
* @param pos Position of the Value that will be retrieved
133+
* @return The value at a given position
134+
*/
135+
public Value getValue(int pos) {
136+
return new Value(values.get(pos));
137+
}
138+
}

modules/accumulo/src/main/java/org/apache/fluo/accumulo/iterators/GarbageCollectionIterator.java

+17-29
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
import java.util.Collection;
2222
import java.util.HashSet;
2323
import java.util.Map;
24+
import java.util.function.LongPredicate;
2425

2526
import com.google.common.annotations.VisibleForTesting;
2627
import org.apache.accumulo.core.client.IteratorSetting;
@@ -44,18 +45,6 @@
4445
*/
4546
public class GarbageCollectionIterator implements SortedKeyValueIterator<Key, Value> {
4647

47-
private static class KeyValue extends SimpleImmutableEntry<Key, Value> {
48-
private static final long serialVersionUID = 1L;
49-
50-
public KeyValue(Key key, Value value) {
51-
super(new Key(key), new Value(value));
52-
}
53-
54-
public KeyValue(Key key, byte[] value) {
55-
super(new Key(key), new Value(value));
56-
}
57-
}
58-
5948
@VisibleForTesting
6049
static final String GC_TIMESTAMP_OPT = "timestamp.gc";
6150

@@ -65,8 +54,8 @@ public KeyValue(Key key, byte[] value) {
6554
private Long gcTimestamp;
6655
private SortedKeyValueIterator<Key, Value> source;
6756

68-
private ArrayList<KeyValue> keys = new ArrayList<>();
69-
private ArrayList<KeyValue> keysFiltered = new ArrayList<>();
57+
private ColumnBuffer keys = new ColumnBuffer();
58+
private ColumnBuffer keysFiltered = new ColumnBuffer();
7059
private HashSet<Long> completeTxs = new HashSet<>();
7160
private HashSet<Long> rolledback = new HashSet<>();
7261
private Key curCol = new Key();
@@ -77,11 +66,11 @@ public KeyValue(Key key, byte[] value) {
7766
@Override
7867
public void init(SortedKeyValueIterator<Key, Value> source, Map<String, String> options,
7968
IteratorEnvironment env) throws IOException {
69+
8070
if (env.getIteratorScope() == IteratorScope.scan) {
8171
throw new IllegalArgumentException();
8272
}
8373
this.source = source;
84-
8574
isFullMajc = env.getIteratorScope() == IteratorScope.majc && env.isFullMajorCompaction();
8675

8776
String oats = options.get(GC_TIMESTAMP_OPT);
@@ -96,6 +85,7 @@ public void init(SortedKeyValueIterator<Key, Value> source, Map<String, String>
9685
}
9786
}
9887

88+
9989
@Override
10090
public boolean hasTop() {
10191
return position < keysFiltered.size() || source.hasTop();
@@ -191,7 +181,7 @@ private void readColMetadata() throws IOException {
191181
long ts = source.getTopKey().getTimestamp() & ColumnConstants.TIMESTAMP_MASK;
192182

193183
if (colType == ColumnConstants.TX_DONE_PREFIX) {
194-
keys.add(new KeyValue(source.getTopKey(), source.getTopValue()));
184+
keys.add(source.getTopKey(), source.getTopValue());
195185
completeTxs.add(ts);
196186
} else if (colType == ColumnConstants.WRITE_PREFIX) {
197187
boolean keep = false;
@@ -224,7 +214,7 @@ private void readColMetadata() throws IOException {
224214
}
225215

226216
if (keep) {
227-
keys.add(new KeyValue(source.getTopKey(), val));
217+
keys.add(source.getTopKey(), val);
228218
} else if (complete) {
229219
completeTxs.remove(ts);
230220
}
@@ -249,21 +239,21 @@ private void readColMetadata() throws IOException {
249239
}
250240

251241
if (keep) {
252-
keys.add(new KeyValue(source.getTopKey(), source.getTopValue()));
242+
keys.add(source.getTopKey(), source.getTopValue());
253243
} else if (complete) {
254244
completeTxs.remove(txDoneTs);
255245
}
256246
} else if (colType == ColumnConstants.LOCK_PREFIX) {
257247
if (ts > invalidationTime) {
258-
keys.add(new KeyValue(source.getTopKey(), source.getTopValue()));
248+
keys.add(source.getTopKey(), source.getTopValue());
259249
}
260250
} else if (colType == ColumnConstants.DATA_PREFIX) {
261251
// can stop looking
262252
break;
263253
} else if (colType == ColumnConstants.ACK_PREFIX) {
264254
if (!sawAck) {
265255
if (ts >= firstWrite) {
266-
keys.add(new KeyValue(source.getTopKey(), source.getTopValue()));
256+
keys.add(source.getTopKey(), source.getTopValue());
267257
}
268258
sawAck = true;
269259
}
@@ -274,22 +264,20 @@ private void readColMetadata() throws IOException {
274264
source.next();
275265
}
276266

277-
for (KeyValue kv : keys) {
278-
long colType = kv.getKey().getTimestamp() & ColumnConstants.PREFIX_MASK;
267+
keys.copyTo(keysFiltered, (timestamp -> {
268+
long colType = timestamp & ColumnConstants.PREFIX_MASK;
279269
if (colType == ColumnConstants.TX_DONE_PREFIX) {
280-
if (completeTxs.contains(kv.getKey().getTimestamp() & ColumnConstants.TIMESTAMP_MASK)) {
281-
keysFiltered.add(kv);
282-
}
270+
return completeTxs.contains(timestamp & ColumnConstants.TIMESTAMP_MASK);
283271
} else {
284-
keysFiltered.add(kv);
272+
return true;
285273
}
286-
}
274+
}));
287275
}
288276

289277
@Override
290278
public Key getTopKey() {
291279
if (position < keysFiltered.size()) {
292-
return keysFiltered.get(position).getKey();
280+
return keysFiltered.getKey(position);
293281
} else {
294282
return source.getTopKey();
295283
}
@@ -298,7 +286,7 @@ public Key getTopKey() {
298286
@Override
299287
public Value getTopValue() {
300288
if (position < keysFiltered.size()) {
301-
return keysFiltered.get(position).getValue();
289+
return keysFiltered.getValue(position);
302290
} else {
303291
return source.getTopValue();
304292
}
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,38 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more contributor license
3+
* agreements. See the NOTICE file distributed with this work for additional information regarding
4+
* copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
5+
* "License"); you may not use this file except in compliance with the License. You may obtain a
6+
* copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software distributed under the License
11+
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
12+
* or implied. See the License for the specific language governing permissions and limitations under
13+
* the License.
14+
*/
15+
16+
package org.apache.fluo.accumulo.iterators;
17+
18+
import java.lang.IllegalArgumentException;
19+
20+
import org.apache.accumulo.core.data.Key;
21+
import org.apache.accumulo.core.data.Value;
22+
import org.junit.Assert;
23+
import org.junit.Test;
24+
25+
public class ColumnBufferTest {
26+
27+
@Test
28+
public void testDifferentKeys() {
29+
ColumnBuffer columnBuffer = new ColumnBuffer();
30+
columnBuffer.add(new Key("row1"), new Value());
31+
try {
32+
columnBuffer.add(new Key("row2"), new Value());
33+
Assert.fail();
34+
} catch (IllegalArgumentException e) {
35+
36+
}
37+
}
38+
}

0 commit comments

Comments
 (0)