Skip to content
Permalink
Browse files
[ASTERIXDB-3016][RT] Fix failure in hash groupby
- user model changes: no
- storage format changes: no
- interface changes: no

Details:
- Modify hash group by to force garbage collection on the
  hash table if a tuple could not be inserted into it
- Make hash group by clean up its run files in case
  of an error

Change-Id: I7a133fa1d0555ebbcb7a9e3cb7445757716c9a2a
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/15325
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Dmitry Lychagin <dmitry.lychagin@couchbase.com>
Reviewed-by: Till Westmann <till@couchbase.com>
  • Loading branch information
Dmitry Lychagin authored and AliSolaiman committed Feb 17, 2022
1 parent 0cd0e73 commit 3d79c9f39392d6e2e5127b716788e4335014606b
Showing 8 changed files with 117 additions and 14 deletions.
@@ -0,0 +1,28 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/* Drop any existing tpcds dataverse, create it anew, and make it the current dataverse. */
drop dataverse tpcds if exists;
create dataverse tpcds;
use tpcds;

/* item: open-typed dataset; only the primary-key field i_item_sk is declared. */
create dataset item(
    i_item_sk string not unknown
)
open type
primary key i_item_sk;

/* inventory: open-typed dataset with a three-field composite primary key. */
create dataset inventory(
    inv_date_sk string not unknown,
    inv_item_sk string not unknown,
    inv_warehouse_sk string not unknown
)
open type
primary key inv_date_sk, inv_item_sk, inv_warehouse_sk;
@@ -0,0 +1,26 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

use tpcds;

/* Presumably required so that tpcds_datagen() can be called from a query; confirm against the function's registration. */
set `import-private-functions` `true`;

/* Populate item from tpcds_datagen("item", 0.5), passing each generated record through object_remove(t, "table_name"). */
insert into item (
    select value object_remove(t, "table_name")
    from tpcds_datagen("item", 0.5) t
);

/* Populate inventory the same way from tpcds_datagen("inventory", 0.5). */
insert into inventory (
    select value object_remove(t, "table_name")
    from tpcds_datagen("inventory", 0.5) t
);
@@ -0,0 +1,27 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

use tpcds;

/*
 * Query for ASTERIXDB-3016 (failure in hash group-by).
 *
 * Joins inventory with item on inv_item_sk = i_item_sk, carrying a hash-bcast
 * hint on the equality, then groups by i_product_name with a hash hint on the
 * GROUP BY. For each product name it computes the average inv_quantity_on_hand
 * rounded to one decimal place (qoh). The result is ordered by
 * (qoh, i_product_name) and limited to one row.
 *
 * NOTE: the two hint comments below are position-sensitive; keep them attached
 * to the "=" operator and the GROUP BY clause respectively.
 */
SELECT ROUND(AVG(inv.inv_quantity_on_hand), 1) qoh, i.i_product_name
FROM inventory inv, item i
WHERE inv.inv_item_sk /*+hash-bcast*/ = i.i_item_sk
/*+ hash */ GROUP BY i.i_product_name
ORDER BY qoh, i.i_product_name
LIMIT 1;
@@ -0,0 +1 @@
{ "qoh": 402.0, "i_product_name": "ableoughtn st" }
@@ -6099,6 +6099,11 @@
<output-dir compare="Text">hash-group-by-decor</output-dir>
</compilation-unit>
</test-case>
<!-- ASTERIXDB-3016: hash group-by test; output is compared as text against the query-ASTERIXDB-3016 output directory. -->
<test-case FilePath="group-by">
<compilation-unit name="query-ASTERIXDB-3016">
<output-dir compare="Text">query-ASTERIXDB-3016</output-dir>
</compilation-unit>
</test-case>
</test-group>
<test-group name="index-join">
<test-case FilePath="index-join">
@@ -177,16 +177,21 @@ public void clear(int partition) throws HyracksDataException {
}

// Checks whether the garbage collection is required and conducts a garbage collection if so.
if (hashTableForTuplePointer.isGarbageCollectionNeeded()) {
collectGarbageInHashTableForTuplePointer(false);
bufferManager.clearPartition(partition);
}

private boolean collectGarbageInHashTableForTuplePointer(boolean force) throws HyracksDataException {
if (force || hashTableForTuplePointer.isGarbageCollectionNeeded()) {
int numberOfFramesReclaimed =
hashTableForTuplePointer.collectGarbage(bufferAccessor, tpcIntermediate);
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("Garbage Collection on Hash table is done. Deallocated frames:"
+ numberOfFramesReclaimed);
}
return numberOfFramesReclaimed != -1;
}

bufferManager.clearPartition(partition);
return false;
}

private int getPartition(int entryInHashTable) {
@@ -234,11 +239,18 @@ private boolean insertNewAggregateEntry(int entryInHashTable, IFrameTupleAccesso
}

// Insertion to the hash table
if (!hashTableForTuplePointer.insert(entryInHashTable, pointer)) {
// To preserve the atomicity of this method, we need to undo the effect
// of the above bufferManager.insertTuple() call since the given insertion has failed.
bufferManager.cancelInsertTuple(pid);
return false;
boolean inserted = hashTableForTuplePointer.insert(entryInHashTable, pointer);
if (!inserted) {
// Force garbage collection on the hash table and attempt to insert again
if (collectGarbageInHashTableForTuplePointer(true)) {
inserted = hashTableForTuplePointer.insert(entryInHashTable, pointer);
}
if (!inserted) {
// To preserve the atomicity of this method, we need to undo the effect
// of the above bufferManager.insertTuple() call since the given insertion has failed.
bufferManager.cancelInsertTuple(pid);
return false;
}
}

return true;
@@ -142,11 +142,15 @@ private void doPass(ISpillableTable table, RunFileWriter[] runs, int[] numOfTupl
runs[i].getFileSize(), gbyFields, fdFields, groupByComparators, nmkComputer,
mergeAggregatorFactory, partialAggRecordDesc, outRecordDesc, frameLimit, level);
RunFileWriter[] runFileWriters = new RunFileWriter[partitionTable.getNumPartitions()];
int[] sizeInTuplesNextLevel =
buildGroup(runs[i].createDeleteOnCloseReader(), partitionTable, runFileWriters);
for (int idFile = 0; idFile < runFileWriters.length; idFile++) {
if (runFileWriters[idFile] != null) {
generatedRuns.add(runFileWriters[idFile]);
int[] sizeInTuplesNextLevel;
try {
sizeInTuplesNextLevel =
buildGroup(runs[i].createDeleteOnCloseReader(), partitionTable, runFileWriters);
} finally {
for (int idFile = 0; idFile < runFileWriters.length; idFile++) {
if (runFileWriters[idFile] != null) {
generatedRuns.add(runFileWriters[idFile]);
}
}
}

@@ -54,7 +54,7 @@ public interface ISerializableTable {
* required to access the real tuple to calculate the original hash value
* @param tpc:
* hash function
* @return the number of frames that are reclaimed.
* @return the number of frames that are reclaimed. The value -1 is returned when no compaction happened.
* @throws HyracksDataException
*/
int collectGarbage(ITuplePointerAccessor bufferAccessor, ITuplePartitionComputer tpc) throws HyracksDataException;

0 comments on commit 3d79c9f

Please sign in to comment.