Skip to content

Commit

Permalink
Always keep enough space for pointers to overflow blocks in string se…
Browse files Browse the repository at this point in the history
…gments
  • Loading branch information
Mytherin committed Nov 2, 2019
1 parent b2c1b28 commit aa21adf
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 29 deletions.
9 changes: 8 additions & 1 deletion src/include/storage/string_segment.hpp
Expand Up @@ -101,13 +101,20 @@ class StringSegment : public UncompressedSegment {
string_update_info_t MergeStringUpdate(SegmentStatistics &stats, Vector &update, row_t *ids, index_t vector_offset, StringUpdateInfo &update_info);

void MergeUpdateInfo(UpdateInfo *node, Vector &update, row_t *ids, index_t vector_offset, string_location_t string_locations[], nullmask_t original_nullmask);

//! The amount of bytes remaining to store in the block
index_t RemainingSpace() {
return Storage::BLOCK_SIZE - dictionary_offset - max_vector_count * vector_size;
}
private:
//! The max string size that is allowed within a block. Strings bigger than this will be labeled as a BIG STRING and offloaded to the overflow blocks.
static constexpr uint16_t STRING_BLOCK_LIMIT = 4096;
//! Marker used in length field to indicate the presence of a big string
static constexpr uint16_t BIG_STRING_MARKER = (uint16_t) -1;
//! Base size of big string marker (block id + offset)
static constexpr index_t BIG_STRING_MARKER_BASE_SIZE = sizeof(block_id_t) + sizeof(int32_t);
//! The marker size of the big string
static constexpr index_t BIG_STRING_MARKER_SIZE = sizeof(block_id_t) + sizeof(int32_t) + sizeof(uint16_t);
static constexpr index_t BIG_STRING_MARKER_SIZE = BIG_STRING_MARKER_BASE_SIZE + sizeof(uint16_t);
};

}
16 changes: 12 additions & 4 deletions src/storage/string_segment.cpp
Expand Up @@ -285,8 +285,7 @@ index_t StringSegment::Append(SegmentStatistics &stats, Vector &data, index_t of
if (vector_index == max_vector_count) {
// we are at the maximum vector, check if there is space to increase the maximum vector count
// as a heuristic, we only allow another vector to be added if we have at least 32 bytes per string remaining (32KB out of a 256KB block, or around 12% empty)
index_t remaining_space = Storage::BLOCK_SIZE - dictionary_offset - max_vector_count * vector_size;
if (remaining_space >= STANDARD_VECTOR_SIZE * 32) {
if (RemainingSpace() >= STANDARD_VECTOR_SIZE * 32) {
// we have enough remaining space to add another vector
ExpandStringSegment(handle->node->buffer);
} else {
Expand All @@ -312,22 +311,30 @@ void StringSegment::AppendData(SegmentStatistics &stats, data_ptr_t target, data
auto &result_nullmask = *((nullmask_t*) target);
auto result_data = (int32_t *) (target + sizeof(nullmask_t));

index_t remaining_strings = STANDARD_VECTOR_SIZE - (this->tuple_count % STANDARD_VECTOR_SIZE);
VectorOperations::Exec(source.sel_vector, count + offset, [&](index_t i, index_t k) {
if (source.nullmask[i]) {
// null value is stored as -1
result_data[k - offset + target_offset] = 0;
result_nullmask[k - offset + target_offset] = true;
stats.has_null = true;
} else {
assert(dictionary_offset < Storage::BLOCK_SIZE);
// non-null value, check if we can fit it within the block
// we also always store strings that have a size >= STRING_BLOCK_LIMIT in the overflow blocks
index_t string_length = strlen(ldata[i]);
index_t total_length = string_length + 1 + sizeof(uint16_t);

if (string_length > stats.max_string_length) {
stats.max_string_length = string_length;
}
if (total_length >= STRING_BLOCK_LIMIT || total_length > Storage::BLOCK_SIZE - dictionary_offset - max_vector_count * vector_size) {
// determine hwether or not the string needs to be stored in an overflow block
// we never place small strings in the overflow blocks: the pointer would take more space than the string itself
// we always place big strings (>= STRING_BLOCK_LIMIT) in the overflow blocks
// we also have to always leave enough room for BIG_STRING_MARKER_SIZE for each of the remaining strings
if (total_length > BIG_STRING_MARKER_BASE_SIZE &&
(total_length >= STRING_BLOCK_LIMIT ||
total_length + (remaining_strings * BIG_STRING_MARKER_SIZE) > RemainingSpace())) {
assert(RemainingSpace() >= BIG_STRING_MARKER_SIZE);
// string is too big for block: write to overflow blocks
block_id_t block;
int32_t offset;
Expand Down Expand Up @@ -356,6 +363,7 @@ void StringSegment::AppendData(SegmentStatistics &stats, data_ptr_t target, data
// place the dictionary offset into the set of vectors
result_data[k - offset + target_offset] = dictionary_offset;
}
remaining_strings--;
}, offset);
}

Expand Down
51 changes: 27 additions & 24 deletions test/sql/transactions/test_transaction_local.cpp
Expand Up @@ -131,22 +131,33 @@ TEST_CASE("Test operations on transaction local data with unique indices", "[tra
con.EnableQueryVerification();

// perform different operations on the same data within one transaction
REQUIRE_NO_FAIL(con.Query("BEGIN TRANSACTION"));
REQUIRE_NO_FAIL(con.Query("CREATE TABLE integers(i INTEGER PRIMARY KEY, j INTEGER)"));

// append
REQUIRE_NO_FAIL(con.Query("INSERT INTO integers VALUES (1, 3), (2, 3)"));

result = con.Query("SELECT * FROM integers ORDER BY 1");
REQUIRE(CHECK_COLUMN(result, 0, {1, 2}));
REQUIRE(CHECK_COLUMN(result, 1, {3, 3}));

// appending the same value again fails
REQUIRE_FAIL(con.Query("INSERT INTO integers VALUES (1, 2)"));
// updating also fails if there is a conflict
REQUIRE_FAIL(con.Query("UPDATE integers SET i=1 WHERE i=2"));
// but not if there is no conflict
REQUIRE_NO_FAIL(con.Query("UPDATE integers SET i=3 WHERE i=2"));
for(index_t i = 0; i < 3; i++) {
REQUIRE_NO_FAIL(con.Query("BEGIN TRANSACTION"));
REQUIRE_NO_FAIL(con.Query("CREATE TABLE integers(i INTEGER PRIMARY KEY, j INTEGER)"));
REQUIRE_NO_FAIL(con.Query("INSERT INTO integers VALUES (1, 3), (2, 3)"));

result = con.Query("SELECT * FROM integers ORDER BY 1");
REQUIRE(CHECK_COLUMN(result, 0, {1, 2}));
REQUIRE(CHECK_COLUMN(result, 1, {3, 3}));

switch(i) {
case 0:
// appending the same value again fails
REQUIRE_FAIL(con.Query("INSERT INTO integers VALUES (1, 2)"));
REQUIRE_NO_FAIL(con.Query("ROLLBACK"));
break;
case 1:
// updating also fails if there is a conflict
REQUIRE_FAIL(con.Query("UPDATE integers SET i=1 WHERE i=2"));
REQUIRE_NO_FAIL(con.Query("ROLLBACK"));
break;
default:
// but not if there is no conflict
REQUIRE_NO_FAIL(con.Query("UPDATE integers SET i=3 WHERE i=2"));
REQUIRE_NO_FAIL(con.Query("COMMIT"));
break;
}
}

result = con.Query("SELECT * FROM integers ORDER BY 1");
REQUIRE(CHECK_COLUMN(result, 0, {1, 3}));
Expand All @@ -163,14 +174,6 @@ TEST_CASE("Test operations on transaction local data with unique indices", "[tra
result = con.Query("SELECT * FROM integers ORDER BY 1");
REQUIRE(CHECK_COLUMN(result, 0, {1, 3}));
REQUIRE(CHECK_COLUMN(result, 1, {3, 3}));

// commit
REQUIRE_NO_FAIL(con.Query("COMMIT"));

// we can still read the table now
result = con.Query("SELECT * FROM integers ORDER BY 1");
REQUIRE(CHECK_COLUMN(result, 0, {1, 3}));
REQUIRE(CHECK_COLUMN(result, 1, {3, 3}));
}

TEST_CASE("Test transaction aborts after failures", "[transactions]") {
Expand Down

0 comments on commit aa21adf

Please sign in to comment.