From c7daaaf5e63d0bd1d444385e62611fe276f6ce29 Mon Sep 17 00:00:00 2001 From: Loic Dachary Date: Tue, 27 May 2014 18:40:45 +0200 Subject: [PATCH] erasure-code: implement alignment on chunk sizes jerasure expects chunk sizes that are aligned on the largest possible vector size that could be used by SSE instructions, when available (LARGEST_VECTOR_WORDSIZE == 16 bytes). For techniques derived from Cauchy, encoding and decoding are done by subdividing the chunk into packets of packetsize bytes. The operations are done w * packetsize bytes at a time. It follows that each chunk must have a size that is a multiple of w * packetsize bytes. For techniques derived from Vandermonde, it is enough for the chunk size to be a multiple of w * LARGEST_VECTOR_WORDSIZE. ErasureCodeJerasure::get_alignment returns a size alignment constraint: the object size has to be padded to a multiple of it. The resulting object size then has to match the chunk constraints described above although they have no relationship with K. For Cauchy, it leads to excessive padding, making it impossible to set sensible parameters when the object size is small. When the per_chunk_alignment data member is true, the semantics of ErasureCodeJerasure::get_alignment are changed: it returns a size alignment constraint that the chunk size (instead of the object size) has to be a multiple of. The ErasureCodeJerasure::get_chunk_size method is modified to use the new semantics when appropriate. The jerasure-per-chunk-alignment parameter is parsed to set per_chunk_alignment for the Vandermonde and Cauchy techniques. The memory address of a chunk is implicitly aligned to a page boundary because it is allocated with buffer::create_page_aligned. 
http://tracker.ceph.com/issues/8475 Fixes: #8475 Signed-off-by: Loic Dachary --- .../jerasure/ErasureCodeJerasure.cc | 81 +++++++--- .../jerasure/ErasureCodeJerasure.h | 7 +- .../erasure-code/TestErasureCodeJerasure.cc | 143 +++++++++--------- 3 files changed, 143 insertions(+), 88 deletions(-) diff --git a/src/erasure-code/jerasure/ErasureCodeJerasure.cc b/src/erasure-code/jerasure/ErasureCodeJerasure.cc index 3fa1f6cdb446c..8b982ce3dc206 100644 --- a/src/erasure-code/jerasure/ErasureCodeJerasure.cc +++ b/src/erasure-code/jerasure/ErasureCodeJerasure.cc @@ -28,7 +28,6 @@ extern "C" { #include "liberation.h" } -// FIXME(loic) this may be too conservative, check back with feedback from Andreas #define LARGEST_VECTOR_WORDSIZE 16 #define dout_subsys ceph_subsys_osd @@ -65,10 +64,26 @@ void ErasureCodeJerasure::init(const map ¶meters) unsigned int ErasureCodeJerasure::get_chunk_size(unsigned int object_size) const { unsigned alignment = get_alignment(); - unsigned tail = object_size % alignment; - unsigned padded_length = object_size + ( tail ? ( alignment - tail ) : 0 ); - assert(padded_length % k == 0); - return padded_length / k; + if (per_chunk_alignment) { + unsigned chunk_size = object_size / k; + if (object_size % k) + chunk_size++; + dout(20) << "get_chunk_size: chunk_size " << chunk_size + << " must be modulo " << alignment << dendl; + assert(alignment <= chunk_size); + unsigned modulo = chunk_size % alignment; + if (modulo) { + dout(10) << "get_chunk_size: " << chunk_size + << " padded to " << chunk_size + alignment - modulo << dendl; + chunk_size += alignment - modulo; + } + return chunk_size; + } else { + unsigned tail = object_size % alignment; + unsigned padded_length = object_size + ( tail ? 
( alignment - tail ) : 0 ); + assert(padded_length % k == 0); + return padded_length / k; + } } int ErasureCodeJerasure::minimum_to_decode(const set &want_to_read, @@ -205,6 +220,19 @@ int ErasureCodeJerasure::to_int(const std::string &name, return r; } +bool ErasureCodeJerasure::to_bool(const std::string &name, + const map ¶meters, + bool default_value) +{ + if (parameters.find(name) == parameters.end() || + parameters.find(name)->second.size() == 0) { + dout(10) << name << " defaults to " << default_value << dendl; + return default_value; + } + const std::string value = parameters.find(name)->second; + return (value == "yes") || (value == "1") || (value == "true"); +} + bool ErasureCodeJerasure::is_prime(int value) { int prime55[] = { @@ -241,11 +269,14 @@ int ErasureCodeJerasureReedSolomonVandermonde::jerasure_decode(int *erasures, unsigned ErasureCodeJerasureReedSolomonVandermonde::get_alignment() const { - unsigned alignment = k*w*sizeof(int); - if ( ((w*sizeof(int))%LARGEST_VECTOR_WORDSIZE) ) - alignment = k*w*LARGEST_VECTOR_WORDSIZE; - return alignment; - + if (per_chunk_alignment) { + return w * LARGEST_VECTOR_WORDSIZE; + } else { + unsigned alignment = k*w*sizeof(int); + if ( ((w*sizeof(int))%LARGEST_VECTOR_WORDSIZE) ) + alignment = k*w*LARGEST_VECTOR_WORDSIZE; + return alignment; + } } void ErasureCodeJerasureReedSolomonVandermonde::parse(const map ¶meters) @@ -258,6 +289,7 @@ void ErasureCodeJerasureReedSolomonVandermonde::parse(const map ¶meters) @@ -330,10 +366,18 @@ int ErasureCodeJerasureCauchy::jerasure_decode(int *erasures, unsigned ErasureCodeJerasureCauchy::get_alignment() const { - unsigned alignment = k*w*packetsize*sizeof(int); - if ( ((w*packetsize*sizeof(int))%LARGEST_VECTOR_WORDSIZE) ) - alignment = k*w*packetsize*LARGEST_VECTOR_WORDSIZE; - return alignment; + if (per_chunk_alignment) { + unsigned alignment = w * packetsize; + unsigned modulo = alignment % LARGEST_VECTOR_WORDSIZE; + if (modulo) + alignment += LARGEST_VECTOR_WORDSIZE - 
modulo; + return alignment; + } else { + unsigned alignment = k*w*packetsize*sizeof(int); + if ( ((w*packetsize*sizeof(int))%LARGEST_VECTOR_WORDSIZE) ) + alignment = k*w*packetsize*LARGEST_VECTOR_WORDSIZE; + return alignment; + } } void ErasureCodeJerasureCauchy::parse(const map ¶meters) @@ -348,6 +392,7 @@ void ErasureCodeJerasureCauchy::parse(const map ¶me w = DEFAULT_W; } packetsize = to_int("packetsize", parameters, DEFAULT_PACKETSIZE); + per_chunk_alignment = to_bool("jerasure-per-chunk-alignment", parameters, false); } void ErasureCodeJerasureCauchy::prepare_schedule(int *matrix) diff --git a/src/erasure-code/jerasure/ErasureCodeJerasure.h b/src/erasure-code/jerasure/ErasureCodeJerasure.h index f5255497a8017..97257550b09ab 100644 --- a/src/erasure-code/jerasure/ErasureCodeJerasure.h +++ b/src/erasure-code/jerasure/ErasureCodeJerasure.h @@ -27,11 +27,13 @@ class ErasureCodeJerasure : public ErasureCodeInterface { const char *technique; string ruleset_root; string ruleset_failure_domain; + bool per_chunk_alignment; ErasureCodeJerasure(const char *_technique) : technique(_technique), ruleset_root("default"), - ruleset_failure_domain("host") + ruleset_failure_domain("host"), + per_chunk_alignment(false) {} virtual ~ErasureCodeJerasure() {} @@ -80,6 +82,9 @@ class ErasureCodeJerasure : public ErasureCodeInterface { static int to_int(const std::string &name, const map ¶meters, int default_value); + static bool to_bool(const std::string &name, + const map ¶meters, + bool default_value); static bool is_prime(int value); }; diff --git a/src/test/erasure-code/TestErasureCodeJerasure.cc b/src/test/erasure-code/TestErasureCodeJerasure.cc index 2ec8f184df114..5a164bbfab77e 100644 --- a/src/test/erasure-code/TestErasureCodeJerasure.cc +++ b/src/test/erasure-code/TestErasureCodeJerasure.cc @@ -42,70 +42,77 @@ TYPED_TEST_CASE(ErasureCodeTest, JerasureTypes); TYPED_TEST(ErasureCodeTest, encode_decode) { - TypeParam jerasure; - map parameters; - parameters["k"] = "2"; - 
parameters["m"] = "2"; - parameters["w"] = "7"; - parameters["packetsize"] = "8"; - jerasure.init(parameters); + const char *per_chunk_alignments[] = { "false", "true" }; + for (int per_chunk_alignment = 0 ; + per_chunk_alignment < 2; + per_chunk_alignment++) { + TypeParam jerasure; + map parameters; + parameters["k"] = "2"; + parameters["m"] = "2"; + parameters["w"] = "7"; + parameters["packetsize"] = "8"; + parameters["jerasure-per-chunk-alignment"] = + per_chunk_alignments[per_chunk_alignment]; + jerasure.init(parameters); #define LARGE_ENOUGH 2048 - bufferptr in_ptr(buffer::create_page_aligned(LARGE_ENOUGH)); - in_ptr.zero(); - in_ptr.set_length(0); - const char *payload = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; - in_ptr.append(payload, strlen(payload)); - bufferlist in; - in.push_front(in_ptr); - int want_to_encode[] = { 0, 1, 2, 3 }; - map encoded; - EXPECT_EQ(0, jerasure.encode(set(want_to_encode, want_to_encode+4), - in, - &encoded)); - EXPECT_EQ(4u, encoded.size()); - unsigned length = encoded[0].length(); - EXPECT_EQ(0, strncmp(encoded[0].c_str(), in.c_str(), length)); - EXPECT_EQ(0, strncmp(encoded[1].c_str(), in.c_str() + length, - in.length() - length)); - - - // all chunks are available - { - int want_to_decode[] = { 0, 1 }; - map decoded; - EXPECT_EQ(0, jerasure.decode(set(want_to_decode, want_to_decode+2), - encoded, - &decoded)); - EXPECT_EQ(2u, decoded.size()); - EXPECT_EQ(length, decoded[0].length()); - EXPECT_EQ(0, strncmp(decoded[0].c_str(), in.c_str(), length)); - EXPECT_EQ(0, strncmp(decoded[1].c_str(), in.c_str() + length, + bufferptr in_ptr(buffer::create_page_aligned(LARGE_ENOUGH)); + in_ptr.zero(); + in_ptr.set_length(0); + const char *payload = + 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; + in_ptr.append(payload, strlen(payload)); + bufferlist in; + in.push_front(in_ptr); + int want_to_encode[] = { 0, 1, 2, 3 }; + map encoded; + EXPECT_EQ(0, jerasure.encode(set(want_to_encode, want_to_encode+4), + in, + &encoded)); + EXPECT_EQ(4u, encoded.size()); + unsigned length = encoded[0].length(); + EXPECT_EQ(0, strncmp(encoded[0].c_str(), in.c_str(), length)); + EXPECT_EQ(0, strncmp(encoded[1].c_str(), in.c_str() + length, in.length() - length)); - } - // two chunks are missing - { - map degraded = encoded; - degraded.erase(0); - degraded.erase(1); - EXPECT_EQ(2u, degraded.size()); - int want_to_decode[] = { 0, 1 }; - map decoded; - EXPECT_EQ(0, jerasure.decode(set(want_to_decode, want_to_decode+2), - degraded, - &decoded)); - // always decode all, regardless of want_to_decode - EXPECT_EQ(4u, decoded.size()); - EXPECT_EQ(length, decoded[0].length()); - EXPECT_EQ(0, strncmp(decoded[0].c_str(), in.c_str(), length)); - EXPECT_EQ(0, strncmp(decoded[1].c_str(), in.c_str() + length, - in.length() - length)); + + // all chunks are available + { + int want_to_decode[] = { 0, 1 }; + map decoded; + EXPECT_EQ(0, jerasure.decode(set(want_to_decode, want_to_decode+2), + encoded, + &decoded)); + EXPECT_EQ(2u, decoded.size()); + EXPECT_EQ(length, decoded[0].length()); + EXPECT_EQ(0, strncmp(decoded[0].c_str(), in.c_str(), length)); + EXPECT_EQ(0, strncmp(decoded[1].c_str(), in.c_str() + length, + in.length() - length)); + } + + // two chunks are missing + { + map degraded = encoded; + degraded.erase(0); + degraded.erase(1); + EXPECT_EQ(2u, degraded.size()); + int want_to_decode[] = { 0, 1 }; + map decoded; + EXPECT_EQ(0, 
jerasure.decode(set(want_to_decode, want_to_decode+2), + degraded, + &decoded)); + // always decode all, regardless of want_to_decode + EXPECT_EQ(4u, decoded.size()); + EXPECT_EQ(length, decoded[0].length()); + EXPECT_EQ(0, strncmp(decoded[0].c_str(), in.c_str(), length)); + EXPECT_EQ(0, strncmp(decoded[1].c_str(), in.c_str() + length, + in.length() - length)); + } } } @@ -216,7 +223,7 @@ TEST(ErasureCodeTest, encode) parameters["w"] = "8"; jerasure.init(parameters); - unsigned alignment = jerasure.get_alignment(); + unsigned aligned_object_size = jerasure.get_alignment() * 2; { // // When the input bufferlist needs to be padded because @@ -225,17 +232,16 @@ TEST(ErasureCodeTest, encode) bufferlist in; map encoded; int want_to_encode[] = { 0, 1, 2, 3 }; - int trail_length = 10; - in.append(string(alignment + trail_length, 'X')); + int trail_length = 1; + in.append(string(aligned_object_size + trail_length, 'X')); EXPECT_EQ(0, jerasure.encode(set(want_to_encode, want_to_encode+4), in, &encoded)); EXPECT_EQ(4u, encoded.size()); - for(int i = 0; i < 4; i++) - EXPECT_EQ(alignment, encoded[i].length()); char *last_chunk = encoded[1].c_str(); + int length =encoded[1].length(); EXPECT_EQ('X', last_chunk[0]); - EXPECT_EQ('\0', last_chunk[trail_length]); + EXPECT_EQ('\0', last_chunk[length - trail_length]); } { @@ -251,11 +257,10 @@ TEST(ErasureCodeTest, encode) map encoded; set want_to_encode; want_to_encode.insert(0); - int trail_length = 10; - in.append(string(alignment + trail_length, 'X')); + int trail_length = 1; + in.append(string(aligned_object_size + trail_length, 'X')); EXPECT_EQ(0, jerasure.encode(want_to_encode, in, &encoded)); EXPECT_EQ(1u, encoded.size()); - EXPECT_EQ(alignment, encoded[0].length()); } }