Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

crush: optimized crush algorithm for balanced pg distribution among osds #2402

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/crush/CrushCompiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ int CrushCompiler::decompile_bucket_impl(int i, ostream &out)
bool dopos = false;
switch (alg) {
case CRUSH_BUCKET_UNIFORM:
case CRUSH_BUCKET_LINEAR:
out << "\t# do not change bucket size (" << n << ") unnecessarily";
dopos = true;
break;
Expand Down Expand Up @@ -435,6 +436,8 @@ int CrushCompiler::parse_bucket(iter_t const& i)
alg = CRUSH_BUCKET_TREE;
else if (a == "straw")
alg = CRUSH_BUCKET_STRAW;
else if (a == "linear")
alg = CRUSH_BUCKET_LINEAR;
else {
err << "unknown bucket alg '" << a << "'" << std::endl << std::endl;
return -EINVAL;
Expand Down Expand Up @@ -512,7 +515,7 @@ int CrushCompiler::parse_bucket(iter_t const& i)
assert(0);

}
if (alg == CRUSH_BUCKET_UNIFORM) {
if (alg == CRUSH_BUCKET_UNIFORM || alg == CRUSH_BUCKET_LINEAR) {
if (!have_uniform_weight) {
have_uniform_weight = true;
uniform_weight = weight;
Expand Down
11 changes: 11 additions & 0 deletions src/crush/CrushWrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -964,6 +964,10 @@ void CrushWrapper::encode(bufferlist& bl, bool lean) const
}
break;

case CRUSH_BUCKET_LINEAR:
::encode(((crush_bucket_linear*)crush->buckets[i])->item_weight, bl);
break;

default:
assert(0);
break;
Expand Down Expand Up @@ -1108,6 +1112,9 @@ void CrushWrapper::decode_crush_bucket(crush_bucket** bptr, bufferlist::iterator
case CRUSH_BUCKET_STRAW:
size = sizeof(crush_bucket_straw);
break;
case CRUSH_BUCKET_LINEAR:
size = sizeof(crush_bucket_linear);
break;
default:
{
char str[128];
Expand Down Expand Up @@ -1171,6 +1178,10 @@ void CrushWrapper::decode_crush_bucket(crush_bucket** bptr, bufferlist::iterator
break;
}

case CRUSH_BUCKET_LINEAR:
::decode(((crush_bucket_linear*)bucket)->item_weight, blp);
break;

default:
// We should have handled this case in the first switch statement
assert(0);
Expand Down
14 changes: 14 additions & 0 deletions src/crush/CrushWrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -925,6 +925,20 @@ class CrushWrapper {
out[i] = rawout[i];
}

void do_rule(int rule, int x, vector<int>& out, int maxout,
const vector<__u32>& weight, float balance_param) const {
Mutex::Locker l(mapper_lock);
int rawout[maxout];
int scratch[maxout * 3];
int numrep = crush_do_rule_wrapper(crush, rule, x, rawout, maxout,
&weight[0], weight.size(), scratch, balance_param);
if (numrep < 0)
numrep = 0;
out.resize(numrep);
for (int i=0; i<numrep; i++)
out[i] = rawout[i];
}

int read_from_file(const char *fn) {
bufferlist bl;
std::string error;
Expand Down
158 changes: 158 additions & 0 deletions src/crush/builder.c
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,53 @@ crush_make_straw_bucket(int hash,
return NULL;
}

/* linear bucket */

/*
 * Allocate and fill in a linear bucket holding @size items that all share
 * @item_weight.  The ids in @items are copied into the new bucket.
 *
 * Returns the new bucket, or NULL if crush_multiplication_is_unsafe()
 * rejects size * item_weight or if any allocation fails.
 */
struct crush_bucket_linear *
crush_make_linear_bucket(int hash, int type, int size,
			 int *items,
			 int item_weight)
{
	struct crush_bucket_linear *b;
	int pos;

	/* the bucket weight below is size * item_weight; refuse unsafe products */
	if (crush_multiplication_is_unsafe(size, item_weight))
		return NULL;

	b = malloc(sizeof(*b));
	if (b == NULL)
		return NULL;
	/* zeroing also leaves h.items / h.perm NULL, which the cleanup path relies on */
	memset(b, 0, sizeof(*b));

	b->h.alg = CRUSH_BUCKET_LINEAR;
	b->h.hash = hash;
	b->h.type = type;
	b->h.size = size;
	b->h.weight = size * item_weight;
	b->item_weight = item_weight;

	b->h.items = malloc(sizeof(__s32) * size);
	if (b->h.items == NULL)
		goto fail;

	b->h.perm = malloc(sizeof(__u32) * size);
	if (b->h.perm == NULL)
		goto fail;

	for (pos = 0; pos < size; pos++)
		b->h.items[pos] = items[pos];

	return b;

fail:
	free(b->h.perm);
	free(b->h.items);
	free(b);
	return NULL;
}



struct crush_bucket*
Expand All @@ -542,6 +589,13 @@ crush_make_bucket(int alg, int hash, int type, int size,

case CRUSH_BUCKET_STRAW:
return (struct crush_bucket *)crush_make_straw_bucket(hash, type, size, items, weights);

case CRUSH_BUCKET_LINEAR:
if (size && weights)
item_weight = weights[0];
else
item_weight = 0;
return (struct crush_bucket *)crush_make_linear_bucket(hash, type, size, items, item_weight);
}
return 0;
}
Expand Down Expand Up @@ -709,6 +763,33 @@ int crush_add_straw_bucket_item(struct crush_bucket_straw *bucket, int item, int
return crush_calc_straw(bucket);
}

/*
 * Append @item to a linear bucket and add @weight to the bucket's total.
 *
 * Both the items and perm arrays are grown by one slot first.  Returns 0
 * on success, -ENOMEM if either array cannot be grown, or -ERANGE if
 * crush_addition_is_unsafe() rejects adding @weight to the bucket weight
 * (the size is left unchanged in that case).
 */
int crush_add_linear_bucket_item(struct crush_bucket_linear *bucket, int item, int weight)
{
	int grown = bucket->h.size + 1;
	void *resized;

	resized = realloc(bucket->h.items, sizeof(__s32)*grown);
	if (resized == NULL)
		return -ENOMEM;
	bucket->h.items = resized;

	resized = realloc(bucket->h.perm, sizeof(__u32)*grown);
	if (resized == NULL)
		return -ENOMEM;
	bucket->h.perm = resized;

	/* the new id goes in the last slot; it only counts once size is bumped */
	bucket->h.items[grown-1] = item;

	if (crush_addition_is_unsafe(bucket->h.weight, weight))
		return -ERANGE;

	bucket->h.size++;
	bucket->h.weight += weight;

	return 0;
}

int crush_bucket_add_item(struct crush_bucket *b, int item, int weight)
{
/* invalidate perm cache */
Expand All @@ -723,6 +804,8 @@ int crush_bucket_add_item(struct crush_bucket *b, int item, int weight)
return crush_add_tree_bucket_item((struct crush_bucket_tree *)b, item, weight);
case CRUSH_BUCKET_STRAW:
return crush_add_straw_bucket_item((struct crush_bucket_straw *)b, item, weight);
case CRUSH_BUCKET_LINEAR:
return crush_add_linear_bucket_item((struct crush_bucket_linear *)b, item, weight);
default:
return -1;
}
Expand Down Expand Up @@ -921,6 +1004,36 @@ int crush_remove_straw_bucket_item(struct crush_bucket_straw *bucket, int item)
return crush_calc_straw(bucket);
}

/*
 * Remove @item from a linear bucket.
 *
 * Finds the first slot holding @item, closes the gap by moving the
 * following entries down one slot, decrements the size, takes one
 * item_weight off the bucket weight and shrinks the items/perm arrays.
 *
 * Returns 0 on success, -ENOENT if @item is not in the bucket, or
 * -ENOMEM if resizing either array fails (by then the item has already
 * been removed and size/weight updated).
 */
int crush_remove_linear_bucket_item(struct crush_bucket_linear *bucket, int item)
{
	unsigned i, j;
	unsigned keep;
	void *_realloc = NULL;

	for (i = 0; i < bucket->h.size; i++)
		if (bucket->h.items[i] == item)
			break;
	if (i == bucket->h.size)
		return -ENOENT;

	/* stop one short of the end so items[j+1] never reads past the array */
	for (j = i; j + 1 < bucket->h.size; j++)
		bucket->h.items[j] = bucket->h.items[j+1];
	bucket->h.size--;

	/* clamp at zero instead of wrapping if the total is below one item_weight */
	if (bucket->h.weight > bucket->item_weight)
		bucket->h.weight -= bucket->item_weight;
	else
		bucket->h.weight = 0;

	/*
	 * Never ask realloc() for zero bytes: it may free the buffer and
	 * return NULL, which would be misreported as -ENOMEM and leave a
	 * dangling pointer in the bucket.  Keep a one-slot allocation when
	 * the bucket becomes empty.
	 */
	keep = bucket->h.size ? bucket->h.size : 1;

	if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*keep)) == NULL) {
		return -ENOMEM;
	} else {
		bucket->h.items = _realloc;
	}
	if ((_realloc = realloc(bucket->h.perm, sizeof(__u32)*keep)) == NULL) {
		return -ENOMEM;
	} else {
		bucket->h.perm = _realloc;
	}
	return 0;
}

int crush_bucket_remove_item(struct crush_bucket *b, int item)
{
/* invalidate perm cache */
Expand All @@ -935,6 +1048,8 @@ int crush_bucket_remove_item(struct crush_bucket *b, int item)
return crush_remove_tree_bucket_item((struct crush_bucket_tree *)b, item);
case CRUSH_BUCKET_STRAW:
return crush_remove_straw_bucket_item((struct crush_bucket_straw *)b, item);
case CRUSH_BUCKET_LINEAR:
return crush_remove_linear_bucket_item((struct crush_bucket_linear *)b, item);
default:
return -1;
}
Expand Down Expand Up @@ -1025,6 +1140,16 @@ int crush_adjust_straw_bucket_item_weight(struct crush_bucket_straw *bucket, int
return diff;
}

/*
 * Set the shared per-item weight of a linear bucket to @weight and
 * recompute the bucket weight as item_weight * size.
 *
 * @item is not consulted: the bucket stores a single weight for all items.
 * Returns the change in total bucket weight.
 */
int crush_adjust_linear_bucket_item_weight(struct crush_bucket_linear *bucket, int item, int weight)
{
	/* must be computed before item_weight is overwritten below */
	int delta = (weight - bucket->item_weight) * bucket->h.size;

	bucket->item_weight = weight;
	bucket->h.weight = bucket->item_weight * bucket->h.size;

	return delta;
}

int crush_bucket_adjust_item_weight(struct crush_bucket *b, int item, int weight)
{
switch (b->alg) {
Expand All @@ -1040,6 +1165,9 @@ int crush_bucket_adjust_item_weight(struct crush_bucket *b, int item, int weight
case CRUSH_BUCKET_STRAW:
return crush_adjust_straw_bucket_item_weight((struct crush_bucket_straw *)b,
item, weight);
case CRUSH_BUCKET_LINEAR:
return crush_adjust_linear_bucket_item_weight((struct crush_bucket_linear *)b,
item, weight);
default:
return -1;
}
Expand Down Expand Up @@ -1144,6 +1272,34 @@ static int crush_reweight_straw_bucket(struct crush_map *crush, struct crush_buc
return 0;
}

/*
 * Recompute the weight of a linear bucket from its children.
 *
 * Every item with a negative id is looked up in crush->buckets, reweighted
 * via crush_reweight_bucket(), and its weight added to a running sum;
 * items with non-negative ids are only counted.  When bucket children
 * outnumber the other items, item_weight becomes the average child
 * weight.  The bucket weight is then item_weight * size.
 *
 * Returns 0, or -ERANGE if crush_addition_is_unsafe() rejects the sum.
 */
static int crush_reweight_linear_bucket(struct crush_map *crush, struct crush_bucket_linear *bucket)
{
	unsigned pos;
	unsigned child_total = 0, child_buckets = 0, devices = 0;

	for (pos = 0; pos < bucket->h.size; pos++) {
		int id = bucket->h.items[pos];
		struct crush_bucket *child;

		if (id >= 0) {
			devices++;
			continue;
		}

		/* negative ids index the bucket table at -1-id */
		child = crush->buckets[-1-id];
		crush_reweight_bucket(crush, child);

		if (crush_addition_is_unsafe(child_total, child->weight))
			return -ERANGE;

		child_total += child->weight;
		child_buckets++;
	}

	if (child_buckets > devices)
		bucket->item_weight = child_total / child_buckets;  /* more bucket children than leaves, average! */
	bucket->h.weight = bucket->item_weight * bucket->h.size;

	return 0;
}

int crush_reweight_bucket(struct crush_map *crush, struct crush_bucket *b)
{
switch (b->alg) {
Expand All @@ -1155,6 +1311,8 @@ int crush_reweight_bucket(struct crush_map *crush, struct crush_bucket *b)
return crush_reweight_tree_bucket(crush, (struct crush_bucket_tree *)b);
case CRUSH_BUCKET_STRAW:
return crush_reweight_straw_bucket(crush, (struct crush_bucket_straw *)b);
case CRUSH_BUCKET_LINEAR:
return crush_reweight_linear_bucket(crush, (struct crush_bucket_linear *)b);
default:
return -1;
}
Expand Down
4 changes: 4 additions & 0 deletions src/crush/builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,9 @@ struct crush_bucket_straw *
crush_make_straw_bucket(int hash, int type, int size,
int *items,
int *weights);
/*
 * Create a linear bucket of @size items that all share @item_weight;
 * returns NULL on failure.
 */
struct crush_bucket_linear *
crush_make_linear_bucket(int hash, int type, int size,
int *items,
int item_weight);

#endif
13 changes: 13 additions & 0 deletions src/crush/crush.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const char *crush_bucket_alg_name(int alg)
case CRUSH_BUCKET_LIST: return "list";
case CRUSH_BUCKET_TREE: return "tree";
case CRUSH_BUCKET_STRAW: return "straw";
case CRUSH_BUCKET_LINEAR: return "linear";
default: return "unknown";
}
}
Expand All @@ -41,6 +42,8 @@ int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
return ((struct crush_bucket_tree *)b)->node_weights[crush_calc_tree_node(p)];
case CRUSH_BUCKET_STRAW:
return ((struct crush_bucket_straw *)b)->item_weights[p];
case CRUSH_BUCKET_LINEAR:
return ((struct crush_bucket_linear *)b)->item_weight;
}
return 0;
}
Expand Down Expand Up @@ -78,6 +81,13 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
kfree(b);
}

/*
 * Release a linear bucket: its items and perm arrays, then the bucket
 * structure itself.
 */
void crush_destroy_bucket_linear(struct crush_bucket_linear *b)
{
	/* the two arrays are independent; the struct must go last */
	kfree(b->h.items);
	kfree(b->h.perm);
	kfree(b);
}

void crush_destroy_bucket(struct crush_bucket *b)
{
switch (b->alg) {
Expand All @@ -93,6 +103,9 @@ void crush_destroy_bucket(struct crush_bucket *b)
case CRUSH_BUCKET_STRAW:
crush_destroy_bucket_straw((struct crush_bucket_straw *)b);
break;
case CRUSH_BUCKET_LINEAR:
crush_destroy_bucket_linear((struct crush_bucket_linear *)b);
break;
}
}

Expand Down
9 changes: 7 additions & 2 deletions src/crush/crush.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ enum {
CRUSH_BUCKET_UNIFORM = 1,
CRUSH_BUCKET_LIST = 2,
CRUSH_BUCKET_TREE = 3,
CRUSH_BUCKET_STRAW = 4
CRUSH_BUCKET_STRAW = 4,
CRUSH_BUCKET_LINEAR = 5
};
extern const char *crush_bucket_alg_name(int alg);

Expand Down Expand Up @@ -159,7 +160,10 @@ struct crush_bucket_straw {
__u32 *straws; /* 16-bit fixed point */
};


/*
 * Linear bucket: the common bucket header followed by one weight value
 * that applies to every item in the bucket.
 */
struct crush_bucket_linear {
/* common bucket header; must stay first so the struct can be cast to crush_bucket */
struct crush_bucket h;
__u32 item_weight; /* 16-bit fixed point; all items equally weighted */
};

/*
* CRUSH map includes all buckets, rules, etc.
Expand Down Expand Up @@ -203,6 +207,7 @@ extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b);
extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b);
extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
extern void crush_destroy_bucket_linear(struct crush_bucket_linear *b);
extern void crush_destroy_bucket(struct crush_bucket *b);
extern void crush_destroy_rule(struct crush_rule *r);
extern void crush_destroy(struct crush_map *map);
Expand Down
3 changes: 2 additions & 1 deletion src/crush/grammar.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,8 @@ struct crush_grammar : public grammar<crush_grammar>
bucket_alg = str_p("alg") >> ( str_p("uniform") |
str_p("list") |
str_p("tree") |
str_p("straw") );
str_p("straw")|
str_p("linear"));
bucket_hash = str_p("hash") >> ( integer |
str_p("rjenkins1") );
bucket_item = str_p("item") >> name
Expand Down