diff --git a/array.c b/array.c
index e427cb3320e141..7e76227a1664d7 100644
--- a/array.c
+++ b/array.c
@@ -255,15 +255,24 @@ rb_ary_modify(VALUE ary)
     rb_ary_modify_check(ary);
     if (ARY_SHARED_P(ary)) {
         long len = RARRAY_LEN(ary);
+        VALUE shared = ARY_SHARED(ary);
         if (len <= RARRAY_EMBED_LEN_MAX) {
             VALUE *ptr = ARY_HEAP_PTR(ary);
-            VALUE shared = ARY_SHARED(ary);
             FL_UNSET_SHARED(ary);
             FL_SET_EMBED(ary);
             MEMCPY(ARY_EMBED_PTR(ary), ptr, VALUE, len);
             rb_ary_decrement_share(shared);
             ARY_SET_EMBED_LEN(ary, len);
         }
+        else if (ARY_SHARED_NUM(shared) == 1 && len > RARRAY_LEN(shared)>>1) {
+            long shift = RARRAY_PTR(ary) - RARRAY_PTR(shared);
+            ARY_SET_PTR(ary, RARRAY_PTR(shared));
+            ARY_SET_CAPA(ary, RARRAY_LEN(shared));
+            MEMMOVE(RARRAY_PTR(ary), RARRAY_PTR(ary)+shift, VALUE, len);
+            FL_UNSET_SHARED(ary);
+            FL_SET_EMBED(shared);
+            rb_ary_decrement_share(shared);
+        }
         else {
             VALUE *ptr = ALLOC_N(VALUE, len);
             MEMCPY(ptr, RARRAY_PTR(ary), VALUE, len);
@@ -274,6 +283,38 @@ rb_ary_modify(VALUE ary)
     }
 }
 
+static void
+ary_ensure_room_for_push(VALUE ary, long add_len)
+{
+    long new_len = RARRAY_LEN(ary) + add_len;
+    long capa;
+
+    if (ARY_SHARED_P(ary)) {
+        if (new_len > RARRAY_EMBED_LEN_MAX) {
+            VALUE shared = ARY_SHARED(ary);
+            if (ARY_SHARED_NUM(shared) == 1) {
+                if (RARRAY_PTR(ary) - RARRAY_PTR(shared) + new_len <= RARRAY_LEN(shared)) {
+                    rb_ary_modify_check(ary);
+                }
+                else {
+                    /* if the array is shared, it likely participates in a push/shift pattern */
+                    rb_ary_modify(ary);
+                    capa = ARY_CAPA(ary);
+                    if (new_len > capa - (capa >> 6)) {
+                        ary_double_capa(ary, new_len);
+                    }
+                }
+                return;
+            }
+        }
+    }
+    rb_ary_modify(ary);
+    capa = ARY_CAPA(ary);
+    if (new_len > capa) {
+        ary_double_capa(ary, new_len);
+    }
+}
+
 VALUE
 rb_ary_freeze(VALUE ary)
 {
@@ -295,6 +336,33 @@ rb_ary_frozen_p(VALUE ary)
     return Qfalse;
 }
 
+/* This can be used to take a snapshot of an array (with
+   e.g. rb_ary_replace) and check later whether the array has been
+   modified from the snapshot.  The snapshot is cheap, though if
+   something does modify the array it will pay the cost of copying
+   it.
*/ +VALUE +rb_ary_dup_of_p(VALUE ary1, VALUE ary2) +{ + VALUE *p1, *p2; + long len = RARRAY_LEN(ary1); + + if (len != RARRAY_LEN(ary2)) return Qfalse; + + p1 = RARRAY_PTR(ary1); + p2 = RARRAY_PTR(ary2); + + if (ARY_EMBED_P(ary1) && ARY_EMBED_P(ary2)) { + for (; len; len--, p1++, p2++) { + if (*p1 != *p2) return Qfalse; + } + return Qtrue; + } + + if (p1 == p2) return Qtrue; + return Qfalse; +} + static VALUE ary_alloc(VALUE klass) { @@ -430,8 +498,9 @@ ary_make_shared(VALUE ary) OBJSETUP(shared, 0, T_ARRAY); FL_UNSET_EMBED(shared); - ARY_SET_LEN((VALUE)shared, RARRAY_LEN(ary)); + ARY_SET_LEN((VALUE)shared, ARY_CAPA(ary)); ARY_SET_PTR((VALUE)shared, RARRAY_PTR(ary)); + rb_mem_clear(RARRAY_PTR(shared) + RARRAY_LEN(ary), ARY_CAPA(ary) - RARRAY_LEN(ary)); FL_SET_SHARED_ROOT(shared); ARY_SET_SHARED_NUM((VALUE)shared, 1); FL_SET_SHARED(ary); @@ -721,8 +790,6 @@ ary_take_first_or_last(int argc, VALUE *argv, VALUE ary, enum ary_take_pos_flags return ary_make_partial(ary, rb_cArray, offset, n); } -static VALUE rb_ary_push_1(VALUE ary, VALUE item); - /* * call-seq: * ary << obj -> ary @@ -739,8 +806,12 @@ static VALUE rb_ary_push_1(VALUE ary, VALUE item); VALUE rb_ary_push(VALUE ary, VALUE item) { - rb_ary_modify(ary); - return rb_ary_push_1(ary, item); + long idx = RARRAY_LEN(ary); + + ary_ensure_room_for_push(ary, 1); + RARRAY_PTR(ary)[idx] = item; + ARY_SET_LEN(ary, idx + 1); + return ary; } static VALUE @@ -756,6 +827,18 @@ rb_ary_push_1(VALUE ary, VALUE item) return ary; } +static VALUE +rb_ary_cat(VALUE ary, const VALUE *ptr, long len) +{ + long oldlen = RARRAY_LEN(ary); + + ary_ensure_room_for_push(ary, len); +copy: + MEMCPY(RARRAY_PTR(ary) + oldlen, ptr, VALUE, len); + ARY_SET_LEN(ary, oldlen + len); + return ary; +} + /* * call-seq: * ary.push(obj, ... ) -> ary @@ -772,11 +855,7 @@ rb_ary_push_1(VALUE ary, VALUE item) static VALUE rb_ary_push_m(int argc, VALUE *argv, VALUE ary) { - rb_ary_modify(ary); - while (argc--) { - rb_ary_push_1(ary, *argv++); - } - return ary; + return rb_ary_cat(ary, argv, argc); } VALUE @@ -904,6 +983,55 @@ rb_ary_shift_m(int argc, VALUE *argv, VALUE ary) return result; } +static void +ary_ensure_room_for_unshift(VALUE ary, int argc) +{ + long len = RARRAY_LEN(ary); + long new_len = len + argc; + long capa; + VALUE *head, *sharedp; + + if (ARY_SHARED_P(ary)) { + VALUE shared = ARY_SHARED(ary); + capa = RARRAY_LEN(shared); + if (ARY_SHARED_NUM(shared) == 1 && capa > new_len) { + head = RARRAY_PTR(ary); + sharedp = RARRAY_PTR(shared); + goto makeroom_if_need; + } + } + + rb_ary_modify(ary); + capa = ARY_CAPA(ary); + if (capa - (capa >> 6) <= new_len) { + ary_double_capa(ary, new_len); + } + + /* use shared array for big "queues" */ + if (new_len > ARY_DEFAULT_SIZE * 4) { + /* make a room for unshifted items */ + capa = ARY_CAPA(ary); + ary_make_shared(ary); + + head = sharedp = RARRAY_PTR(ary); + goto makeroom; +makeroom_if_need: + if (head - sharedp < argc) { + long room; +makeroom: + room = capa - new_len; + room -= room >> 4; + MEMMOVE(sharedp + argc + room, head, VALUE, len); + head = sharedp + argc + room; + } + ARY_SET_PTR(ary, head - argc); + } + else { + /* sliding items */ + MEMMOVE(RARRAY_PTR(ary) + argc, RARRAY_PTR(ary), VALUE, len); + } +} + /* * call-seq: * ary.unshift(obj, ...) 
-> ary @@ -919,19 +1047,16 @@ rb_ary_shift_m(int argc, VALUE *argv, VALUE ary) static VALUE rb_ary_unshift_m(int argc, VALUE *argv, VALUE ary) { - long len; + long len = RARRAY_LEN(ary); - rb_ary_modify(ary); - if (argc == 0) return ary; - if (ARY_CAPA(ary) <= (len = RARRAY_LEN(ary)) + argc) { - ary_double_capa(ary, len + argc); + if (argc == 0) { + rb_ary_modify_check(ary); + return ary; } - /* sliding items */ - MEMMOVE(RARRAY_PTR(ary) + argc, RARRAY_PTR(ary), VALUE, len); + ary_ensure_room_for_unshift(ary, argc); MEMCPY(RARRAY_PTR(ary), argv, VALUE, argc); - ARY_INCREASE_LEN(ary, argc); - + ARY_SET_LEN(ary, len + argc); return ary; } @@ -1293,15 +1418,12 @@ rb_ary_splice(VALUE ary, long beg, long len, VALUE rpl) rpl = rb_ary_to_ary(rpl); rlen = RARRAY_LEN(rpl); } - rb_ary_modify(ary); if (beg >= RARRAY_LEN(ary)) { if (beg > ARY_MAX_SIZE - rlen) { rb_raise(rb_eIndexError, "index %ld too big", beg); } + ary_ensure_room_for_push(ary, rlen-len); /* len is 0 or negative */ len = beg + rlen; - if (len >= ARY_CAPA(ary)) { - ary_double_capa(ary, len); - } rb_mem_clear(RARRAY_PTR(ary) + RARRAY_LEN(ary), beg - RARRAY_LEN(ary)); if (rlen > 0) { MEMCPY(RARRAY_PTR(ary) + beg, RARRAY_PTR(rpl), VALUE, rlen); @@ -1311,6 +1433,7 @@ rb_ary_splice(VALUE ary, long beg, long len, VALUE rpl) else { long alen; + rb_ary_modify(ary); alen = RARRAY_LEN(ary) + rlen - len; if (alen >= ARY_CAPA(ary)) { ary_double_capa(ary, alen); @@ -2100,12 +2223,13 @@ rb_ary_sort_bang(VALUE ary) if (RARRAY_LEN(ary) > 1) { VALUE tmp = ary_make_substitution(ary); /* only ary refers tmp */ struct ary_sort_data data; + long len = RARRAY_LEN(ary); RBASIC(tmp)->klass = 0; data.ary = tmp; data.opt_methods = 0; data.opt_inited = 0; - ruby_qsort(RARRAY_PTR(tmp), RARRAY_LEN(tmp), sizeof(VALUE), + ruby_qsort(RARRAY_PTR(tmp), len, sizeof(VALUE), rb_block_given_p()?sort_1:sort_2, &data); if (ARY_EMBED_P(tmp)) { @@ -2122,7 +2246,7 @@ rb_ary_sort_bang(VALUE ary) if (ARY_HEAP_PTR(ary) == ARY_HEAP_PTR(tmp)) { assert(!ARY_EMBED_P(ary)); FL_UNSET_SHARED(ary); - ARY_SET_CAPA(ary, ARY_CAPA(tmp)); + ARY_SET_CAPA(ary, RARRAY_LEN(tmp)); } else { assert(!ARY_SHARED_P(tmp)); @@ -2137,8 +2261,8 @@ rb_ary_sort_bang(VALUE ary) xfree(ARY_HEAP_PTR(ary)); } ARY_SET_PTR(ary, RARRAY_PTR(tmp)); - ARY_SET_HEAP_LEN(ary, RARRAY_LEN(tmp)); - ARY_SET_CAPA(ary, ARY_CAPA(tmp)); + ARY_SET_HEAP_LEN(ary, len); + ARY_SET_CAPA(ary, RARRAY_LEN(tmp)); } /* tmp was lost ownership for the ptr */ FL_UNSET(tmp, FL_FREEZE); diff --git a/file.c b/file.c index fd64116d2432f5..cd071e10be3f4f 100644 --- a/file.c +++ b/file.c @@ -149,40 +149,60 @@ file_path_convert(VALUE name) return name; } -static VALUE -rb_get_path_check(VALUE obj, int level) +static rb_encoding * +check_path_encoding(VALUE str) +{ + rb_encoding *enc = rb_enc_get(str); + if (!rb_enc_asciicompat(enc)) { + rb_raise(rb_eEncCompatError, "path name must be ASCII-compatible (%s): %s", + rb_enc_name(enc), RSTRING_PTR(rb_str_inspect(str))); + } + return enc; +} + +VALUE +rb_get_path_check_to_string(VALUE obj, int level) { VALUE tmp; ID to_path; - rb_encoding *enc; if (insecure_obj_p(obj, level)) { rb_insecure_operation(); } + if (RB_TYPE_P(obj, T_STRING)) { + return obj; + } CONST_ID(to_path, "to_path"); tmp = rb_check_funcall(obj, to_path, 0, 0); if (tmp == Qundef) { tmp = obj; } StringValue(tmp); + return tmp; +} +VALUE +rb_get_path_check_convert(VALUE obj, VALUE tmp, int level) +{ tmp = file_path_convert(tmp); if (obj != tmp && insecure_obj_p(tmp, level)) { rb_insecure_operation(); } - enc = rb_enc_get(tmp); - if 
(!rb_enc_asciicompat(enc)) { - tmp = rb_str_inspect(tmp); - rb_raise(rb_eEncCompatError, "path name must be ASCII-compatible (%s): %s", - rb_enc_name(enc), RSTRING_PTR(tmp)); - } + check_path_encoding(tmp); StringValueCStr(tmp); return rb_str_new4(tmp); } +static VALUE +rb_get_path_check(VALUE obj, int level) +{ + VALUE tmp = rb_get_path_check_to_string(obj, level); + return rb_get_path_check_convert(obj, tmp, level); +} + VALUE rb_get_path_no_checksafe(VALUE obj) { @@ -3250,7 +3270,6 @@ rb_file_expand_path(VALUE fname, VALUE dname) VALUE rb_file_expand_path_fast(VALUE fname, VALUE dname) { - check_expand_path_args(fname, dname); return rb_file_expand_path_internal(fname, dname, 0, 0, EXPAND_PATH_BUFFER()); } @@ -5241,7 +5260,7 @@ rb_find_file_ext_safe(VALUE *filep, const char *const *ext, int safe_level) rb_raise(rb_eSecurityError, "loading from non-absolute path %s", f); } - RB_GC_GUARD(load_path) = rb_get_load_path(); + RB_GC_GUARD(load_path) = rb_get_expanded_load_path(); if (!load_path) return 0; fname = rb_str_dup(*filep); @@ -5306,7 +5325,7 @@ rb_find_file_safe(VALUE path, int safe_level) rb_raise(rb_eSecurityError, "loading from non-absolute path %s", f); } - RB_GC_GUARD(load_path) = rb_get_load_path(); + RB_GC_GUARD(load_path) = rb_get_expanded_load_path(); if (load_path) { long i; diff --git a/gc.c b/gc.c index d8cc21bfb2563a..097f895c29c3eb 100644 --- a/gc.c +++ b/gc.c @@ -2304,8 +2304,10 @@ before_gc_sweep(rb_objspace_t *objspace) objspace->heap.do_heap_free = (size_t)((heaps_used * HEAP_OBJ_LIMIT) * 0.65); objspace->heap.free_min = (size_t)((heaps_used * HEAP_OBJ_LIMIT) * 0.2); if (objspace->heap.free_min < initial_free_min) { - objspace->heap.do_heap_free = heaps_used * HEAP_OBJ_LIMIT; objspace->heap.free_min = initial_free_min; + if (objspace->heap.do_heap_free < initial_free_min) { + objspace->heap.do_heap_free = initial_free_min; + } } objspace->heap.sweep_slots = heaps; objspace->heap.free_num = 0; diff --git a/hash.c b/hash.c index b4a9ceea3ad0be..fadad1039b40c6 100644 --- a/hash.c +++ b/hash.c @@ -1075,7 +1075,7 @@ clear_i(VALUE key, VALUE value, VALUE dummy) * */ -static VALUE +VALUE rb_hash_clear(VALUE hash) { rb_hash_modify_check(hash); diff --git a/include/ruby/intern.h b/include/ruby/intern.h index b79f0166f96cc0..73f079142b5632 100644 --- a/include/ruby/intern.h +++ b/include/ruby/intern.h @@ -56,6 +56,7 @@ VALUE rb_ary_tmp_new(long); void rb_ary_free(VALUE); void rb_ary_modify(VALUE); VALUE rb_ary_freeze(VALUE); +VALUE rb_ary_dup_of_p(VALUE, VALUE); VALUE rb_ary_aref(int, VALUE*, VALUE); VALUE rb_ary_subseq(VALUE, long, long); void rb_ary_store(VALUE, long, VALUE); @@ -442,6 +443,7 @@ VALUE rb_hash_lookup(VALUE, VALUE); VALUE rb_hash_lookup2(VALUE, VALUE, VALUE); VALUE rb_hash_fetch(VALUE, VALUE); VALUE rb_hash_aset(VALUE, VALUE, VALUE); +VALUE rb_hash_clear(VALUE); VALUE rb_hash_delete_if(VALUE); VALUE rb_hash_delete(VALUE,VALUE); typedef VALUE rb_hash_update_func(VALUE newkey, VALUE oldkey, VALUE value); diff --git a/internal.h b/internal.h index e7c4adc3d8bae8..5927d93c70a8ff 100644 --- a/internal.h +++ b/internal.h @@ -95,6 +95,8 @@ VALUE rb_home_dir(const char *user, VALUE result); VALUE rb_realpath_internal(VALUE basedir, VALUE path, int strict); VALUE rb_file_expand_path_fast(VALUE, VALUE); VALUE rb_file_expand_path_internal(VALUE, VALUE, int, int, VALUE); +VALUE rb_get_path_check_to_string(VALUE, int); +VALUE rb_get_path_check_convert(VALUE, VALUE, int); void Init_File(void); #ifdef _WIN32 @@ -120,6 +122,7 @@ VALUE rb_iseq_clone(VALUE iseqval, VALUE 
newcbase); /* load.c */ VALUE rb_get_load_path(void); +VALUE rb_get_expanded_load_path(void); /* math.c */ VALUE rb_math_atan2(VALUE, VALUE); diff --git a/load.c b/load.c index 163ec4c44228db..a83b434fa2129a 100644 --- a/load.c +++ b/load.c @@ -34,21 +34,120 @@ rb_get_load_path(void) return load_path; } -VALUE -rb_get_expanded_load_path(void) +enum expand_type { + EXPAND_ALL, + EXPAND_RELATIVE, + EXPAND_HOME, + EXPAND_NON_CACHE +}; + +/* Construct expanded load path and store it to cache. + We rebuild load path partially if the cache is invalid. + We don't cache non string object and expand it every time. We ensure that + string objects in $LOAD_PATH are frozen. + */ +static void +rb_construct_expanded_load_path(int type, int *has_relative, int *has_non_cache) { - VALUE load_path = rb_get_load_path(); + rb_vm_t *vm = GET_VM(); + VALUE load_path = vm->load_path; + VALUE expanded_load_path = vm->expanded_load_path; VALUE ary; long i; + int level = rb_safe_level(); ary = rb_ary_new2(RARRAY_LEN(load_path)); for (i = 0; i < RARRAY_LEN(load_path); ++i) { - VALUE path = rb_file_expand_path_fast(RARRAY_PTR(load_path)[i], Qnil); - rb_str_freeze(path); - rb_ary_push(ary, path); + VALUE path, as_str, expanded_path; + int is_string, non_cache; + char *as_cstr; + as_str = path = RARRAY_PTR(load_path)[i]; + is_string = RB_TYPE_P(path, T_STRING) ? 1 : 0; + non_cache = !is_string ? 1 : 0; + as_str = rb_get_path_check_to_string(path, level); + as_cstr = RSTRING_PTR(as_str); + + if (!non_cache) { + if ((type == EXPAND_RELATIVE && + rb_is_absolute_path(as_cstr)) || + (type == EXPAND_HOME && + (!as_cstr[0] || as_cstr[0] != '~')) || + (type == EXPAND_NON_CACHE)) { + /* Use cached expanded path. */ + rb_ary_push(ary, RARRAY_PTR(expanded_load_path)[i]); + continue; + } + } + if (!*has_relative && !rb_is_absolute_path(as_cstr)) + *has_relative = 1; + if (!*has_non_cache && non_cache) + *has_non_cache = 1; + /* Freeze only string object. We expand other objects every time. */ + if (is_string) + rb_str_freeze(path); + as_str = rb_get_path_check_convert(path, as_str, level); + expanded_path = rb_file_expand_path_fast(as_str, Qnil); + rb_str_freeze(expanded_path); + rb_ary_push(ary, expanded_path); } rb_obj_freeze(ary); - return ary; + vm->expanded_load_path = ary; + rb_ary_replace(vm->load_path_snapshot, vm->load_path); +} + +static VALUE +load_path_getcwd(void) +{ + char *cwd = my_getcwd(); + VALUE cwd_str = rb_filesystem_str_new_cstr(cwd); + xfree(cwd); + return cwd_str; +} + +VALUE +rb_get_expanded_load_path(void) +{ + rb_vm_t *vm = GET_VM(); + const VALUE non_cache = Qtrue; + + if (!rb_ary_dup_of_p(vm->load_path_snapshot, vm->load_path)) { + /* The load path was modified. Rebuild the expanded load path. */ + int has_relative = 0, has_non_cache = 0; + rb_construct_expanded_load_path(EXPAND_ALL, &has_relative, &has_non_cache); + if (has_relative) { + vm->load_path_check_cache = load_path_getcwd(); + } + else if (has_non_cache) { + /* Non string object. */ + vm->load_path_check_cache = non_cache; + } + else { + vm->load_path_check_cache = 0; + } + } + else if (vm->load_path_check_cache == non_cache) { + int has_relative = 1, has_non_cache = 1; + /* Expand only non-cacheable objects. */ + rb_construct_expanded_load_path(EXPAND_NON_CACHE, + &has_relative, &has_non_cache); + } + else if (vm->load_path_check_cache) { + int has_relative = 1, has_non_cache = 1; + VALUE cwd = load_path_getcwd(); + if (!rb_str_equal(vm->load_path_check_cache, cwd)) { + /* Current working directory or filesystem encoding was changed. 
+ Expand relative load path and non-cacheable objects again. */ + vm->load_path_check_cache = cwd; + rb_construct_expanded_load_path(EXPAND_RELATIVE, + &has_relative, &has_non_cache); + } + else { + /* Expand only tilde (User HOME) and non-cacheable objects. */ + rb_construct_expanded_load_path(EXPAND_HOME, + &has_relative, &has_non_cache); + } + } + return vm->expanded_load_path; } static VALUE @@ -63,12 +162,320 @@ get_loaded_features(void) return GET_VM()->loaded_features; } +static void +reset_loaded_features_snapshot(void) +{ + rb_vm_t *vm = GET_VM(); + rb_ary_replace(vm->loaded_features_snapshot, vm->loaded_features); +} + static st_table * get_loading_table(void) { return GET_VM()->loading_table; } +static inline uint32_t +fmix_uint(uint32_t val) +{ + /* with -O2 even on i386 gcc and clang transform this to + * single multiplication 32bit*32bit=64bit */ + uint64_t res = ((uint64_t)val) * (uint64_t)0x85ebca6b; + return (uint32_t)res ^ (uint32_t)(res>>32); +} + +static uint32_t +fast_string_hash(const char *str, long len) +{ + uint32_t res = 0; + for(;len > 0; str+=16, len-=16) { + uint32_t buf[4] = {0, 0, 0, 0}; + memcpy(buf, str, len < 16 ? len : 16); + res = fmix_uint(res ^ buf[0]); + res = fmix_uint(res ^ buf[1]); + res = fmix_uint(res ^ buf[2]); + res = fmix_uint(res ^ buf[3]); + } + return res; +} + +/* + * We build index for loaded features relying on fact that loaded_feature_path + * rechecks feature name. So that, we could store only hash of string instead + * of whole string in a hash. + * Instead of allocation of array of offsets by each feature, we organize + * offsets in a single linked lists - one list per feature - which are stored + * in a single array. And we store in a hash positions of head and tail of + * this list + */ +#define FI_LAST (-1) +#define FI_DEFAULT_HASH_SIZE (64) +#define FI_DEFAULT_LIST_SIZE (64) + +typedef struct features_index_hash_item { + uint32_t hash; + /* we will store position + 1 in head and tail, + * so that if head == 0 then item is free */ + int head; + int tail; +} fi_hash_item; + +typedef struct features_index_hash { + int capa; + int size; + fi_hash_item *items; +} fi_hash; + +static fi_hash_item * +fi_hash_candidate(fi_hash *index, uint32_t hash) +{ + fi_hash_item *items = index->items; + int capa_1 = index->capa - 1; + int pos = hash & capa_1; + if (items[pos].hash != hash && items[pos].head != 0) { + /* always odd, so that it has no common diviser with capa*/ + int step = (hash % capa_1) | 1; + do { + pos = (pos + step) & capa_1; + } while (items[pos].hash != hash && items[pos].head != 0); + } + return items + pos; +} + +static void +fi_hash_rehash(fi_hash *index) +{ + fi_hash temp; + int i; + fi_hash_item *items = index->items; + temp.capa = index->capa * 2; + temp.size = index->size; + temp.items = xcalloc(temp.capa, sizeof(fi_hash_item)); + for(i=0; i < index->capa; i++) { + if (items[i].head) { + fi_hash_item *item = fi_hash_candidate(&temp, items[i].hash); + *item = items[i]; + } + } + *index = temp; + xfree(items); +} + +static int +fi_hash_find_head(fi_hash *index, const char *str, long len) +{ + uint32_t hash = fast_string_hash(str, len); + fi_hash_item *item = fi_hash_candidate(index, hash); + return item->head - 1; /* if head==0 then result is FI_LAST */ +} + +/* inserts position of tail into hash, + * returns previous tail position or FI_LAST */ +static int +fi_hash_insert_pos(fi_hash *index, const char *str, long len, int pos) +{ + fi_hash_item *item; + int hash = fast_string_hash(str, len); + if (index->size > 
index->capa / 4 * 3) { + fi_hash_rehash(index); + } + item = fi_hash_candidate(index, hash); + if (item->head) { + int res = item->tail - 1; + item->tail = pos + 1; + return res; + } + else { + item->hash = hash; + item->head = pos + 1; + item->tail = pos + 1; + index->size++; + return FI_LAST; + } +} + +typedef struct features_index_multilist_item { + int offset; + int next; +} fi_list_item; + +typedef struct features_index_multilist { + fi_list_item *items; + int capa; + int size; +} fi_list; + +static void +fi_list_insert_offset(fi_list *list, int offset, int prev_pos) +{ + if (list->size == list->capa) { + REALLOC_N(list->items, fi_list_item, list->capa*2); + MEMZERO(list->items + list->capa, fi_list_item, list->capa); + list->capa*=2; + } + list->items[list->size].offset = offset; + list->items[list->size].next = FI_LAST; + if (prev_pos != FI_LAST) { + list->items[prev_pos].next = list->size; + } + list->size++; +} + +typedef struct features_index { + fi_hash hash; + fi_list list; +} st_features_index; + +static void +features_index_free(void *p) +{ + if (p) { + st_features_index *fi = p; + xfree(fi->hash.items); + xfree(fi->list.items); + xfree(fi); + } +} + +static VALUE +features_index_allocate() +{ + st_features_index *index = xcalloc(1, sizeof(st_features_index)); + index->hash.capa = FI_DEFAULT_HASH_SIZE; + index->hash.items = xcalloc(index->hash.capa, sizeof(fi_hash_item)); + index->list.capa = FI_DEFAULT_LIST_SIZE; + index->list.items = xcalloc(index->list.capa, sizeof(fi_list_item)); + return Data_Wrap_Struct(rb_cObject, 0, features_index_free, index); +} + +static st_features_index * +get_loaded_features_index_raw(void) +{ + st_features_index *index; + Data_Get_Struct(GET_VM()->loaded_features_index, st_features_index, index); + return index; +} + +static void +features_index_clear() +{ + st_features_index *index = get_loaded_features_index_raw(); + MEMZERO(index->hash.items, fi_hash_item, index->hash.capa); + index->hash.size = 0; + MEMZERO(index->list.items, fi_list_item, index->list.capa); + index->list.size = 0; +} + +static void +features_index_add_single(const char *short_feature, long len, int offset) +{ + st_features_index *index = get_loaded_features_index_raw(); + int prev_pos = fi_hash_insert_pos(&index->hash, short_feature, len, index->list.size); + fi_list_insert_offset(&index->list, offset, prev_pos); +} + +/* Add to the loaded-features index all the required entries for + `feature`, located at `offset` in $LOADED_FEATURES. We add an + index entry at each string `short_feature` for which + feature == "#{prefix}#{short_feature}#{e}" + where `e` is empty or matches %r{^\.[^./]*$}, and `prefix` is empty + or ends in '/'. This maintains the invariant that `rb_feature_p()` + relies on for its fast lookup. +*/ +static void +features_index_add(const char *feature_str, long len, int offset) +{ + const char *feature_end, *ext, *p; + + feature_end = feature_str + len; + + for (ext = feature_end; ext > feature_str; ext--) + if (*ext == '.' || *ext == '/') + break; + if (*ext != '.') + ext = NULL; + /* Now `ext` points to the only string matching %r{^\.[^./]*$} that is + at the end of `feature`, or is NULL if there is no such string. */ + + p = ext ? ext : feature_end; + while (1) { + p--; + while (p >= feature_str && *p != '/') + p--; + if (p < feature_str) + break; + /* Now *p == '/'. We reach this point for every '/' in `feature`. 
*/ + features_index_add_single(p + 1, feature_end - p - 1, offset); + if (ext) { + features_index_add_single(p + 1, ext - p - 1, offset); + } + } + features_index_add_single(feature_str, len, offset); + if (ext) { + features_index_add_single(feature_str, ext - feature_str, offset); + } +} + +static fi_list_item +features_index_find(st_features_index *index, const char *feature, long len) +{ + int pos = fi_hash_find_head(&index->hash, feature, len); + if (pos == FI_LAST) { + fi_list_item res = {FI_LAST, FI_LAST}; + return res; + } + else + return index->list.items[pos]; +} + +static fi_list_item +features_index_next(st_features_index *index, fi_list_item cur) +{ + if (cur.next == FI_LAST) { + fi_list_item res = {FI_LAST, FI_LAST}; + return res; + } + else + return index->list.items[cur.next]; +} + +static st_features_index * +get_loaded_features_index(void) +{ + VALUE features; + int i; + rb_vm_t *vm = GET_VM(); + + if (!rb_ary_dup_of_p(vm->loaded_features_snapshot, vm->loaded_features)) { + /* The sharing was broken; something (other than us in rb_provide_feature()) + modified loaded_features. Rebuild the index. */ + features_index_clear(); + features = vm->loaded_features; + for (i = 0; i < RARRAY_LEN(features); i++) { + VALUE entry, as_str; + as_str = entry = rb_ary_entry(features, i); + StringValue(as_str); + if (as_str != entry) + rb_ary_store(features, i, as_str); + rb_str_freeze(as_str); + features_index_add(RSTRING_PTR(as_str), RSTRING_LEN(as_str), i); + } + reset_loaded_features_snapshot(); + } + return get_loaded_features_index_raw(); +} + +/* This searches `load_path` for a value such that + name == "#{load_path[i]}/#{feature}" + if `feature` is a suffix of `name`, or otherwise + name == "#{load_path[i]}/#{feature}#{ext}" + for an acceptable string `ext`. It returns + `load_path[i].to_str` if found, else 0. + + If type is 's', then `ext` is acceptable only if IS_DLEXT(ext); + if 'r', then only if IS_RBEXT(ext); otherwise `ext` may be absent + or have any value matching `%r{^\.[^./]*$}`. +*/ static VALUE loaded_feature_path(const char *name, long vlen, const char *feature, long len, int type, VALUE load_path) @@ -88,23 +495,22 @@ loaded_feature_path(const char *name, long vlen, const char *feature, long len, return 0; plen = e - name - len - 1; } + if (type == 's' && !IS_DLEXT(&name[plen+len+1]) + || type == 'r' && !IS_RBEXT(&name[plen+len+1]) + || name[plen] != '/') { + return 0; + } + /* Now name == "#{prefix}/#{feature}#{ext}" where ext is acceptable + (possibly empty) and prefix is some string of length plen. 
*/ + for (i = 0; i < RARRAY_LEN(load_path); ++i) { VALUE p = RARRAY_PTR(load_path)[i]; const char *s = StringValuePtr(p); long n = RSTRING_LEN(p); - if (n != plen ) continue; - if (n && (strncmp(name, s, n) || name[n] != '/')) continue; - switch (type) { - case 's': - if (IS_DLEXT(&name[n+len+1])) return p; - break; - case 'r': - if (IS_RBEXT(&name[n+len+1])) return p; - break; - default: - return p; - } + if (n != plen) continue; + if (n && strncmp(name, s, n)) continue; + return p; } return 0; } @@ -132,10 +538,12 @@ loaded_feature_path_i(st_data_t v, st_data_t b, st_data_t f) static int rb_feature_p(const char *feature, const char *ext, int rb, int expanded, const char **fn) { - VALUE v, features, p, load_path = 0; + VALUE features, v, p, load_path = 0; const char *f, *e; long i, len, elen, n; st_table *loading_tbl; + st_features_index *features_index; + fi_list_item index_list; st_data_t data; int type; @@ -151,8 +559,43 @@ rb_feature_p(const char *feature, const char *ext, int rb, int expanded, const c type = 0; } features = get_loaded_features(); - for (i = 0; i < RARRAY_LEN(features); ++i) { - v = RARRAY_PTR(features)[i]; + features_index = get_loaded_features_index(); + + index_list = features_index_find(features_index, feature, len); + /* We search `features` for an entry such that either + "#{features[i]}" == "#{load_path[j]}/#{feature}#{e}" + for some j, or + "#{features[i]}" == "#{feature}#{e}" + Here `e` is an "allowed" extension -- either empty or one + of the extensions accepted by IS_RBEXT, IS_SOEXT, or + IS_DLEXT. Further, if `ext && rb` then `IS_RBEXT(e)`, + and if `ext && !rb` then `IS_SOEXT(e) || IS_DLEXT(e)`. + + If `expanded`, then only the latter form (without load_path[j]) + is accepted. Otherwise either form is accepted, *unless* `ext` + is false and an otherwise-matching entry of the first form is + preceded by an entry of the form + "#{features[i2]}" == "#{load_path[j2]}/#{feature}#{e2}" + where `e2` matches %r{^\.[^./]*$} but is not an allowed extension. + After a "distractor" entry of this form, only entries of the + form "#{feature}#{e}" are accepted. + + In `rb_provide_feature()` and `get_loaded_features_index()` we + maintain an invariant that the list `index` will point to at least + every entry in `features` which has the form + "#{prefix}#{feature}#{e}" + where `e` is empty or matches %r{^\.[^./]*$}, and `prefix` is empty + or ends in '/'. This includes both match forms above, as well + as any distractors, so we may ignore all other entries in `features`. + (since we store only hash value of feature, there could be also + other features in the list, but probability of it is very small, + and loaded_feature_path will recheck for it) + */ + for (; index_list.offset != FI_LAST ; + index_list = features_index_next(features_index, index_list)) { + long index = index_list.offset; + + v = RARRAY_PTR(features)[index]; f = StringValuePtr(v); if ((n = RSTRING_LEN(v)) < len) continue; if (strncmp(f, feature, len) != 0) { @@ -175,6 +618,7 @@ rb_feature_p(const char *feature, const char *ext, int rb, int expanded, const c return 'r'; } } + loading_tbl = get_loading_table(); if (loading_tbl) { f = 0; @@ -183,7 +627,7 @@ rb_feature_p(const char *feature, const char *ext, int rb, int expanded, const c fs.name = feature; fs.len = len; fs.type = type; - fs.load_path = load_path ? load_path : rb_get_load_path(); + fs.load_path = load_path ? 
load_path : rb_get_expanded_load_path(); fs.result = 0; st_foreach(loading_tbl, loaded_feature_path_i, (st_data_t)&fs); if ((f = fs.result) != 0) { @@ -233,7 +677,7 @@ rb_feature_provided(const char *feature, const char **loading) if (*feature == '.' && (feature[1] == '/' || strncmp(feature+1, "./", 2) == 0)) { - fullpath = rb_file_expand_path_fast(rb_str_new2(feature), Qnil); + fullpath = rb_file_expand_path_fast(rb_get_path(rb_str_new2(feature)), Qnil); feature = RSTRING_PTR(fullpath); } if (ext && !strchr(ext, '/')) { @@ -254,11 +698,20 @@ rb_feature_provided(const char *feature, const char **loading) static void rb_provide_feature(VALUE feature) { - if (OBJ_FROZEN(get_loaded_features())) { + VALUE features; + + features = get_loaded_features(); + if (OBJ_FROZEN(features)) { rb_raise(rb_eRuntimeError, "$LOADED_FEATURES is frozen; cannot append feature"); } - rb_ary_push(get_loaded_features(), feature); + rb_str_freeze(feature); + + rb_ary_push(features, feature); + StringValue(feature); + features_index_add(RSTRING_PTR(feature), RSTRING_LEN(feature), (int)(RARRAY_LEN(features)-1)); + RB_GC_GUARD(feature); + reset_loaded_features_snapshot(); } void @@ -774,10 +1227,15 @@ Init_load() rb_alias_variable(rb_intern("$-I"), id_load_path); rb_alias_variable(rb_intern("$LOAD_PATH"), id_load_path); vm->load_path = rb_ary_new(); + vm->expanded_load_path = rb_ary_new(); + vm->load_path_snapshot = rb_ary_new(); + vm->load_path_check_cache = 0; rb_define_virtual_variable("$\"", get_loaded_features, 0); rb_define_virtual_variable("$LOADED_FEATURES", get_loaded_features, 0); vm->loaded_features = rb_ary_new(); + vm->loaded_features_snapshot = rb_ary_new(); + vm->loaded_features_index = features_index_allocate(); rb_define_global_function("load", rb_f_load, -1); rb_define_global_function("require", rb_f_require, 1); diff --git a/ruby.c b/ruby.c index 3ddd96c7bb6a44..7ffc78ed727da7 100644 --- a/ruby.c +++ b/ruby.c @@ -1366,7 +1366,8 @@ process_options(int argc, char **argv, struct cmdline_options *opt) long i; VALUE load_path = GET_VM()->load_path; for (i = 0; i < RARRAY_LEN(load_path); ++i) { - rb_enc_associate(RARRAY_PTR(load_path)[i], lenc); + RARRAY_PTR(load_path)[i] = + rb_enc_associate(rb_str_dup(RARRAY_PTR(load_path)[i]), lenc); } } if (!(opt->disable & DISABLE_BIT(gems))) { diff --git a/test/ruby/test_array.rb b/test/ruby/test_array.rb index 28d3e4156b55be..bcad2c0955822f 100644 --- a/test/ruby/test_array.rb +++ b/test/ruby/test_array.rb @@ -414,6 +414,18 @@ def test_ASET # '[]=' a = @cls[1, 2, 3] a[-1, 0] = a assert_equal([1, 2, 1, 2, 3, 3], a) + + a = @cls[] + a[5,0] = [5] + assert_equal([nil, nil, nil, nil, nil, 5], a) + + a = @cls[1] + a[1,0] = [2] + assert_equal([1, 2], a) + + a = @cls[1] + a[1,1] = [2] + assert_equal([1, 2], a) end def test_assoc diff --git a/test/ruby/test_require.rb b/test/ruby/test_require.rb index 58a9ee26b61bf5..ec7509644ec6de 100644 --- a/test/ruby/test_require.rb +++ b/test/ruby/test_require.rb @@ -356,4 +356,114 @@ def test_loaded_features_encoding $:.replace(loadpath) $".replace(features) end + + def test_require_changed_current_dir + bug7158 = '[ruby-core:47970]' + Dir.mktmpdir {|tmp| + Dir.chdir(tmp) { + Dir.mkdir("a") + Dir.mkdir("b") + open(File.join("a", "foo.rb"), "w") {} + open(File.join("b", "bar.rb"), "w") {|f| + f.puts "p :ok" + } + assert_in_out_err([], <<-INPUT, %w(:ok), [], bug7158) + $: << "." 
+ Dir.chdir("a") + require "foo" + Dir.chdir("../b") + p :ng unless require "bar" + Dir.chdir("..") + p :ng if require "b/bar" + INPUT + } + } + end + + def test_require_not_modified_load_path + bug7158 = '[ruby-core:47970]' + Dir.mktmpdir {|tmp| + Dir.chdir(tmp) { + open("foo.rb", "w") {} + assert_in_out_err([], <<-INPUT, %w(:ok), [], bug7158) + a = Object.new + def a.to_str + "#{tmp}" + end + $: << a + require "foo" + last_path = $:.pop + p :ok if last_path == a && last_path.class == Object + INPUT + } + } + end + + def test_require_changed_home + bug7158 = '[ruby-core:47970]' + Dir.mktmpdir {|tmp| + Dir.chdir(tmp) { + open("foo.rb", "w") {} + Dir.mkdir("a") + open(File.join("a", "bar.rb"), "w") {} + assert_in_out_err([], <<-INPUT, %w(:ok), [], bug7158) + $: << '~' + ENV['HOME'] = "#{tmp}" + require "foo" + ENV['HOME'] = "#{tmp}/a" + p :ok if require "bar" + INPUT + } + } + end + + def test_require_to_path_redefined_in_load_path + bug7158 = '[ruby-core:47970]' + Dir.mktmpdir {|tmp| + Dir.chdir(tmp) { + open("foo.rb", "w") {} + assert_in_out_err(["RUBYOPT"=>nil], <<-INPUT, %w(:ok), [], bug7158) + a = Object.new + def a.to_path + "bar" + end + $: << a + begin + require "foo" + p :ng + rescue LoadError + end + def a.to_path + "#{tmp}" + end + p :ok if require "foo" + INPUT + } + } + end + + def test_require_to_str_redefined_in_load_path + bug7158 = '[ruby-core:47970]' + Dir.mktmpdir {|tmp| + Dir.chdir(tmp) { + open("foo.rb", "w") {} + assert_in_out_err(["RUBYOPT"=>nil], <<-INPUT, %w(:ok), [], bug7158) + a = Object.new + def a.to_str + "foo" + end + $: << a + begin + require "foo" + p :ng + rescue LoadError + end + def a.to_str + "#{tmp}" + end + p :ok if require "foo" + INPUT + } + } + end end diff --git a/vm.c b/vm.c index 34f82bcd9812e1..23caa5cc3f9e42 100644 --- a/vm.c +++ b/vm.c @@ -1592,7 +1592,12 @@ rb_vm_mark(void *ptr) RUBY_MARK_UNLESS_NULL(vm->thgroup_default); RUBY_MARK_UNLESS_NULL(vm->mark_object_ary); RUBY_MARK_UNLESS_NULL(vm->load_path); + RUBY_MARK_UNLESS_NULL(vm->load_path_snapshot); + RUBY_MARK_UNLESS_NULL(vm->load_path_check_cache); + RUBY_MARK_UNLESS_NULL(vm->expanded_load_path); RUBY_MARK_UNLESS_NULL(vm->loaded_features); + RUBY_MARK_UNLESS_NULL(vm->loaded_features_snapshot); + RUBY_MARK_UNLESS_NULL(vm->loaded_features_index); RUBY_MARK_UNLESS_NULL(vm->top_self); RUBY_MARK_UNLESS_NULL(vm->coverages); rb_gc_mark_locations(vm->special_exceptions, vm->special_exceptions + ruby_special_error_count); diff --git a/vm_core.h b/vm_core.h index dfc0e3c1ef3219..70c5f5cd007b59 100644 --- a/vm_core.h +++ b/vm_core.h @@ -299,7 +299,12 @@ typedef struct rb_vm_struct { /* load */ VALUE top_self; VALUE load_path; + VALUE load_path_snapshot; + VALUE load_path_check_cache; + VALUE expanded_load_path; VALUE loaded_features; + VALUE loaded_features_snapshot; + VALUE loaded_features_index; struct st_table *loading_table; /* signal */
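
Standalone sketches of the main techniques in this patch follow; none of them are part of the diff itself. First, the growth policy: ary_ensure_room_for_push() and ary_ensure_room_for_unshift() compare the new length against capa - (capa >> 6), growing the backing store once less than 1/64 of the capacity is left, so a queue that alternates shifts and pushes settles into a steady state instead of reallocating on every push. A minimal model of that threshold, with a plain doubling rule standing in for ary_double_capa() (whose body is not shown in this diff):

    /* Model of the slack check in ary_ensure_room_for_push(): grow once the
     * requested length would eat into the last 1/64 of the capacity.  The
     * doubling below is an assumption standing in for ary_double_capa(). */
    #include <stdio.h>

    static long
    grow_capa(long capa, long new_len)
    {
        while (new_len > capa - (capa >> 6))
            capa *= 2;
        return capa;
    }

    int
    main(void)
    {
        long capa = 64, len;
        for (len = 1; len <= 100000; len++) {
            long want = grow_capa(capa, len);
            if (want != capa) {
                printf("len=%ld: capa %ld -> %ld\n", len, capa, want);
                capa = want;
            }
        }
        return 0;
    }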
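rb_ary_dup_of_p() is the cheap "did this array change?" test behind load_path_snapshot and loaded_features_snapshot: take a snapshot with rb_ary_replace(), then later compare lengths plus either the embedded contents or the shared heap pointer. A sketch of that pattern through the C embedding API; it assumes a Ruby built with this patch applied (rb_ary_dup_of_p() is introduced here) and the usual embedding compile/link flags:

    #include <stdio.h>
    #include <ruby.h>

    int
    main(void)
    {
        ruby_init();
        {
            VALUE load_path = rb_ary_new();
            VALUE snapshot  = rb_ary_new();

            rb_ary_push(load_path, rb_str_new_cstr("/usr/lib/ruby/1.9.1"));
            rb_ary_replace(snapshot, load_path);    /* cheap snapshot */

            /* nothing modified the array, so a cache keyed on it is still valid */
            printf("%d\n", RTEST(rb_ary_dup_of_p(snapshot, load_path)));   /* 1 */

            rb_ary_push(load_path, rb_str_new_cstr("."));   /* breaks the sharing */
            printf("%d\n", RTEST(rb_ary_dup_of_p(snapshot, load_path)));   /* 0 */
        }
        return ruby_cleanup(0);
    }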
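rb_get_expanded_load_path() keeps three cache states in vm->load_path_check_cache: 0 (every entry is fully cacheable), the remembered working directory (some entries were relative), or the Qtrue sentinel (non-String entries that must be re-expanded on every call). The reuse/rebuild decision restated as a pure function, with illustrative names only:

    /* Which part of the cached expansion can be reused on this call.
     * NON_CACHE mirrors the Qtrue sentinel; all names here are hypothetical. */
    #include <stdio.h>

    enum expand_type { REUSE_CACHED, EXPAND_ALL, EXPAND_RELATIVE, EXPAND_HOME, EXPAND_NON_CACHE };
    enum cache_state { CACHE_NONE, CACHE_CWD, CACHE_NON_CACHE };   /* 0 / cwd string / Qtrue */

    static enum expand_type
    decide(int snapshot_valid, enum cache_state state, int cwd_changed)
    {
        if (!snapshot_valid)
            return EXPAND_ALL;              /* $LOAD_PATH itself was modified */
        switch (state) {
          case CACHE_NON_CACHE:
            return EXPAND_NON_CACHE;        /* only re-expand non-String entries */
          case CACHE_CWD:
            return cwd_changed ? EXPAND_RELATIVE   /* relative entries went stale */
                               : EXPAND_HOME;      /* only "~" and non-String entries */
          default:
            return REUSE_CACHED;            /* fully cached */
        }
    }

    int
    main(void)
    {
        printf("%d\n", decide(1, CACHE_CWD, 1));    /* EXPAND_RELATIVE */
        printf("%d\n", decide(1, CACHE_NONE, 0));   /* REUSE_CACHED */
        return 0;
    }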
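The feature index stores only fast_string_hash() of each key and relies on rb_feature_p()/loaded_feature_path() re-checking the real string, so a collision costs at most one wasted comparison. The hash xor-folds the name in 16-byte chunks through a 32x32->64-bit multiply. The same pair of functions lifted out of load.c, with a small driver added (the sample strings are arbitrary):

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    static inline uint32_t
    fmix_uint(uint32_t val)
    {
        /* 32bit*32bit=64bit multiply, then fold the halves together */
        uint64_t res = ((uint64_t)val) * (uint64_t)0x85ebca6b;
        return (uint32_t)res ^ (uint32_t)(res >> 32);
    }

    static uint32_t
    fast_string_hash(const char *str, long len)
    {
        uint32_t res = 0;
        for (; len > 0; str += 16, len -= 16) {
            uint32_t buf[4] = {0, 0, 0, 0};
            memcpy(buf, str, len < 16 ? len : 16);
            res = fmix_uint(res ^ buf[0]);
            res = fmix_uint(res ^ buf[1]);
            res = fmix_uint(res ^ buf[2]);
            res = fmix_uint(res ^ buf[3]);
        }
        return res;
    }

    int
    main(void)
    {
        const char *samples[] = { "set", "set.rb", "rbconfig", "openssl/ssl.rb" };
        size_t i;
        for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
            printf("%-16s %08x\n", samples[i],
                   (unsigned)fast_string_hash(samples[i], (long)strlen(samples[i])));
        return 0;
    }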
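The index itself is two flat arrays: an open-addressed hash whose buckets store the head/tail positions of a per-key chain (offset by one so 0 means empty), and a single pool of {offset, next} cells holding every $LOADED_FEATURES position recorded under that key. A reduced model with fixed sizes and linear probing; the patch probes with an odd step derived from the hash and rehashes above 3/4 load:

    /* Reduced model of fi_hash + fi_list: fixed capacities, no rehash. */
    #include <stdio.h>
    #include <stdint.h>

    #define LAST      (-1)
    #define HASH_CAPA 64            /* power of two, as in the patch */
    #define LIST_CAPA 64

    typedef struct { uint32_t hash; int head, tail; } bucket;  /* head==0 => empty */
    typedef struct { int offset, next; } cell;

    static bucket buckets[HASH_CAPA];
    static cell   cells[LIST_CAPA];
    static int    ncells;

    static bucket *
    candidate(uint32_t hash)
    {
        int pos = hash & (HASH_CAPA - 1);
        while (buckets[pos].head != 0 && buckets[pos].hash != hash)
            pos = (pos + 1) & (HASH_CAPA - 1);  /* linear probe for the sketch */
        return &buckets[pos];
    }

    static void
    index_add(uint32_t hash, int offset)
    {
        bucket *b = candidate(hash);
        cells[ncells].offset = offset;
        cells[ncells].next = LAST;
        if (b->head)
            cells[b->tail - 1].next = ncells;   /* append to this key's chain */
        else {
            b->hash = hash;
            b->head = ncells + 1;
        }
        b->tail = ncells + 1;
        ncells++;
    }

    static void
    index_dump(uint32_t hash)
    {
        int pos = candidate(hash)->head - 1;
        while (pos != LAST) {
            printf("offset %d\n", cells[pos].offset);
            pos = cells[pos].next;
        }
    }

    int
    main(void)
    {
        index_add(0xfeedu, 3);   /* key seen at $LOADED_FEATURES[3] ... */
        index_add(0xbeefu, 5);
        index_add(0xfeedu, 9);   /* ... and again at [9] */
        index_dump(0xfeedu);     /* prints offsets 3 then 9 */
        return 0;
    }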
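features_index_add() registers one key for every trailing piece of the feature path that starts at the beginning or just after a '/', each with and without its final extension; that is exactly the set of short names rb_feature_p() may be asked about. A hypothetical re-derivation of that key set with plain string functions (the output order differs from the patch's backward pointer walk):

    /* Print the keys that would be indexed for one $LOADED_FEATURES entry.
     * Simplified re-derivation, not the code from load.c. */
    #include <stdio.h>
    #include <string.h>

    static void
    print_keys(const char *feature)
    {
        size_t len = strlen(feature);
        const char *dot = strrchr(feature, '.');
        size_t stem = (dot && !strchr(dot, '/')) ? (size_t)(dot - feature) : len;
        size_t i;

        for (i = 0; i < len; i++) {
            /* a key starts at the beginning or right after a '/' */
            if (i != 0 && feature[i - 1] != '/')
                continue;
            printf("%.*s\n", (int)(len - i), feature + i);      /* with extension */
            if (stem != len && i < stem)
                printf("%.*s\n", (int)(stem - i), feature + i); /* without extension */
        }
    }

    int
    main(void)
    {
        print_keys("gems/rack-1.4/lib/rack.rb");
        return 0;
    }

For "gems/rack-1.4/lib/rack.rb" this yields the full path, "rack-1.4/lib/rack.rb", "lib/rack.rb" and "rack.rb", each also without the ".rb" suffix, matching the invariant documented above features_index_add().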