@@ -9344,64 +9344,79 @@ static regex_t *
93449344get_reg_grapheme_cluster (rb_encoding * enc )
93459345{
93469346 int encidx = rb_enc_to_index (enc );
9347- regex_t * reg_grapheme_cluster = NULL ;
9348- static regex_t * reg_grapheme_cluster_utf8 = NULL ;
93499347
9350- /* synchronize */
9351- if (encidx == rb_utf8_encindex () && reg_grapheme_cluster_utf8 ) {
9352- reg_grapheme_cluster = reg_grapheme_cluster_utf8 ;
9353- }
9354- if (!reg_grapheme_cluster ) {
9355- const OnigUChar source_ascii [] = "\\X" ;
9356- OnigErrorInfo einfo ;
9357- const OnigUChar * source = source_ascii ;
9358- size_t source_len = sizeof (source_ascii ) - 1 ;
9359- switch (encidx ) {
9348+ const OnigUChar source_ascii [] = "\\X" ;
9349+ const OnigUChar * source = source_ascii ;
9350+ size_t source_len = sizeof (source_ascii ) - 1 ;
9351+
9352+ switch (encidx ) {
93609353#define CHARS_16BE (x ) (OnigUChar)((x)>>8), (OnigUChar)(x)
93619354#define CHARS_16LE (x ) (OnigUChar)(x), (OnigUChar)((x)>>8)
93629355#define CHARS_32BE (x ) CHARS_16BE((x)>>16), CHARS_16BE(x)
93639356#define CHARS_32LE (x ) CHARS_16LE(x), CHARS_16LE((x)>>16)
93649357#define CASE_UTF (e ) \
9365- case ENCINDEX_UTF_##e: { \
9366- static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9367- source = source_UTF_##e; \
9368- source_len = sizeof(source_UTF_##e); \
9369- break; \
9370- }
9371- CASE_UTF (16B E ); CASE_UTF (16L E ); CASE_UTF (32B E ); CASE_UTF (32L E );
9358+ case ENCINDEX_UTF_##e: { \
9359+ static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9360+ source = source_UTF_##e; \
9361+ source_len = sizeof(source_UTF_##e); \
9362+ break; \
9363+ }
9364+ CASE_UTF (16B E ); CASE_UTF (16L E ); CASE_UTF (32B E ); CASE_UTF (32L E );
93729365#undef CASE_UTF
93739366#undef CHARS_16BE
93749367#undef CHARS_16LE
93759368#undef CHARS_32BE
93769369#undef CHARS_32LE
9377- }
9378- int r = onig_new (& reg_grapheme_cluster , source , source + source_len ,
9379- ONIG_OPTION_DEFAULT , enc , OnigDefaultSyntax , & einfo );
9380- if (r ) {
9381- UChar message [ONIG_MAX_ERROR_MESSAGE_LEN ];
9382- onig_error_code_to_str (message , r , & einfo );
9383- rb_fatal ("cannot compile grapheme cluster regexp: %s" , (char * )message );
9384- }
9385- if (encidx == rb_utf8_encindex ()) {
9386- reg_grapheme_cluster_utf8 = reg_grapheme_cluster ;
9387- }
93889370 }
9371+
9372+ regex_t * reg_grapheme_cluster ;
9373+ OnigErrorInfo einfo ;
9374+ int r = onig_new (& reg_grapheme_cluster , source , source + source_len ,
9375+ ONIG_OPTION_DEFAULT , enc , OnigDefaultSyntax , & einfo );
9376+ if (r ) {
9377+ UChar message [ONIG_MAX_ERROR_MESSAGE_LEN ];
9378+ onig_error_code_to_str (message , r , & einfo );
9379+ rb_fatal ("cannot compile grapheme cluster regexp: %s" , (char * )message );
9380+ }
9381+
93899382 return reg_grapheme_cluster ;
93909383}
93919384
9385+ static regex_t *
9386+ get_cached_reg_grapheme_cluster (rb_encoding * enc )
9387+ {
9388+ int encidx = rb_enc_to_index (enc );
9389+ static regex_t * reg_grapheme_cluster_utf8 = NULL ;
9390+
9391+ if (encidx == rb_utf8_encindex ()) {
9392+ if (!reg_grapheme_cluster_utf8 ) {
9393+ reg_grapheme_cluster_utf8 = get_reg_grapheme_cluster (enc );
9394+ }
9395+
9396+ return reg_grapheme_cluster_utf8 ;
9397+ }
9398+
9399+ return NULL ;
9400+ }
9401+
93929402static VALUE
93939403rb_str_each_grapheme_cluster_size (VALUE str , VALUE args , VALUE eobj )
93949404{
93959405 size_t grapheme_cluster_count = 0 ;
9396- regex_t * reg_grapheme_cluster = NULL ;
93979406 rb_encoding * enc = get_encoding (str );
93989407 const char * ptr , * end ;
93999408
94009409 if (!rb_enc_unicode_p (enc )) {
94019410 return rb_str_length (str );
94029411 }
94039412
9404- reg_grapheme_cluster = get_reg_grapheme_cluster (enc );
9413+ bool cached_reg_grapheme_cluster = true;
9414+ regex_t * reg_grapheme_cluster = get_cached_reg_grapheme_cluster (enc );
9415+ if (!reg_grapheme_cluster ) {
9416+ reg_grapheme_cluster = get_reg_grapheme_cluster (enc );
9417+ cached_reg_grapheme_cluster = false;
9418+ }
9419+
94059420 ptr = RSTRING_PTR (str );
94069421 end = RSTRING_END (str );
94079422
@@ -9414,14 +9429,17 @@ rb_str_each_grapheme_cluster_size(VALUE str, VALUE args, VALUE eobj)
94149429 ptr += len ;
94159430 }
94169431
9432+ if (!cached_reg_grapheme_cluster ) {
9433+ onig_free (reg_grapheme_cluster );
9434+ }
9435+
94179436 return SIZET2NUM (grapheme_cluster_count );
94189437}
94199438
94209439static VALUE
94219440rb_str_enumerate_grapheme_clusters (VALUE str , VALUE ary )
94229441{
94239442 VALUE orig = str ;
9424- regex_t * reg_grapheme_cluster = NULL ;
94259443 rb_encoding * enc = get_encoding (str );
94269444 const char * ptr0 , * ptr , * end ;
94279445
@@ -9430,7 +9448,14 @@ rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary)
94309448 }
94319449
94329450 if (!ary ) str = rb_str_new_frozen (str );
9433- reg_grapheme_cluster = get_reg_grapheme_cluster (enc );
9451+
9452+ bool cached_reg_grapheme_cluster = true;
9453+ regex_t * reg_grapheme_cluster = get_cached_reg_grapheme_cluster (enc );
9454+ if (!reg_grapheme_cluster ) {
9455+ reg_grapheme_cluster = get_reg_grapheme_cluster (enc );
9456+ cached_reg_grapheme_cluster = false;
9457+ }
9458+
94349459 ptr0 = ptr = RSTRING_PTR (str );
94359460 end = RSTRING_END (str );
94369461
@@ -9442,6 +9467,11 @@ rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary)
94429467 ENUM_ELEM (ary , rb_str_subseq (str , ptr - ptr0 , len ));
94439468 ptr += len ;
94449469 }
9470+
9471+ if (!cached_reg_grapheme_cluster ) {
9472+ onig_free (reg_grapheme_cluster );
9473+ }
9474+
94459475 RB_GC_GUARD (str );
94469476 if (ary )
94479477 return ary ;
0 commit comments