Skip to content

Commit

Permalink
UTF-8 hash keys, patch from Inaba Hiroto.
Browse files Browse the repository at this point in the history
p4raw-id: //depot/perl@7980
  • Loading branch information
jhi committed Dec 4, 2000
1 parent 38ff9fd commit da58a35
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 28 deletions.
8 changes: 4 additions & 4 deletions embed.pl
Original file line number Diff line number Diff line change
Expand Up @@ -1563,11 +1563,11 @@ END
Apd |HV* |gv_stashsv |SV* sv|I32 create
Apd |void |hv_clear |HV* tb
Ap |void |hv_delayfree_ent|HV* hv|HE* entry
Apd |SV* |hv_delete |HV* tb|const char* key|U32 klen|I32 flags
Apd |SV* |hv_delete |HV* tb|const char* key|I32 klen|I32 flags
Apd |SV* |hv_delete_ent |HV* tb|SV* key|I32 flags|U32 hash
Apd |bool |hv_exists |HV* tb|const char* key|U32 klen
Apd |bool |hv_exists |HV* tb|const char* key|I32 klen
Apd |bool |hv_exists_ent |HV* tb|SV* key|U32 hash
Apd |SV** |hv_fetch |HV* tb|const char* key|U32 klen|I32 lval
Apd |SV** |hv_fetch |HV* tb|const char* key|I32 klen|I32 lval
Apd |HE* |hv_fetch_ent |HV* tb|SV* key|I32 lval|U32 hash
Ap |void |hv_free_ent |HV* hv|HE* entry
Apd |I32 |hv_iterinit |HV* tb
Expand All @@ -1578,7 +1578,7 @@ END
Apd |SV* |hv_iterval |HV* tb|HE* entry
Ap |void |hv_ksplit |HV* hv|IV newmax
Apd |void |hv_magic |HV* hv|GV* gv|int how
Apd |SV** |hv_store |HV* tb|const char* key|U32 klen|SV* val \
Apd |SV** |hv_store |HV* tb|const char* key|I32 klen|SV* val \
|U32 hash
Apd |HE* |hv_store_ent |HV* tb|SV* key|SV* val|U32 hash
Apd |void |hv_undef |HV* tb
Expand Down
92 changes: 76 additions & 16 deletions hv.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,19 @@ S_save_hek(pTHX_ const char *str, I32 len, U32 hash)
{
char *k;
register HEK *hek;
bool is_utf8 = FALSE;

if (len < 0) {
len = -len;
is_utf8 = TRUE;
}

New(54, k, HEK_BASESIZE + len + 1, char);
hek = (HEK*)k;
Copy(str, HEK_KEY(hek), len, char);
*(HEK_KEY(hek) + len) = '\0';
HEK_LEN(hek) = len;
HEK_HASH(hek) = hash;
HEK_UTF8(hek) = (char)is_utf8;
return hek;
}

Expand Down Expand Up @@ -112,9 +118,9 @@ Perl_he_dup(pTHX_ HE *e, bool shared)
if (HeKLEN(e) == HEf_SVKEY)
HeKEY_sv(ret) = SvREFCNT_inc(sv_dup(HeKEY_sv(e)));
else if (shared)
HeKEY_hek(ret) = share_hek(HeKEY(e), HeKLEN(e), HeHASH(e));
HeKEY_hek(ret) = share_hek(HeKEY(e), HeKLEN_UTF8(e), HeHASH(e));
else
HeKEY_hek(ret) = save_hek(HeKEY(e), HeKLEN(e), HeHASH(e));
HeKEY_hek(ret) = save_hek(HeKEY(e), HeKLEN_UTF8(e), HeHASH(e));
HeVAL(ret) = SvREFCNT_inc(sv_dup(HeVAL(e)));
return ret;
}
Expand All @@ -138,16 +144,22 @@ information on how to use this function on tied hashes.
*/

SV**
Perl_hv_fetch(pTHX_ HV *hv, const char *key, U32 klen, I32 lval)
Perl_hv_fetch(pTHX_ HV *hv, const char *key, I32 klen, I32 lval)
{
register XPVHV* xhv;
register U32 hash;
register HE *entry;
SV *sv;
bool is_utf8 = FALSE;

if (!hv)
return 0;

if (klen < 0) {
klen = -klen;
is_utf8 = TRUE;
}

if (SvRMAGICAL(hv)) {
if (mg_find((SV*)hv,'P')) {
dTHR;
Expand Down Expand Up @@ -194,6 +206,8 @@ Perl_hv_fetch(pTHX_ HV *hv, const char *key, U32 klen, I32 lval)
continue;
if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) /* is this it? */
continue;
if (HeKUTF8(entry) != (char)is_utf8)
continue;
return &HeVAL(entry);
}
#ifdef DYNAMIC_ENV_FETCH /* %ENV lookup? If so, try to fetch the value now */
Expand All @@ -209,7 +223,7 @@ Perl_hv_fetch(pTHX_ HV *hv, const char *key, U32 klen, I32 lval)
#endif
if (lval) { /* gonna assign to this, so it better be there */
sv = NEWSV(61,0);
return hv_store(hv,key,klen,sv,hash);
return hv_store(hv,key,is_utf8?-klen:klen,sv,hash);
}
return 0;
}
Expand Down Expand Up @@ -241,6 +255,7 @@ Perl_hv_fetch_ent(pTHX_ HV *hv, SV *keysv, I32 lval, register U32 hash)
STRLEN klen;
register HE *entry;
SV *sv;
bool is_utf8;

if (!hv)
return 0;
Expand Down Expand Up @@ -291,6 +306,7 @@ Perl_hv_fetch_ent(pTHX_ HV *hv, SV *keysv, I32 lval, register U32 hash)
}

key = SvPV(keysv, klen);
is_utf8 = (SvUTF8(keysv)!=0);

if (!hash)
PERL_HASH(hash, key, klen);
Expand All @@ -303,6 +319,8 @@ Perl_hv_fetch_ent(pTHX_ HV *hv, SV *keysv, I32 lval, register U32 hash)
continue;
if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) /* is this it? */
continue;
if (HeKUTF8(entry) != (char)is_utf8)
continue;
return entry;
}
#ifdef DYNAMIC_ENV_FETCH /* %ENV lookup? If so, try to fetch the value now */
Expand Down Expand Up @@ -361,16 +379,22 @@ information on how to use this function on tied hashes.
*/

SV**
Perl_hv_store(pTHX_ HV *hv, const char *key, U32 klen, SV *val, register U32 hash)
Perl_hv_store(pTHX_ HV *hv, const char *key, I32 klen, SV *val, register U32 hash)
{
register XPVHV* xhv;
register I32 i;
register HE *entry;
register HE **oentry;
bool is_utf8 = FALSE;

if (!hv)
return 0;

if (klen < 0) {
klen = -klen;
is_utf8 = TRUE;
}

xhv = (XPVHV*)SvANY(hv);
if (SvMAGICAL(hv)) {
bool needs_copy;
Expand Down Expand Up @@ -406,16 +430,18 @@ Perl_hv_store(pTHX_ HV *hv, const char *key, U32 klen, SV *val, register U32 has
continue;
if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) /* is this it? */
continue;
if (HeKUTF8(entry) != (char)is_utf8)
continue;
SvREFCNT_dec(HeVAL(entry));
HeVAL(entry) = val;
return &HeVAL(entry);
}

entry = new_HE();
if (HvSHAREKEYS(hv))
HeKEY_hek(entry) = share_hek(key, klen, hash);
HeKEY_hek(entry) = share_hek(key, is_utf8?-klen:klen, hash);
else /* gotta do the real thing */
HeKEY_hek(entry) = save_hek(key, klen, hash);
HeKEY_hek(entry) = save_hek(key, is_utf8?-klen:klen, hash);
HeVAL(entry) = val;
HeNEXT(entry) = *oentry;
*oentry = entry;
Expand Down Expand Up @@ -458,6 +484,7 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, register U32 hash)
register I32 i;
register HE *entry;
register HE **oentry;
bool is_utf8;

if (!hv)
return 0;
Expand Down Expand Up @@ -489,6 +516,7 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, register U32 hash)
}

key = SvPV(keysv, klen);
is_utf8 = (SvUTF8(keysv) != 0);

if (!hash)
PERL_HASH(hash, key, klen);
Expand All @@ -507,16 +535,18 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, register U32 hash)
continue;
if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) /* is this it? */
continue;
if (HeKUTF8(entry) != (char)is_utf8)
continue;
SvREFCNT_dec(HeVAL(entry));
HeVAL(entry) = val;
return entry;
}

entry = new_HE();
if (HvSHAREKEYS(hv))
HeKEY_hek(entry) = share_hek(key, klen, hash);
HeKEY_hek(entry) = share_hek(key, is_utf8?-klen:klen, hash);
else /* gotta do the real thing */
HeKEY_hek(entry) = save_hek(key, klen, hash);
HeKEY_hek(entry) = save_hek(key, is_utf8?-klen:klen, hash);
HeVAL(entry) = val;
HeNEXT(entry) = *oentry;
*oentry = entry;
Expand All @@ -543,7 +573,7 @@ will be returned.
*/

SV *
Perl_hv_delete(pTHX_ HV *hv, const char *key, U32 klen, I32 flags)
Perl_hv_delete(pTHX_ HV *hv, const char *key, I32 klen, I32 flags)
{
register XPVHV* xhv;
register I32 i;
Expand All @@ -552,9 +582,14 @@ Perl_hv_delete(pTHX_ HV *hv, const char *key, U32 klen, I32 flags)
register HE **oentry;
SV **svp;
SV *sv;
bool is_utf8 = FALSE;

if (!hv)
return Nullsv;
if (klen < 0) {
klen = -klen;
is_utf8 = TRUE;
}
if (SvRMAGICAL(hv)) {
bool needs_copy;
bool needs_store;
Expand Down Expand Up @@ -594,6 +629,8 @@ Perl_hv_delete(pTHX_ HV *hv, const char *key, U32 klen, I32 flags)
continue;
if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) /* is this it? */
continue;
if (HeKUTF8(entry) != (char)is_utf8)
continue;
*oentry = HeNEXT(entry);
if (i && !*oentry)
xhv->xhv_fill--;
Expand Down Expand Up @@ -634,6 +671,7 @@ Perl_hv_delete_ent(pTHX_ HV *hv, SV *keysv, I32 flags, U32 hash)
register HE *entry;
register HE **oentry;
SV *sv;
bool is_utf8;

if (!hv)
return Nullsv;
Expand Down Expand Up @@ -667,6 +705,7 @@ Perl_hv_delete_ent(pTHX_ HV *hv, SV *keysv, I32 flags, U32 hash)
return Nullsv;

key = SvPV(keysv, klen);
is_utf8 = (SvUTF8(keysv) != 0);

if (!hash)
PERL_HASH(hash, key, klen);
Expand All @@ -681,6 +720,8 @@ Perl_hv_delete_ent(pTHX_ HV *hv, SV *keysv, I32 flags, U32 hash)
continue;
if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) /* is this it? */
continue;
if (HeKUTF8(entry) != (char)is_utf8)
continue;
*oentry = HeNEXT(entry);
if (i && !*oentry)
xhv->xhv_fill--;
Expand Down Expand Up @@ -710,16 +751,22 @@ C<klen> is the length of the key.
*/

bool
Perl_hv_exists(pTHX_ HV *hv, const char *key, U32 klen)
Perl_hv_exists(pTHX_ HV *hv, const char *key, I32 klen)
{
register XPVHV* xhv;
register U32 hash;
register HE *entry;
SV *sv;
bool is_utf8 = FALSE;

if (!hv)
return 0;

if (klen < 0) {
klen = -klen;
is_utf8 = TRUE;
}

if (SvRMAGICAL(hv)) {
if (mg_find((SV*)hv,'P')) {
dTHR;
Expand Down Expand Up @@ -756,6 +803,8 @@ Perl_hv_exists(pTHX_ HV *hv, const char *key, U32 klen)
continue;
if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) /* is this it? */
continue;
if (HeKUTF8(entry) != (char)is_utf8)
continue;
return TRUE;
}
#ifdef DYNAMIC_ENV_FETCH /* is it out there? */
Expand Down Expand Up @@ -1051,7 +1100,7 @@ Perl_newHVhv(pTHX_ HV *ohv)
/* Slow way */
hv_iterinit(ohv);
while ((entry = hv_iternext(ohv))) {
hv_store(hv, HeKEY(entry), HeKLEN(entry),
hv_store(hv, HeKEY(entry), HeKLEN_UTF8(entry),
SvREFCNT_inc(HeVAL(entry)), HeHASH(entry));
}
HvRITER(ohv) = hv_riter;
Expand Down Expand Up @@ -1343,8 +1392,11 @@ Perl_hv_iterkeysv(pTHX_ register HE *entry)
if (HeKLEN(entry) == HEf_SVKEY)
return sv_mortalcopy(HeKEY_sv(entry));
else {
return sv_2mortal(newSVpvn_share((HeKLEN(entry) ? HeKEY(entry) : ""),
HeKLEN(entry), HeHASH(entry)));
SV *sv = newSVpvn_share((HeKLEN(entry) ? HeKEY(entry) : ""),
HeKLEN(entry), HeHASH(entry));
if (HeKUTF8(entry))
SvUTF8_on(sv);
return sv_2mortal(sv);
}
}

Expand Down Expand Up @@ -1471,6 +1523,12 @@ Perl_share_hek(pTHX_ const char *str, I32 len, register U32 hash)
register HE **oentry;
register I32 i = 1;
I32 found = 0;
bool is_utf8 = FALSE;

if (len < 0) {
len = -len;
is_utf8 = TRUE;
}

/* what follows is the moral equivalent of:
Expand All @@ -1488,12 +1546,14 @@ Perl_share_hek(pTHX_ const char *str, I32 len, register U32 hash)
continue;
if (HeKEY(entry) != str && memNE(HeKEY(entry),str,len)) /* is this it? */
continue;
if (HeKUTF8(entry) != (char)is_utf8)
continue;
found = 1;
break;
}
if (!found) {
entry = new_HE();
HeKEY_hek(entry) = save_hek(str, len, hash);
HeKEY_hek(entry) = save_hek(str, is_utf8?-len:len, hash);
HeVAL(entry) = Nullsv;
HeNEXT(entry) = *oentry;
*oentry = entry;
Expand Down
3 changes: 3 additions & 0 deletions hv.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ C<SV*>.
#define HeKEY(he) HEK_KEY(HeKEY_hek(he))
#define HeKEY_sv(he) (*(SV**)HeKEY(he))
#define HeKLEN(he) HEK_LEN(HeKEY_hek(he))
#define HeKUTF8(he) HEK_UTF8(HeKEY_hek(he))
#define HeKLEN_UTF8(he) (HeKUTF8(he) ? -HeKLEN(he) : HeKLEN(he))
#define HeVAL(he) (he)->hent_val
#define HeHASH(he) HEK_HASH(HeKEY_hek(he))
#define HePV(he,lp) ((HeKLEN(he) == HEf_SVKEY) ? \
Expand All @@ -175,6 +177,7 @@ C<SV*>.
#define HEK_HASH(hek) (hek)->hek_hash
#define HEK_LEN(hek) (hek)->hek_len
#define HEK_KEY(hek) (hek)->hek_key
#define HEK_UTF8(hek) (*(HEK_KEY(hek)+HEK_LEN(hek)))

/* calculate HV array allocation */
#if defined(STRANGE_MALLOC) || defined(MYMALLOC)
Expand Down
Loading

0 comments on commit da58a35

Please sign in to comment.