Skip to content

Commit

Permalink
Include the struct type in frozen hash implementation
Browse files Browse the repository at this point in the history
This modifies the `__hash__` implementation provided for frozen structs to also include the struct type.
This improves hash quality for mappings or sets that contain multiple
different struct types as keys, as structs of different types with the
same data will now hash differently, better matching the `__eq__`
implementation.

As an implementation detail, a struct now hashes the same as a tuple
containing its type followed by all of its field values.

```python
from msgspec import Struct

class Point(Struct, frozen=True):
    x: int
    y: int

assert hash(Point(1, 2)) == hash((Point, 1, 2))
```
  • Loading branch information
jcrist committed Nov 27, 2023
1 parent 36b6aa8 commit ab9e437
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 6 deletions.
19 changes: 13 additions & 6 deletions msgspec/_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -7140,19 +7140,26 @@ Struct_hash(PyObject *self) {
return PyObject_HashNotImplemented(self);
}

/* First hash the type by its pointer */
size_t type_id = (size_t)((void *)st_type);
/* The lower bits are likely to be 0; rotate by 4 */
type_id = (type_id >> 4) | (type_id << (8 * sizeof(void *) - 4));
acc += type_id * MS_HASH_XXPRIME_2;
acc = MS_HASH_XXROTATE(acc);
acc *= MS_HASH_XXPRIME_1;

/* Then hash all the fields */
nfields = StructMeta_GET_NFIELDS(Py_TYPE(self));

for (i = 0; i < nfields; i++) {
Py_uhash_t lane;
val = Struct_get_index(self, i);
if (val == NULL) return -1;
lane = PyObject_Hash(val);
if (lane == (Py_uhash_t)-1) return -1;
acc += lane * MS_HASH_XXPRIME_2;
Py_uhash_t item_hash = PyObject_Hash(val);
if (item_hash == (Py_uhash_t)-1) return -1;
acc += item_hash * MS_HASH_XXPRIME_2;
acc = MS_HASH_XXROTATE(acc);
acc *= MS_HASH_XXPRIME_1;
}
acc += nfields ^ (MS_HASH_XXPRIME_5 ^ 3527539UL);
acc += (1 + nfields) ^ (MS_HASH_XXPRIME_5 ^ 3527539UL);
return (acc == (Py_uhash_t)-1) ? 1546275796 : acc;
}

Expand Down
10 changes: 10 additions & 0 deletions tests/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -1460,6 +1460,16 @@ def test_frozen_hash_mutable_objects_hash_errors(self):
with pytest.raises(TypeError, match="unhashable type"):
hash(p)

def test_hash_includes_type(self):
Ex1 = defstruct("Ex1", ["x"], frozen=True)
Ex2 = defstruct("Ex2", ["x"], frozen=True)
Ex3 = defstruct("Ex3", [], frozen=True)
Ex4 = defstruct("Ex4", [], frozen=True)
assert hash(Ex1(1)) == hash(Ex1(1))
assert hash(Ex1(1)) != hash(Ex2(1))
assert hash(Ex3()) == hash(Ex3())
assert hash(Ex3()) != hash(Ex4())

@pytest.mark.parametrize("base_gc", [True, None, False])
@pytest.mark.parametrize("base_frozen", [True, False])
@pytest.mark.parametrize("has_gc", [True, None, False])
Expand Down

0 comments on commit ab9e437

Please sign in to comment.