From 442c1a0afad8adf086242870e70e3df37ba25901 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Janek=20Nouvertn=C3=A9?= Date: Sun, 26 Oct 2025 13:16:13 +0100 Subject: [PATCH 1/5] fix --- src/msgspec/_core.c | 7 ++++++- tests/unit/test_struct_meta.py | 23 +++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/msgspec/_core.c b/src/msgspec/_core.c index 4ccdbdb7..f1ab79dc 100644 --- a/src/msgspec/_core.c +++ b/src/msgspec/_core.c @@ -1909,6 +1909,7 @@ Meta_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { Meta *out = (Meta *)Meta_Type.tp_alloc(&Meta_Type, 0); if (out == NULL) return NULL; +// set fields on Meta and increase their refcount #define SET_FIELD(x) do { Py_XINCREF(x); out->x = x; } while(0) SET_FIELD(gt); SET_FIELD(ge); @@ -1916,7 +1917,6 @@ Meta_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { SET_FIELD(le); SET_FIELD(multiple_of); SET_FIELD(pattern); - SET_FIELD(regex); SET_FIELD(min_length); SET_FIELD(max_length); SET_FIELD(tz); @@ -1926,6 +1926,11 @@ Meta_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { SET_FIELD(extra_json_schema); SET_FIELD(extra); #undef SET_FIELD + + // set fields on Meta without increasing their refcount + // regex was created by a PyObject_CallOneArg call, so refcount started out as 1; no need to increase + out->regex = regex; + return (PyObject *)out; } diff --git a/tests/unit/test_struct_meta.py b/tests/unit/test_struct_meta.py index 1904d31b..059ceb5f 100644 --- a/tests/unit/test_struct_meta.py +++ b/tests/unit/test_struct_meta.py @@ -1,4 +1,8 @@ """Tests for the exposed StructMeta metaclass.""" +import gc +import re +import secrets +import uuid from abc import ABCMeta, _abc_init, abstractmethod @@ -624,3 +628,22 @@ def foo(self) -> int: c = Concrete(5) assert c.foo() == 5 + + +def test_struct_meta_pattern_ref_leak(): + # ensure that we're not keeping around references to re.Pattern longer than necessary + # see https://github.com/jcrist/msgspec/pull/899 for details + + # clear cache to get a baseline + re.purge() + + # use a random string to create a pattern, to ensure there can never be an overlap + # with any cached pattern + pattern_string = secrets.token_hex() + msgspec.Meta(pattern=pattern_string) + # purge cache and gc again + re.purge() + gc.collect() + # there should be no re.Pattern any more with our pattern anymore. if there is, it's + # being kept alive by some reference + assert not any(o for o in gc.get_objects() if isinstance(o, re.Pattern) and o.pattern == pattern_string) From 977a44485920b4ae2b24789c38c950505fcecdf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Janek=20Nouvertn=C3=A9?= Date: Sun, 26 Oct 2025 19:24:50 +0100 Subject: [PATCH 2/5] formatting --- tests/unit/test_struct_meta.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_struct_meta.py b/tests/unit/test_struct_meta.py index 059ceb5f..5fc50411 100644 --- a/tests/unit/test_struct_meta.py +++ b/tests/unit/test_struct_meta.py @@ -1,8 +1,8 @@ """Tests for the exposed StructMeta metaclass.""" + import gc import re import secrets -import uuid from abc import ABCMeta, _abc_init, abstractmethod @@ -646,4 +646,8 @@ def test_struct_meta_pattern_ref_leak(): gc.collect() # there should be no re.Pattern any more with our pattern anymore. if there is, it's # being kept alive by some reference - assert not any(o for o in gc.get_objects() if isinstance(o, re.Pattern) and o.pattern == pattern_string) + assert not any( + o + for o in gc.get_objects() + if isinstance(o, re.Pattern) and o.pattern == pattern_string + ) From bc5b1e034a2b288923db7e0c50a3325b9b05b29b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Janek=20Nouvertn=C3=A9?= Date: Tue, 11 Nov 2025 19:04:11 +0100 Subject: [PATCH 3/5] fix comment :) --- tests/unit/test_struct_meta.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_struct_meta.py b/tests/unit/test_struct_meta.py index 5fc50411..cbe4f6be 100644 --- a/tests/unit/test_struct_meta.py +++ b/tests/unit/test_struct_meta.py @@ -644,7 +644,7 @@ def test_struct_meta_pattern_ref_leak(): # purge cache and gc again re.purge() gc.collect() - # there should be no re.Pattern any more with our pattern anymore. if there is, it's + # there shouldn't be an re.Pattern with our pattern any more. if there is, it's # being kept alive by some reference assert not any( o From 155097e1b2f63ecce9659f3df0cda2349958a67f Mon Sep 17 00:00:00 2001 From: Ofek Lev Date: Sun, 23 Nov 2025 13:51:39 -0500 Subject: [PATCH 4/5] fix static analysis --- tests/unit/test_struct_meta.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/test_struct_meta.py b/tests/unit/test_struct_meta.py index cbe4f6be..3f85c051 100644 --- a/tests/unit/test_struct_meta.py +++ b/tests/unit/test_struct_meta.py @@ -3,7 +3,6 @@ import gc import re import secrets - from abc import ABCMeta, _abc_init, abstractmethod import pytest From efd64cdec12175b886bf8d264d115b427e1074b4 Mon Sep 17 00:00:00 2001 From: Ofek Lev Date: Sun, 23 Nov 2025 15:19:17 -0500 Subject: [PATCH 5/5] clean up --- src/msgspec/_core.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/msgspec/_core.c b/src/msgspec/_core.c index f1ab79dc..d4151cd8 100644 --- a/src/msgspec/_core.c +++ b/src/msgspec/_core.c @@ -1907,16 +1907,22 @@ Meta_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { } Meta *out = (Meta *)Meta_Type.tp_alloc(&Meta_Type, 0); - if (out == NULL) return NULL; + if (out == NULL) { + Py_XDECREF(regex); + return NULL; + } -// set fields on Meta and increase their refcount +/* SET_FIELD handles borrowed values that need an extra INCREF. + * SET_FIELD_OWNED passes through references we already own. */ #define SET_FIELD(x) do { Py_XINCREF(x); out->x = x; } while(0) +#define SET_FIELD_OWNED(x) do { out->x = x; } while(0) SET_FIELD(gt); SET_FIELD(ge); SET_FIELD(lt); SET_FIELD(le); SET_FIELD(multiple_of); SET_FIELD(pattern); + SET_FIELD_OWNED(regex); SET_FIELD(min_length); SET_FIELD(max_length); SET_FIELD(tz); @@ -1926,10 +1932,7 @@ Meta_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { SET_FIELD(extra_json_schema); SET_FIELD(extra); #undef SET_FIELD - - // set fields on Meta without increasing their refcount - // regex was created by a PyObject_CallOneArg call, so refcount started out as 1; no need to increase - out->regex = regex; +#undef SET_FIELD_OWNED return (PyObject *)out; }