Skip to content

Commit

Permalink
pythongh-108314: Add PyDict_ContainsString() function (python#108323)
Browse files Browse the repository at this point in the history
  • Loading branch information
vstinner committed Aug 24, 2023
1 parent c163d7f commit 6726626
Show file tree
Hide file tree
Showing 10 changed files with 111 additions and 35 deletions.
20 changes: 16 additions & 4 deletions Doc/c-api/dict.rst
Expand Up @@ -55,6 +55,15 @@ Dictionary Objects
This is equivalent to the Python expression ``key in p``.
.. c:function:: int PyDict_ContainsString(PyObject *p, const char *key)
This is the same as :c:func:`PyDict_Contains`, but *key* is specified as a
:c:expr:`const char*` UTF-8 encoded bytes string, rather than a
:c:expr:`PyObject*`.
.. versionadded:: 3.13
.. c:function:: PyObject* PyDict_Copy(PyObject *p)
Return a new dictionary that contains the same key-value pairs as *p*.
Expand All @@ -73,7 +82,7 @@ Dictionary Objects
.. index:: single: PyUnicode_FromString()
Insert *val* into the dictionary *p* using *key* as a key. *key* should
be a :c:expr:`const char*`. The key object is created using
be a :c:expr:`const char*` UTF-8 encoded bytes string. The key object is created using
``PyUnicode_FromString(key)``. Return ``0`` on success or ``-1`` on
failure. This function *does not* steal a reference to *val*.
Expand All @@ -88,7 +97,8 @@ Dictionary Objects
.. c:function:: int PyDict_DelItemString(PyObject *p, const char *key)
Remove the entry in dictionary *p* which has a key specified by the string *key*.
Remove the entry in dictionary *p* which has a key specified by the UTF-8
encoded bytes string *key*.
If *key* is not in the dictionary, :exc:`KeyError` is raised.
Return ``0`` on success or ``-1`` on failure.
Expand Down Expand Up @@ -136,7 +146,8 @@ Dictionary Objects
.. c:function:: PyObject* PyDict_GetItemString(PyObject *p, const char *key)
This is the same as :c:func:`PyDict_GetItem`, but *key* is specified as a
:c:expr:`const char*`, rather than a :c:expr:`PyObject*`.
:c:expr:`const char*` UTF-8 encoded bytes string, rather than a
:c:expr:`PyObject*`.
.. note::
Expand All @@ -150,7 +161,8 @@ Dictionary Objects
.. c:function:: int PyDict_GetItemStringRef(PyObject *p, const char *key, PyObject **result)
Similar than :c:func:`PyDict_GetItemRef`, but *key* is specified as a
:c:expr:`const char*`, rather than a :c:expr:`PyObject*`.
:c:expr:`const char*` UTF-8 encoded bytes string, rather than a
:c:expr:`PyObject*`.
.. versionadded:: 3.13
Expand Down
5 changes: 5 additions & 0 deletions Doc/whatsnew/3.13.rst
Expand Up @@ -862,6 +862,11 @@ New Features
not needed.
(Contributed by Victor Stinner in :gh:`106004`.)

* Added :c:func:`PyDict_ContainsString` function: same as
:c:func:`PyDict_Contains`, but *key* is specified as a :c:expr:`const char*`
UTF-8 encoded bytes string, rather than a :c:expr:`PyObject*`.
(Contributed by Victor Stinner in :gh:`108314`.)

* Add :c:func:`Py_IsFinalizing` function: check if the main Python interpreter is
:term:`shutting down <interpreter shutdown>`.
(Contributed by Victor Stinner in :gh:`108014`.)
Expand Down
1 change: 1 addition & 0 deletions Include/cpython/dictobject.h
Expand Up @@ -55,6 +55,7 @@ static inline Py_ssize_t PyDict_GET_SIZE(PyObject *op) {
}
#define PyDict_GET_SIZE(op) PyDict_GET_SIZE(_PyObject_CAST(op))

PyAPI_FUNC(int) PyDict_ContainsString(PyObject *mp, const char *key);
PyAPI_FUNC(int) _PyDict_ContainsId(PyObject *, _Py_Identifier *);

PyAPI_FUNC(PyObject *) _PyDict_NewPresized(Py_ssize_t minused);
Expand Down
24 changes: 20 additions & 4 deletions Lib/test/test_capi/test_dict.py
Expand Up @@ -8,6 +8,7 @@


NULL = None
INVALID_UTF8 = b'\xff'

class DictSubclass(dict):
def __getitem__(self, key):
Expand Down Expand Up @@ -137,7 +138,7 @@ def test_dict_getitemstring(self):
self.assertEqual(getitemstring(dct2, b'a'), 1)
self.assertIs(getitemstring(dct2, b'b'), KeyError)

self.assertIs(getitemstring({}, b'\xff'), KeyError)
self.assertIs(getitemstring({}, INVALID_UTF8), KeyError)
self.assertIs(getitemstring(42, b'a'), KeyError)
self.assertIs(getitemstring([], b'a'), KeyError)
# CRASHES getitemstring({}, NULL)
Expand Down Expand Up @@ -173,7 +174,7 @@ def test_dict_getitemstringref(self):
self.assertIs(getitemstring(dct2, b'b'), KeyError)

self.assertRaises(SystemError, getitemstring, 42, b'a')
self.assertRaises(UnicodeDecodeError, getitemstring, {}, b'\xff')
self.assertRaises(UnicodeDecodeError, getitemstring, {}, INVALID_UTF8)
self.assertRaises(SystemError, getitemstring, [], b'a')
# CRASHES getitemstring({}, NULL)
# CRASHES getitemstring(NULL, b'a')
Expand Down Expand Up @@ -213,6 +214,21 @@ def test_dict_contains(self):
# CRASHES contains(42, 'a')
# CRASHES contains(NULL, 'a')

def test_dict_contains_string(self):
contains_string = _testcapi.dict_containsstring
dct = {'a': 1, '\U0001f40d': 2}
self.assertTrue(contains_string(dct, b'a'))
self.assertFalse(contains_string(dct, b'b'))
self.assertTrue(contains_string(dct, '\U0001f40d'.encode()))
self.assertRaises(UnicodeDecodeError, contains_string, dct, INVALID_UTF8)

dct2 = DictSubclass(dct)
self.assertTrue(contains_string(dct2, b'a'))
self.assertFalse(contains_string(dct2, b'b'))

# CRASHES contains({}, NULL)
# CRASHES contains(NULL, b'a')

def test_dict_setitem(self):
setitem = _testcapi.dict_setitem
dct = {}
Expand Down Expand Up @@ -245,7 +261,7 @@ def test_dict_setitemstring(self):
setitemstring(dct2, b'a', 5)
self.assertEqual(dct2, {'a': 5})

self.assertRaises(UnicodeDecodeError, setitemstring, {}, b'\xff', 5)
self.assertRaises(UnicodeDecodeError, setitemstring, {}, INVALID_UTF8, 5)
self.assertRaises(SystemError, setitemstring, UserDict(), b'a', 5)
self.assertRaises(SystemError, setitemstring, 42, b'a', 5)
# CRASHES setitemstring({}, NULL, 5)
Expand Down Expand Up @@ -287,7 +303,7 @@ def test_dict_delitemstring(self):
self.assertEqual(dct2, {'c': 2})
self.assertRaises(KeyError, delitemstring, dct2, b'b')

self.assertRaises(UnicodeDecodeError, delitemstring, {}, b'\xff')
self.assertRaises(UnicodeDecodeError, delitemstring, {}, INVALID_UTF8)
self.assertRaises(SystemError, delitemstring, UserDict({'a': 1}), b'a')
self.assertRaises(SystemError, delitemstring, 42, b'a')
# CRASHES delitemstring({}, NULL)
Expand Down
@@ -0,0 +1,4 @@
Add :c:func:`PyDict_ContainsString` function: same as
:c:func:`PyDict_Contains`, but *key* is specified as a :c:expr:`const char*`
UTF-8 encoded bytes string, rather than a :c:expr:`PyObject*`.
Patch by Victor Stinner.
34 changes: 20 additions & 14 deletions Modules/_ctypes/_ctypes.c
Expand Up @@ -5139,8 +5139,6 @@ static PyObject *
Pointer_get_contents(CDataObject *self, void *closure)
{
StgDictObject *stgdict;
PyObject *keep, *ptr_probe;
CDataObject *ptr2ptr;

if (*(void **)self->b_ptr == NULL) {
PyErr_SetString(PyExc_ValueError,
Expand All @@ -5151,29 +5149,37 @@ Pointer_get_contents(CDataObject *self, void *closure)
stgdict = PyObject_stgdict((PyObject *)self);
assert(stgdict); /* Cannot be NULL for pointer instances */

keep = GetKeepedObjects(self);
PyObject *keep = GetKeepedObjects(self);
if (keep != NULL) {
// check if it's a pointer to a pointer:
// pointers will have '0' key in the _objects
ptr_probe = PyDict_GetItemString(keep, "0");

if (ptr_probe != NULL) {
ptr2ptr = (CDataObject*) PyDict_GetItemString(keep, "1");
if (ptr2ptr == NULL) {
int ptr_probe = PyDict_ContainsString(keep, "0");
if (ptr_probe < 0) {
return NULL;
}
if (ptr_probe) {
PyObject *item;
if (PyDict_GetItemStringRef(keep, "1", &item) < 0) {
return NULL;
}
if (item == NULL) {
PyErr_SetString(PyExc_ValueError,
"Unexpected NULL pointer in _objects");
"Unexpected NULL pointer in _objects");
return NULL;
}
// don't construct a new object,
// return existing one instead to preserve refcount
#ifndef NDEBUG
CDataObject *ptr2ptr = (CDataObject *)item;
// Don't construct a new object,
// return existing one instead to preserve refcount.
// Double-check that we are returning the same thing.
assert(
*(void**) self->b_ptr == ptr2ptr->b_ptr ||
*(void**) self->b_value.c == ptr2ptr->b_ptr ||
*(void**) self->b_ptr == ptr2ptr->b_value.c ||
*(void**) self->b_value.c == ptr2ptr->b_value.c
); // double-check that we are returning the same thing
Py_INCREF(ptr2ptr);
return (PyObject *) ptr2ptr;
);
#endif
return item;
}
}

Expand Down
14 changes: 14 additions & 0 deletions Modules/_testcapi/dict.c
Expand Up @@ -74,6 +74,19 @@ dict_contains(PyObject *self, PyObject *args)
RETURN_INT(PyDict_Contains(obj, key));
}

static PyObject *
dict_containsstring(PyObject *self, PyObject *args)
{
PyObject *obj;
const char *key;
Py_ssize_t size;
if (!PyArg_ParseTuple(args, "Oz#", &obj, &key, &size)) {
return NULL;
}
NULLABLE(obj);
RETURN_INT(PyDict_ContainsString(obj, key));
}

static PyObject *
dict_size(PyObject *self, PyObject *obj)
{
Expand Down Expand Up @@ -349,6 +362,7 @@ static PyMethodDef test_methods[] = {
{"dict_getitemref", dict_getitemref, METH_VARARGS},
{"dict_getitemstringref", dict_getitemstringref, METH_VARARGS},
{"dict_contains", dict_contains, METH_VARARGS},
{"dict_containsstring", dict_containsstring, METH_VARARGS},
{"dict_setitem", dict_setitem, METH_VARARGS},
{"dict_setitemstring", dict_setitemstring, METH_VARARGS},
{"dict_delitem", dict_delitem, METH_VARARGS},
Expand Down
12 changes: 12 additions & 0 deletions Objects/dictobject.c
Expand Up @@ -3741,6 +3741,18 @@ PyDict_Contains(PyObject *op, PyObject *key)
return (ix != DKIX_EMPTY && value != NULL);
}

int
PyDict_ContainsString(PyObject *op, const char *key)
{
PyObject *key_obj = PyUnicode_FromString(key);
if (key_obj == NULL) {
return -1;
}
int res = PyDict_Contains(op, key_obj);
Py_DECREF(key_obj);
return res;
}

/* Internal version of PyDict_Contains used when the hash value is already known */
int
_PyDict_Contains_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
Expand Down
9 changes: 5 additions & 4 deletions Python/pylifecycle.c
Expand Up @@ -2220,10 +2220,11 @@ add_main_module(PyInterpreterState *interp)
}
Py_DECREF(ann_dict);

if (_PyDict_GetItemStringWithError(d, "__builtins__") == NULL) {
if (PyErr_Occurred()) {
return _PyStatus_ERR("Failed to test __main__.__builtins__");
}
int has_builtins = PyDict_ContainsString(d, "__builtins__");
if (has_builtins < 0) {
return _PyStatus_ERR("Failed to test __main__.__builtins__");
}
if (!has_builtins) {
PyObject *bimod = PyImport_ImportModule("builtins");
if (bimod == NULL) {
return _PyStatus_ERR("Failed to retrieve builtins module");
Expand Down
23 changes: 14 additions & 9 deletions Python/pythonrun.c
Expand Up @@ -413,10 +413,11 @@ _PyRun_SimpleFileObject(FILE *fp, PyObject *filename, int closeit,
PyObject *dict = PyModule_GetDict(main_module); // borrowed ref

int set_file_name = 0;
if (_PyDict_GetItemStringWithError(dict, "__file__") == NULL) {
if (PyErr_Occurred()) {
goto done;
}
int has_file = PyDict_ContainsString(dict, "__file__");
if (has_file < 0) {
goto done;
}
if (!has_file) {
if (PyDict_SetItemString(dict, "__file__", filename) < 0) {
goto done;
}
Expand Down Expand Up @@ -1713,13 +1714,17 @@ run_eval_code_obj(PyThreadState *tstate, PyCodeObject *co, PyObject *globals, Py
_PyRuntime.signals.unhandled_keyboard_interrupt = 0;

/* Set globals['__builtins__'] if it doesn't exist */
if (globals != NULL && _PyDict_GetItemStringWithError(globals, "__builtins__") == NULL) {
if (PyErr_Occurred() ||
PyDict_SetItemString(globals, "__builtins__",
tstate->interp->builtins) < 0)
{
if (globals != NULL) {
int has_builtins = PyDict_ContainsString(globals, "__builtins__");
if (has_builtins < 0) {
return NULL;
}
if (!has_builtins) {
if (PyDict_SetItemString(globals, "__builtins__",
tstate->interp->builtins) < 0) {
return NULL;
}
}
}

v = PyEval_EvalCode((PyObject*)co, globals, locals);
Expand Down

0 comments on commit 6726626

Please sign in to comment.