Skip to content

Commit

Permalink
bpo-29240: Fix locale encodings in UTF-8 Mode (python#5170)
Browse files Browse the repository at this point in the history
Modify locale.localeconv(), time.tzname, os.strerror() and other
functions to ignore the UTF-8 Mode: always use the current locale
encoding.

Changes:

* Add _Py_DecodeLocaleEx() and _Py_EncodeLocaleEx(). On decoding or
  encoding error, they return the position of the error and an error
  message which are used to raise Unicode errors in
  PyUnicode_DecodeLocale() and PyUnicode_EncodeLocale().
* Replace _Py_DecodeCurrentLocale() with _Py_DecodeLocaleEx().
* PyUnicode_DecodeLocale() now uses _Py_DecodeLocaleEx() for all
  cases, especially for the strict error handler.
* Add _Py_DecodeUTF8Ex(): returns more information on decoding errors
  and supports the strict error handler.
* Rename _Py_EncodeUTF8_surrogateescape() to _Py_EncodeUTF8Ex().
* Replace _Py_EncodeCurrentLocale() with _Py_EncodeLocaleEx().
* Ignore the UTF-8 mode to encode/decode localeconv(), strerror()
  and time zone name.
* PyUnicode_DecodeLocale(), PyUnicode_DecodeLocaleAndSize() and
  PyUnicode_EncodeLocale() now ignore the UTF-8 mode: always use
  the "current" locale.
* Remove _PyUnicode_DecodeCurrentLocale(),
  _PyUnicode_DecodeCurrentLocaleAndSize() and
  _PyUnicode_EncodeCurrentLocale().
  • Loading branch information
vstinner authored and Jeethu Rao committed Jan 16, 2018
1 parent 378edee commit 438385b
Show file tree
Hide file tree
Showing 6 changed files with 192 additions and 44 deletions.
17 changes: 17 additions & 0 deletions Include/code.h
Expand Up @@ -17,6 +17,20 @@ typedef uint16_t _Py_CODEUNIT;
# define _Py_OPARG(word) ((word) >> 8)
#endif

/* Global lookup cache */

/* Kind of entry held in one global-lookup cache slot.
   The zero value doubles as the "empty" state of a zero-filled slot.
   (The spelling GCACHE_UNITIALIZED is kept because other code refers to it.) */
typedef enum {
    GCACHE_UNITIALIZED = 0,   /* slot holds no usable entry */
    GCACHE_GLOBALS = 1,       /* value was found in the globals dict */
    GCACHE_BUILTINS = 2       /* value was found in the builtins dict */
} _PyGlobalLookupCacheType;

/* One slot of the per-code-object global lookup cache; the cache is an
   array of these, indexed by the name's position in co_names. */
typedef struct {
/* Dict version tag recorded when the entry was stored: the globals dict's
   ma_version_tag, or for builtins entries the larger of the globals and
   builtins tags. */
uint64_t version_tag;
/* Which namespace the entry came from; GCACHE_UNITIALIZED means empty. */
_PyGlobalLookupCacheType type;
/* Cached value; the caching code stores the pointer as-is, without
   taking a new reference. */
PyObject *obj;
} _PyGlobalLookupCache;

/* Bytecode object */
typedef struct {
PyObject_HEAD
Expand Down Expand Up @@ -44,6 +58,9 @@ typedef struct {
Objects/lnotab_notes.txt for details. */
void *co_zombieframe; /* for optimization only (see frameobject.c) */
PyObject *co_weakreflist; /* to support weakrefs to code objects */

int co_global_lookups; /* Number of global lookups */

/* Scratch space for extra data relating to the code object.
Type is a void* to keep the format private in codeobject.c to force
people to go through the proper APIs. */
Expand Down
1 change: 0 additions & 1 deletion Include/dictobject.h
Expand Up @@ -170,7 +170,6 @@ PyAPI_FUNC(int) _PyDict_DelItemId(PyObject *mp, struct _Py_Identifier *key);
PyAPI_FUNC(void) _PyDict_DebugMallocStats(FILE *out);

int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, PyObject *name, PyObject *value);
PyObject *_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObject *);
#endif

#ifdef __cplusplus
Expand Down
14 changes: 14 additions & 0 deletions Include/internal/globalcache.h
@@ -0,0 +1,14 @@
#ifndef Py_INTERNAL_GLOBALCACHE_H
#define Py_INTERNAL_GLOBALCACHE_H
#ifdef __cplusplus
extern "C" {
#endif

/* Cached LOAD_GLOBAL lookup: resolve co_names[offset] of `code` in
 * `globals`, then `builtins`.
 *
 * Returns the value without taking a new reference (the caller is expected
 * to Py_INCREF it).  Returns NULL with an exception set on error, or NULL
 * without an exception if the name is in neither dict.
 */
PyObject *
_PyCode_LoadGlobalCached(PyCodeObject *code, PyDictObject *globals, PyDictObject *builtins, int offset);

#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_GLOBALCACHE_H */

152 changes: 152 additions & 0 deletions Objects/codeobject.c
Expand Up @@ -3,6 +3,7 @@
#include "Python.h"
#include "code.h"
#include "structmember.h"
#include "dict-common.h"

/* Holder for co_extra information */
typedef struct {
Expand Down Expand Up @@ -206,6 +207,10 @@ PyCode_New(int argcount, int kwonlyargcount,
co->co_zombieframe = NULL;
co->co_weakreflist = NULL;
co->co_extra = NULL;
if(PyTuple_GET_SIZE(names) == 0)
co->co_global_lookups = -1;
else
co->co_global_lookups = 0;
return co;
}

Expand Down Expand Up @@ -889,3 +894,150 @@ _PyCode_SetExtra(PyObject *code, Py_ssize_t index, void *extra)
co_extra->ce_extras[index] = extra;
return 0;
}

/* Number of counted global lookups a code object must accumulate before
   its lookups start being cached. */

#define GLOBAL_CACHE_THRESHOLD 32

/* co_global_lookups is set to -1 by PyCode_New() when co_names is empty,
   which opts the code object out of caching.  The macro argument is
   parenthesized so that any pointer expression (e.g. &obj) can be passed. */
#define SHOULD_USE_GLOBAL_CACHE(code) ((code)->co_global_lookups >= 0)
#define REACHED_GLOBAL_LOOKUP_THRESHOLD(code) \
    ((code)->co_global_lookups == GLOBAL_CACHE_THRESHOLD)

/* Tuple item access: function call in Py_DEBUG builds, macro otherwise */

#ifdef Py_DEBUG
#define GETITEM(v, i) PyTuple_GetItem((v), (i))
#else
#define GETITEM(v, i) PyTuple_GET_ITEM((PyTupleObject *)(v), (i))
#endif

/* Index of the code-object "extra" slot that holds the global lookup cache.
   Stays negative until it is requested from _PyEval_RequestCodeExtraIndex()
   on first use by the get/set helpers below. */
static Py_ssize_t _globalscache_index = -1;

/* Fetch the global lookup cache array attached to `code`.
 *
 * The extra-data slot index is requested lazily on first use, registering
 * PyMem_Free as the slot's free function.  On success *cache receives the
 * pointer stored in that slot; the result is whatever _PyCode_GetExtra()
 * returns (callers treat a negative result as failure).
 *
 * If no slot index can be obtained (the request yields a negative index),
 * *cache is set to NULL and 0 is returned, so callers see "no cache"
 * instead of a negative index being passed on to _PyCode_GetExtra().
 */
static inline int
_PyCode_GetGlobalCache(PyCodeObject *code, _PyGlobalLookupCache **cache)
{
    if (_globalscache_index < 0) {
        _globalscache_index =
            _PyEval_RequestCodeExtraIndex((freefunc)PyMem_Free);
        if (_globalscache_index < 0) {
            /* Still no valid slot: behave as if nothing is cached. */
            *cache = NULL;
            return 0;
        }
    }
    return _PyCode_GetExtra((PyObject *)code, _globalscache_index,
                            (void **)cache);
}

/* Attach `cache` to `code` through the code object's extra-data slot.
 * The slot index is requested on first use (PyMem_Free is registered as the
 * slot's free function); the result of _PyCode_SetExtra() is returned
 * unchanged.
 */
static inline int
_PyCode_SetGlobalCache(PyCodeObject *code, _PyGlobalLookupCache *cache)
{
    Py_ssize_t slot = _globalscache_index;

    if (slot < 0) {
        slot = _PyEval_RequestCodeExtraIndex((freefunc)PyMem_Free);
        _globalscache_index = slot;
    }
    return _PyCode_SetExtra((PyObject *)code, slot, (void *)cache);
}

/* Record a successful global/builtin lookup for `code`.
 *
 * While co_global_lookups is below GLOBAL_CACHE_THRESHOLD the call only
 * increments the counter.  Once the counter equals the threshold, the cache
 * array (one slot per entry of co_names) is created on first use and the
 * slot at `offset` is filled with `value`, the entry type, and the current
 * dict version tag (for builtins entries: the larger of the globals and
 * builtins tags).  `value` is stored without taking a new reference.
 *
 * Returns 0 on success and -1 on failure.  MemoryError is set here when the
 * cache array cannot be allocated; failures of the get/set helpers are
 * propagated as-is (assumed to have set their own exception -- confirm).
 */
static inline int
_PyCode_CacheGlobal(PyCodeObject *code, PyDictObject *globals,
                    PyDictObject *builtins, PyObject *value,
                    int offset, _PyGlobalLookupCacheType tp)
{
    _PyGlobalLookupCache *globals_cache = NULL;

    if (code->co_global_lookups < GLOBAL_CACHE_THRESHOLD) {
        /* Still warming up: just count this lookup. */
        code->co_global_lookups++;
        return 0;
    }
    if (code->co_global_lookups != GLOBAL_CACHE_THRESHOLD) {
        return 0;
    }

    if (_PyCode_GetGlobalCache(code, &globals_cache) < 0) {
        return -1;
    }
    if (globals_cache == NULL) {
        Py_ssize_t n_globals = PyTuple_GET_SIZE(code->co_names);

        /* Zero-filled, so every slot starts out as GCACHE_UNITIALIZED. */
        globals_cache = PyMem_Calloc(n_globals, sizeof(*globals_cache));
        if (globals_cache == NULL) {
            /* PyMem_Calloc() does not raise by itself; without this the
               caller would see a failure with no exception set. */
            PyErr_NoMemory();
            return -1;
        }
        if (_PyCode_SetGlobalCache(code, globals_cache) < 0) {
            /* The array was not attached to the code object (assumed: a
               failed set does not take ownership), so release it here. */
            PyMem_Free(globals_cache);
            return -1;
        }
    }

    globals_cache += offset;
    globals_cache->obj = value;
    globals_cache->version_tag = globals->ma_version_tag;
    if (tp == GCACHE_GLOBALS) {
        globals_cache->type = GCACHE_GLOBALS;
    }
    else {
        /* A builtins hit depends on both dicts: a change to either one
           must invalidate the entry. */
        globals_cache->type = GCACHE_BUILTINS;
        if (builtins->ma_version_tag > globals_cache->version_tag) {
            globals_cache->version_tag = builtins->ma_version_tag;
        }
    }
    return 0;
}

/* Fast version of global value lookup (LOAD_GLOBAL), with a per-code-object
 * cache.  Resolves co_names[offset] in globals, then builtins.
 *
 * Once the code object's lookup counter has reached the caching threshold,
 * the slot for `offset` is consulted first and its value is returned when
 * the recorded dict version tag still matches; otherwise the slot is reset
 * and the regular dict lookup runs, re-recording the result.
 *
 * Raise an exception and return NULL if an error occurred (ex: computing the
 * key hash failed, key comparison failed, ...). Return NULL if the key doesn't
 * exist. Return the value if the key exists; no new reference is taken.
 */
PyObject* _Py_HOT_FUNCTION
_PyCode_LoadGlobalCached(PyCodeObject *code,
                         PyDictObject *globals, PyDictObject *builtins,
                         int offset)
{
    Py_ssize_t ix;
    Py_hash_t hash;
    PyObject *key, *value;
    _PyGlobalLookupCache *globals_cache = NULL;

    if (SHOULD_USE_GLOBAL_CACHE(code) && REACHED_GLOBAL_LOOKUP_THRESHOLD(code)) {
        if (_PyCode_GetGlobalCache(code, &globals_cache) < 0) {
            return NULL;
        }
        if (globals_cache != NULL) {
            assert(offset >= 0 && offset < PyTuple_Size(code->co_names));
            globals_cache += offset;
            if (globals_cache->type == GCACHE_GLOBALS) {
                if (globals_cache->version_tag == globals->ma_version_tag) {
                    return globals_cache->obj;
                }
            }
            else if (globals_cache->type == GCACHE_BUILTINS) {
                /* Must be as wide as the cached tag (uint64_t): a narrower
                   type truncates the dict tags and makes the comparison
                   below miss, or match a stale entry. */
                uint64_t version_tag = globals->ma_version_tag;
                if (builtins->ma_version_tag > version_tag) {
                    version_tag = builtins->ma_version_tag;
                }
                if (globals_cache->version_tag == version_tag) {
                    return globals_cache->obj;
                }
            }
            /* Stale or empty: invalidate before falling back to the dicts. */
            globals_cache->type = GCACHE_UNITIALIZED;
        }
    }

    key = GETITEM(code->co_names, offset);
    /* Reuse the hash stored on exact str objects when it is available. */
    if (!PyUnicode_CheckExact(key) ||
        (hash = ((PyASCIIObject *) key)->hash) == -1)
    {
        hash = PyObject_Hash(key);
        if (hash == -1) {
            return NULL;
        }
    }

    /* namespace 1: globals */
    ix = globals->ma_keys->dk_lookup(globals, key, hash, &value);
    if (ix == DKIX_ERROR) {
        return NULL;
    }
    if (ix != DKIX_EMPTY && value != NULL) {
        if (SHOULD_USE_GLOBAL_CACHE(code) &&
            _PyCode_CacheGlobal(code, globals, builtins, value, offset,
                                GCACHE_GLOBALS) == -1)
        {
            return NULL;
        }
        return value;
    }

    /* namespace 2: builtins */
    ix = builtins->ma_keys->dk_lookup(builtins, key, hash, &value);
    if (ix < 0) {
        return NULL;
    }

    if (ix != DKIX_EMPTY && value != NULL) {
        if (SHOULD_USE_GLOBAL_CACHE(code) &&
            _PyCode_CacheGlobal(code, globals, builtins, value, offset,
                                GCACHE_BUILTINS) == -1)
        {
            return NULL;
        }
    }
    return value;
}
36 changes: 0 additions & 36 deletions Objects/dictobject.c
Expand Up @@ -1344,42 +1344,6 @@ _PyDict_GetItemIdWithError(PyObject *dp, struct _Py_Identifier *key)
return PyDict_GetItemWithError(dp, kv);
}

/* Fast global lookup used by LOAD_GLOBAL: try `globals`, then `builtins`.
 *
 * Returns the value found for `key`.  Returns NULL with an exception set if
 * an error occurred (hashing the key failed, comparing keys failed, ...),
 * and NULL without an exception if the key is in neither dict.
 */
PyObject *
_PyDict_LoadGlobal(PyDictObject *globals, PyDictObject *builtins, PyObject *key)
{
    PyObject *found;
    Py_ssize_t slot;
    Py_hash_t h = -1;

    /* Exact str objects may already carry their hash. */
    if (PyUnicode_CheckExact(key)) {
        h = ((PyASCIIObject *) key)->hash;
    }
    if (h == -1) {
        h = PyObject_Hash(key);
        if (h == -1) {
            return NULL;
        }
    }

    /* namespace 1: globals */
    slot = globals->ma_keys->dk_lookup(globals, key, h, &found);
    if (slot == DKIX_ERROR) {
        return NULL;
    }
    if (slot != DKIX_EMPTY && found != NULL) {
        return found;
    }

    /* namespace 2: builtins */
    slot = builtins->ma_keys->dk_lookup(builtins, key, h, &found);
    return (slot < 0) ? NULL : found;
}

/* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
* dictionary if it's merely replacing the value for an existing key.
* This means that it's safe to loop over a dictionary with PyDict_Next()
Expand Down
16 changes: 9 additions & 7 deletions Python/ceval.c
Expand Up @@ -20,6 +20,8 @@
#include "setobject.h"
#include "structmember.h"

#include "internal/globalcache.h"

#include <ctype.h>

#ifdef Py_DEBUG
Expand Down Expand Up @@ -527,7 +529,6 @@ PyEval_EvalCode(PyObject *co, PyObject *globals, PyObject *locals)
NULL, NULL);
}


/* Interpreter main loop */

PyObject *
Expand Down Expand Up @@ -2147,27 +2148,28 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
}

TARGET(LOAD_GLOBAL) {
PyObject *name = GETITEM(names, oparg);
PyObject *v;
if (PyDict_CheckExact(f->f_globals)
&& PyDict_CheckExact(f->f_builtins))
{
v = _PyDict_LoadGlobal((PyDictObject *)f->f_globals,
(PyDictObject *)f->f_builtins,
name);
v = _PyCode_LoadGlobalCached(co,
(PyDictObject *)f->f_globals,
(PyDictObject *)f->f_builtins,
oparg);
if (v == NULL) {
if (!_PyErr_OCCURRED()) {
/* _PyDict_LoadGlobal() returns NULL without raising
/* _PyCode_LoadGlobalCached() returns NULL without raising
* an exception if the key doesn't exist */
format_exc_check_arg(PyExc_NameError,
NAME_ERROR_MSG, name);
NAME_ERROR_MSG, GETITEM(names, oparg));
}
goto error;
}
Py_INCREF(v);
}
else {
/* Slow-path if globals or builtins is not a dict */
PyObject *name = GETITEM(names, oparg);

/* namespace 1: globals */
v = PyObject_GetItem(f->f_globals, name);
Expand Down

0 comments on commit 438385b

Please sign in to comment.