/* _PyGenObject_HEAD defines the initial segment of generator
and coroutine objects. */
#define _PyGenObject_HEAD(prefix) \
PyObject_HEAD \
/* Note: gi_frame can be NULL if the generator is "finished" */ \
PyFrameObject *prefix##_frame; \
/* True if generator is being executed. */ \
char prefix##_running; \
/* The code object backing the generator */ \
PyObject *prefix##_code; \
/* List of weak reference. */ \
PyObject *prefix##_weakreflist; \
/* Name of the generator. */ \
PyObject *prefix##_name; \
/* Qualified name of the generator. */ \
PyObject *prefix##_qualname; \
_PyErr_StackItem prefix##_exc_state;
/* Generator object: consists solely of the fields declared by
   _PyGenObject_HEAD, each carrying the gi_ prefix. */
typedef struct {
/* The gi_ prefix is intended to remind of generator-iterator. */
_PyGenObject_HEAD(gi)
} PyGenObject;
创建 PyGenObject:
/* Allocate and initialise a generator object of the given `type` that
 * wraps the frame `f`.
 *
 * The frame pointer is stored in the new object without an additional
 * INCREF, and it is released with Py_DECREF when the allocation fails,
 * so the caller gives up its reference to `f` on both paths.
 *
 * `name` and `qualname` may be NULL: the name then falls back to the
 * code object's co_name, and the qualified name falls back to whatever
 * name was chosen.
 *
 * Returns the new object (already handed to _PyObject_GC_TRACK), or NULL
 * when PyObject_GC_New fails.
 */
static PyObject *
gen_new_with_qualname(PyTypeObject *type, PyFrameObject *f,
                      PyObject *name, PyObject *qualname)
{
    PyGenObject *self = PyObject_GC_New(PyGenObject, type);
    if (self == NULL) {
        Py_DECREF(f);
        return NULL;
    }

    /* Link the generator and its frame in both directions. */
    self->gi_frame = f;
    f->f_gen = (PyObject *)self;

    /* The generator keeps its own reference to the frame's code object. */
    Py_INCREF(f->f_code);
    self->gi_code = (PyObject *)(f->f_code);

    self->gi_running = 0;
    self->gi_weakreflist = NULL;

    /* Begin with an empty exception state. */
    self->gi_exc_state.exc_type = NULL;
    self->gi_exc_state.exc_value = NULL;
    self->gi_exc_state.exc_traceback = NULL;
    self->gi_exc_state.previous_item = NULL;

    /* An explicit name wins; otherwise use the code object's name. */
    self->gi_name = (name != NULL)
        ? name
        : ((PyCodeObject *)self->gi_code)->co_name;
    Py_INCREF(self->gi_name);

    /* An explicit qualified name wins; otherwise reuse the plain name. */
    self->gi_qualname = (qualname != NULL) ? qualname : self->gi_name;
    Py_INCREF(self->gi_qualname);

    _PyObject_GC_TRACK(self);
    return (PyObject *)self;
}
在
生成器和函数几乎一样, 如果一个函数中出现 yield(yield from), 那么就变成了生成器.
其实生成器完全可以使用别的关键字来定义, 例如将 `def` 替换为 `gen` 或者 `defgen`, 至于为什么最后还是使用了 `def`, 可以阅读 PEP 255.
生成器的函数对象的创建几乎完全利用了创建普通函数的代码, 字节码都是 `MAKE_FUNCTION`. 当调用生成器的函数对象时, 也利用了调用普通函数的代码, 字节码都是 `CALL_FUNCTION`, 区别是生成器的 PyCodeObject 的 `co_flags` 包含 `CO_GENERATOR` 标志, `CALL_FUNCTION` 最终返回的是生成器对象(PyGenObject).
def f():  # the `yield` in the body makes this a generator function
    for i in range(10):
        yield i  # hand i to the caller and suspend here until resumed

g = f()  # calling f() only creates the generator object; the body has not run yet
对应的字节码为:
1 0 LOAD_CONST 0 (<code object f at 0x7f2853b089d0, file "mydemo/gen_demo.py", line 1>)
2 LOAD_CONST 1 ('f')
4 MAKE_FUNCTION 0
6 STORE_NAME 0 (f)
5 8 LOAD_NAME 0 (f)
10 CALL_FUNCTION 0
12 STORE_NAME 1 (g)
14 LOAD_CONST 2 (None)
16 RETURN_VALUE
Disassembly of <code object f at 0x7f2853b089d0, file "mydemo/gen_demo.py", line 1>:
2 0 LOAD_GLOBAL 0 (range)
2 LOAD_CONST 1 (10)
4 CALL_FUNCTION 1
6 GET_ITER
>> 8 FOR_ITER 10 (to 20)
10 STORE_FAST 0 (i)
3 12 LOAD_FAST 0 (i)
14 YIELD_VALUE
16 POP_TOP
18 JUMP_ABSOLUTE 8
>> 20 LOAD_CONST 0 (None)
22 RETURN_VALUE
在执行 `g = f()` 之前, f 仍然是一个普通的函数, 生成器是在调用 `f()` 时创建的, 创建生成器的代码如下:
/* Excerpt of _PyEval_EvalCode; elided parts are marked with "// ...".
 *
 * The visible part handles code objects flagged CO_GENERATOR,
 * CO_COROUTINE or CO_ASYNC_GENERATOR: instead of executing the frame f
 * (set up in the elided code), it wraps the frame in a new coroutine,
 * async generator or generator object and returns that object.
 * Returns NULL if the object could not be created.
 */
PyObject *
_PyEval_EvalCode(PyThreadState *tstate,
PyObject *_co, PyObject *globals, PyObject *locals,
PyObject *const *args, Py_ssize_t argcount,
PyObject *const *kwnames, PyObject *const *kwargs,
Py_ssize_t kwcount, int kwstep,
PyObject *const *defs, Py_ssize_t defcount,
PyObject *kwdefs, PyObject *closure,
PyObject *name, PyObject *qualname)
{
// ...
/* Handle generator/coroutine/asynchronous generator */
if (co->co_flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) {
PyObject *gen;
int is_coro = co->co_flags & CO_COROUTINE;
/* Don't need to keep the reference to f_back, it will be set
* when the generator is resumed. */
Py_CLEAR(f->f_back);
/* Create a new generator that owns the ready to run frame
* and return that as the value. */
/* The coroutine flag is tested first, then the async generator
* flag; everything else is a plain generator. */
if (is_coro) {
gen = PyCoro_New(f, name, qualname);
} else if (co->co_flags & CO_ASYNC_GENERATOR) {
gen = PyAsyncGen_New(f, name, qualname);
} else {
gen = PyGen_NewWithQualName(f, name, qualname);
}
/* NOTE(review): f is not released on this path.
* gen_new_with_qualname drops the frame reference itself when its
* allocation fails; presumably all three constructors above go
* through it -- confirm. */
if (gen == NULL) {
return NULL;
}
/* Hand the frame to the garbage collector's tracking before
* returning the generator that refers to it. */
_PyObject_GC_TRACK(f);
return gen;
}
// ...
}
`_PyEval_EvalCode` 前面所有的代码都在处理函数的参数(实参, 形参, 位置参数, 关键字参数, 默认参数等等). 处理完之后, 新创建的栈帧就可以投入执行了, 但是由于发现 code 对象带有 `CO_GENERATOR` 标志, 因此创建生成器对象并返回. 如果是普通的函数, 那么就执行栈帧并返回结果.
生成器支持三个方法: `send`, `throw`, `close`.
static PyMethodDef gen_methods[] = {
{"send",(PyCFunction)_PyGen_Send, METH_O, send_doc},
{"throw",(PyCFunction)gen_throw, METH_VARARGS, throw_doc},
{"close",(PyCFunction)gen_close, METH_NOARGS, close_doc},
{NULL, NULL} /* Sentinel */
};
`next(g)` 是通过 `send` 实现的, 相当于 `g.send(None)`.
def f():
    for i in range(10):
        yield i

g = f()
# The first send() on a just-started generator must be given None.
res = g.send(None)
/* Resume the generator `gen` by evaluating its frame once.
 *
 * gen     - generator, coroutine or async generator to resume.
 * arg     - value delivered to the generator. It is pushed onto the
 *           frame's value stack (None is pushed when arg is NULL),
 *           except when f_lasti == -1, where nothing is pushed and only
 *           NULL or None is accepted.
 * exc     - non-zero when an exception is already set; it is forwarded
 *           to _PyEval_EvalFrame.
 * closing - non-zero suppresses the "cannot reuse already awaited
 *           coroutine" error on an exhausted coroutine.
 *
 * Returns the value produced by the frame while it stays resumable,
 * otherwise NULL. When the frame returns instead of yielding, the return
 * value is converted into StopIteration / StopAsyncIteration and NULL is
 * returned. On an already exhausted generator NULL is returned, and an
 * exception is only set for coroutines (unless closing) or when
 * `arg && !exc`.
 *
 * Once the frame can no longer be resumed, the generator's reference to
 * it is released and gi_frame is set to NULL.
 */
static PyObject *
gen_send_ex(PyGenObject *gen, PyObject *arg, int exc, int closing)
{
PyThreadState *tstate = _PyThreadState_GET();
PyFrameObject *f = gen->gi_frame;
PyObject *result;
/* Refuse re-entrant resumption of an object that is already running. */
if (gen->gi_running) {
const char *msg = "generator already executing";
if (PyCoro_CheckExact(gen)) {
msg = "coroutine already executing";
}
else if (PyAsyncGen_CheckExact(gen)) {
msg = "async generator already executing";
}
PyErr_SetString(PyExc_ValueError, msg);
return NULL;
}
/* No frame, or a frame without a saved stack top: nothing to resume. */
if (f == NULL || f->f_stacktop == NULL) {
if (PyCoro_CheckExact(gen) && !closing) {
/* `gen` is an exhausted coroutine: raise an error,
except when called from gen_close(), which should
always be a silent method. */
PyErr_SetString(
PyExc_RuntimeError,
"cannot reuse already awaited coroutine");
}
else if (arg && !exc) {
/* `gen` is an exhausted generator:
only set exception if called from send(). */
if (PyAsyncGen_CheckExact(gen)) {
PyErr_SetNone(PyExc_StopAsyncIteration);
}
else {
PyErr_SetNone(PyExc_StopIteration);
}
}
return NULL;
}
/* f_lasti == -1 is treated as "just started": there is no pending
yield expression to receive a value, so only NULL/None is accepted. */
if (f->f_lasti == -1) {
if (arg && arg != Py_None) {
const char *msg = "can't send non-None value to a "
"just-started generator";
if (PyCoro_CheckExact(gen)) {
msg = NON_INIT_CORO_MSG;
}
else if (PyAsyncGen_CheckExact(gen)) {
msg = "can't send non-None value to a "
"just-started async generator";
}
PyErr_SetString(PyExc_TypeError, msg);
return NULL;
}
} else {
/* Push arg onto the frame's value stack */
result = arg ? arg : Py_None;
Py_INCREF(result);
*(f->f_stacktop++) = result;
}
/* Generators always return to their most recent caller, not
* necessarily their creator. */
Py_XINCREF(tstate->frame);
assert(f->f_back == NULL);
f->f_back = tstate->frame;
/* Mark the generator as running and make its exception state the
* thread's current one while the frame is evaluated. */
gen->gi_running = 1;
gen->gi_exc_state.previous_item = tstate->exc_info;
tstate->exc_info = &gen->gi_exc_state;
if (exc) {
assert(_PyErr_Occurred(tstate));
_PyErr_ChainStackItem(NULL);
}
result = _PyEval_EvalFrame(tstate, f, exc);
/* Undo the exception-state switch and clear the running flag. */
tstate->exc_info = gen->gi_exc_state.previous_item;
gen->gi_exc_state.previous_item = NULL;
gen->gi_running = 0;
/* Don't keep the reference to f_back any longer than necessary. It
* may keep a chain of frames alive or it could create a reference
* cycle. */
assert(f->f_back == tstate->frame);
Py_CLEAR(f->f_back);
/* If the generator just returned (as opposed to yielding), signal
* that the generator is exhausted. */
if (result && f->f_stacktop == NULL) {
if (result == Py_None) {
/* Delay exception instantiation if we can */
if (PyAsyncGen_CheckExact(gen)) {
PyErr_SetNone(PyExc_StopAsyncIteration);
}
else {
PyErr_SetNone(PyExc_StopIteration);
}
}
else {
/* Async generators cannot return anything but None */
assert(!PyAsyncGen_CheckExact(gen));
_PyGen_SetStopIterationValue(result);
}
/* The return value now travels in the exception; report NULL. */
Py_CLEAR(result);
}
else if (!result && PyErr_ExceptionMatches(PyExc_StopIteration)) {
/* A StopIteration escaping from the frame is replaced by a
RuntimeError formatted from it as the cause. */
const char *msg = "generator raised StopIteration";
if (PyCoro_CheckExact(gen)) {
msg = "coroutine raised StopIteration";
}
else if (PyAsyncGen_CheckExact(gen)) {
msg = "async generator raised StopIteration";
}
_PyErr_FormatFromCause(PyExc_RuntimeError, "%s", msg);
}
else if (!result && PyAsyncGen_CheckExact(gen) &&
PyErr_ExceptionMatches(PyExc_StopAsyncIteration))
{
/* code in `gen` raised a StopAsyncIteration error:
raise a RuntimeError.
*/
const char *msg = "async generator raised StopAsyncIteration";
_PyErr_FormatFromCause(PyExc_RuntimeError, "%s", msg);
}
if (!result || f->f_stacktop == NULL) {
/* generator can't be rerun, so release the frame */
/* first clean reference cycle through stored exception traceback */
_PyErr_ClearExcState(&gen->gi_exc_state);
gen->gi_frame->f_gen = NULL;
gen->gi_frame = NULL;
Py_DECREF(f);
}
return result;
}
/* Function registered as the generator's "send" method: resume `gen`
 * and deliver `arg` to it, with no exception being thrown in and not on
 * behalf of close().  Returns whatever gen_send_ex returns. */
PyObject *
_PyGen_Send(PyGenObject *gen, PyObject *arg)
{
    const int exc = 0;      /* no pending exception to raise inside gen */
    const int closing = 0;  /* not a close() request */
    PyObject *result = gen_send_ex(gen, arg, exc, closing);
    return result;
}
`gen_send_ex` 的核心功能就是调用 `_PyEval_EvalFrame` 对生成器进行求值.
在 `_PyEval_EvalFrameDefault` 函数中有下面的代码:
/* Continue from the stack top stored in the frame. */
stack_pointer = f->f_stacktop;
assert(stack_pointer != NULL);
f->f_stacktop = NULL; /* remains NULL unless yield suspends frame */
/* Flag the frame as currently executing. */
f->f_executing = 1;
`stack_pointer` 是函数局部变量, 指向栈顶. 如果是普通函数, `f->f_stacktop` 的值是在创建 PyFrameObject 时的初始化值, 指向 `f->f_valuestack`. 如果是生成器, 那么 `f->f_stacktop` 指向上一次的栈顶.
/* YIELD_VALUE: pop the value to yield into retval, remember the stack
   top in the frame and leave the evaluation loop via `exiting`. */
case TARGET(YIELD_VALUE): {
retval = POP();
/* For async generators the yielded value is wrapped first; the
   reference to the unwrapped value is dropped either way. */
if (co->co_flags & CO_ASYNC_GENERATOR) {
PyObject *w = _PyAsyncGenValueWrapperNew(retval);
Py_DECREF(retval);
if (w == NULL) {
retval = NULL;
goto error;
}
retval = w;
}
// Save the stack pointer in the frame so the stack can be restored on resume
f->f_stacktop = stack_pointer;
goto exiting;
}
`YIELD_VALUE` 在返回之前将栈顶指针保存到 `f->f_stacktop`, 这样一来当重新执行生成器时, 才可以恢复之前的栈.
/* YIELD_FROM: send the popped value v to the receiver that stays on top
   of the stack. A non-NULL result is yielded to the caller and f_lasti
   is rewound so this instruction runs again on resume; a NULL result
   ends the delegation (or jumps to error). */
case TARGET(YIELD_FROM): {
PyObject *v = POP();
PyObject *receiver = TOP();
int err;
/* Exact generators and coroutines are resumed directly. Other
   receivers are advanced with tp_iternext when v is None, and through
   their send() method otherwise. */
if (PyGen_CheckExact(receiver) || PyCoro_CheckExact(receiver)) {
retval = _PyGen_Send((PyGenObject *)receiver, v);
} else {
_Py_IDENTIFIER(send);
if (v == Py_None)
retval = Py_TYPE(receiver)->tp_iternext(receiver);
else
retval = _PyObject_CallMethodIdOneArg(receiver, &PyId_send, v);
}
Py_DECREF(v);
if (retval == NULL) {
// retval == NULL: the receiver produced no value (it finished, or it raised)
PyObject *val;
if (tstate->c_tracefunc != NULL
&& _PyErr_ExceptionMatches(tstate, PyExc_StopIteration))
call_exc_trace(tstate->c_tracefunc, tstate->c_traceobj, tstate, f);
// Fetch the value carried by the StopIteration into val; a negative
// result is handled as an error
err = _PyGen_FetchStopIterationValue(&val);
if (err < 0)
goto error;
Py_DECREF(receiver);
// Put val on top of the stack in place of the receiver. The instruction after
// YIELD_FROM then sees it as the result, i.e. the `ret` in `ret = yield from receiver`.
SET_TOP(val);
DISPATCH();
}
/* receiver remains on stack, retval is value to be yielded */
f->f_stacktop = stack_pointer;
/* and repeat... */
assert(f->f_lasti >= (int)sizeof(_Py_CODEUNIT));
// Step f_lasti back by one code unit so that YIELD_FROM is executed again on
// resume; this repeats until the receiver is exhausted.
f->f_lasti -= sizeof(_Py_CODEUNIT);
goto exiting;
}
`YIELD_FROM` 的前一条字节码是 `LOAD_CONST 0 (None)`, `None` 作为 `YIELD_FROM` 第一次执行时发送到 receiver 的初始值, 也就是说 `PyObject *v = POP();` 的 v 等于 None.
当 `YIELD_FROM` 第二次执行时, `PyObject *v = POP();` 的 v 等于上层调用者调用 `g.send(val)` 发送的值.
throw 方法的基本原理是:
/* Install (typ, val, tb) as the current exception, then resume the
   generator with exc = 1; gen_send_ex forwards that flag to
   _PyEval_EvalFrame. */
PyErr_Restore(typ, val, tb);
return gen_send_ex(gen, Py_None, 1, 0);
具体查看我在代码中的注释.
看完生成器的实现代码, 终于理解了生成器的实现原理, 以及如何在生成器的基础上实现协程.