Skip to content

Latest commit

 

History

History
445 lines (375 loc) · 14.8 KB

生成器.md

File metadata and controls

445 lines (375 loc) · 14.8 KB

生成器

/* _PyGenObject_HEAD defines the initial segment of generator
   and coroutine objects. */
#define _PyGenObject_HEAD(prefix)                                           \
    PyObject_HEAD                                                           \
    /* Note: gi_frame can be NULL if the generator is "finished" */         \
    PyFrameObject *prefix##_frame;                                          \
    /* True if generator is being executed. */                              \
    char prefix##_running;                                                  \
    /* The code object backing the generator */                             \
    PyObject *prefix##_code;                                                \
    /* List of weak reference. */                                           \
    PyObject *prefix##_weakreflist;                                         \
    /* Name of the generator. */                                            \
    PyObject *prefix##_name;                                                \
    /* Qualified name of the generator. */                                  \
    PyObject *prefix##_qualname;                                            \
    _PyErr_StackItem prefix##_exc_state;

/* Generator object: it consists of nothing but the shared head expanded
   with the gi prefix (gi_frame, gi_running, gi_code, ...). */
typedef struct {
    /* The gi_ prefix is intended to remind of generator-iterator. */
    _PyGenObject_HEAD(gi)
} PyGenObject;

创建 PyGenObject:

/* Allocate a generator-like object of `type` around the frame `f`.
 *
 * The reference to `f` passed in is consumed: it is stored in gi_frame
 * without an extra INCREF on success and released on allocation failure.
 * `name` and `qualname` may be NULL; the name then falls back to the code
 * object's co_name and the qualified name falls back to the chosen name.
 *
 * Returns the new object, or NULL if the allocation failed. */
static PyObject *
gen_new_with_qualname(PyTypeObject *type, PyFrameObject *f,
                      PyObject *name, PyObject *qualname)
{
    PyGenObject *self = PyObject_GC_New(PyGenObject, type);
    _PyErr_StackItem *exc_state;

    if (!self) {
        Py_DECREF(f);
        return NULL;
    }

    /* Link the generator and its frame to each other. */
    self->gi_frame = f;
    f->f_gen = (PyObject *)self;

    /* The generator keeps its own reference to the code object. */
    self->gi_code = (PyObject *)(f->f_code);
    Py_INCREF(self->gi_code);

    self->gi_running = 0;
    self->gi_weakreflist = NULL;

    /* Start with an empty exception state. */
    exc_state = &self->gi_exc_state;
    exc_state->exc_type = NULL;
    exc_state->exc_value = NULL;
    exc_state->exc_traceback = NULL;
    exc_state->previous_item = NULL;

    self->gi_name = (name != NULL)
        ? name
        : ((PyCodeObject *)self->gi_code)->co_name;
    Py_INCREF(self->gi_name);

    self->gi_qualname = (qualname != NULL) ? qualname : self->gi_name;
    Py_INCREF(self->gi_qualname);

    _PyObject_GC_TRACK(self);
    return (PyObject *)self;
}

生成器和函数几乎一样, 如果一个函数中出现 yield(yield from), 那么就变成了生成器.

其实生成器完全可以使用别的关键字来定义, 例如将 def 替换为 gen 或者 defgen, 至于为什么最后还是使用了 def, 可以阅读 PEP 255.

生成器的函数对象的创建几乎完全利用了创建普通函数的代码, 字节码都是 MAKE_FUNCTION. 当调用生成器的函数对象时, 也利用了调用普通函数的代码, 字节码都是 CALL_FUNCTION, 区别是生成器的 PyCodeObject 的 co_flags 包含 CO_GENERATOR 标志, CALL_FUNCTION 最终返回的是生成器对象(PyGenObject).

创建生成器

def f():
    for i in range(10):
        yield i  # the yield is what makes f a generator function

g = f()  # calling f() only builds the generator object; the body has not run yet

对应的字节码为:

  1           0 LOAD_CONST               0 (<code object f at 0x7f2853b089d0, file "mydemo/gen_demo.py", line 1>)
              2 LOAD_CONST               1 ('f')
              4 MAKE_FUNCTION            0
              6 STORE_NAME               0 (f)

  5           8 LOAD_NAME                0 (f)
             10 CALL_FUNCTION            0
             12 STORE_NAME               1 (g)
             14 LOAD_CONST               2 (None)
             16 RETURN_VALUE

Disassembly of <code object f at 0x7f2853b089d0, file "mydemo/gen_demo.py", line 1>:
  2           0 LOAD_GLOBAL              0 (range)
              2 LOAD_CONST               1 (10)
              4 CALL_FUNCTION            1
              6 GET_ITER
        >>    8 FOR_ITER                10 (to 20)
             10 STORE_FAST               0 (i)

  3          12 LOAD_FAST                0 (i)
             14 YIELD_VALUE
             16 POP_TOP
             18 JUMP_ABSOLUTE            8
        >>   20 LOAD_CONST               0 (None)
             22 RETURN_VALUE

在执行 g = f() 之前, f 仍然是一个普通的函数, 生成器是在调用 f() 时创建的, 创建生成器的代码如下:

/* Excerpt of _PyEval_EvalCode; the parts marked "// ..." are elided.
 *
 * Shown here is the branch taken when the code object is flagged as a
 * generator, coroutine or async generator: instead of evaluating the frame
 * `f`, the function wraps it in the matching object and returns that.
 * NOTE(review): `f` and `co` are defined in the elided code above -
 * presumably the freshly built frame and the code object behind `_co`. */
PyObject *
_PyEval_EvalCode(PyThreadState *tstate,
           PyObject *_co, PyObject *globals, PyObject *locals,
           PyObject *const *args, Py_ssize_t argcount,
           PyObject *const *kwnames, PyObject *const *kwargs,
           Py_ssize_t kwcount, int kwstep,
           PyObject *const *defs, Py_ssize_t defcount,
           PyObject *kwdefs, PyObject *closure,
           PyObject *name, PyObject *qualname)
{
    // ...
    /* Handle generator/coroutine/asynchronous generator */
    if (co->co_flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) {
        PyObject *gen;
        int is_coro = co->co_flags & CO_COROUTINE;

        /* Don't need to keep the reference to f_back, it will be set
         * when the generator is resumed. */
        Py_CLEAR(f->f_back);

        /* Create a new generator that owns the ready to run frame
         * and return that as the value. */
        if (is_coro) {
            gen = PyCoro_New(f, name, qualname);
        } else if (co->co_flags & CO_ASYNC_GENERATOR) {
            gen = PyAsyncGen_New(f, name, qualname);
        } else {
            gen = PyGen_NewWithQualName(f, name, qualname);
        }
        /* Creation failed: propagate the failure to the caller. */
        if (gen == NULL) {
            return NULL;
        }

        _PyObject_GC_TRACK(f);

        /* The frame is handed back unexecuted, wrapped in `gen`. */
        return gen;
    }
    // ...
}

_PyEval_EvalCode 前面所有的代码都在处理函数的参数(实参, 形参, 位置参数, 关键字参数, 默认参数等等). 处理完之后, 新创建的栈帧就可以投入执行了, 但是由于 code 对象带有 CO_GENERATOR 标志, 这里并不执行栈帧, 而是创建生成器对象并返回. 如果是普通的函数, 那么就执行栈帧并返回结果.

操作生成器

生成器支持三个方法: send, throw, close.

/* Method table of generator objects.  Each row gives the Python-visible
 * name, the C function implementing it, the calling-convention flag and
 * the docstring:
 *   send  - METH_O: takes exactly one argument
 *   throw - METH_VARARGS: takes a tuple of positional arguments
 *   close - METH_NOARGS: takes no arguments */
static PyMethodDef gen_methods[] = {
    {"send",(PyCFunction)_PyGen_Send, METH_O, send_doc},
    {"throw",(PyCFunction)gen_throw, METH_VARARGS, throw_doc},
    {"close",(PyCFunction)gen_close, METH_NOARGS, close_doc},
    {NULL, NULL}        /* Sentinel */
};

next(g) 和 send 走的是同一条路径: 二者最终都调用 gen_send_ex, next(g) 的效果等价于 g.send(None).

send 方法

def f():
    for i in range(10):
        yield i

g = f()
# The first call to send() must pass None as its argument
res = g.send(None)
/* Resume `gen` (a generator, coroutine or async generator) by evaluating
 * its frame once.
 *
 *   arg     - value delivered to the suspended frame; it is pushed onto the
 *             frame's value stack (None is pushed when arg is NULL).  For a
 *             just-started frame nothing is pushed and arg must be NULL or
 *             None.
 *   exc     - forwarded to _PyEval_EvalFrame; when non-zero an exception
 *             must already be set.
 *   closing - when non-zero, an exhausted coroutine does not raise
 *             "cannot reuse already awaited coroutine".
 *
 * Returns the value produced by the frame if it suspended at a yield.
 * Returns NULL when the frame returned (StopIteration or
 * StopAsyncIteration is set), when it raised, or when `gen` was already
 * exhausted or is currently running. */
static PyObject *
gen_send_ex(PyGenObject *gen, PyObject *arg, int exc, int closing)
{
    PyThreadState *tstate = _PyThreadState_GET();
    PyFrameObject *f = gen->gi_frame;
    PyObject *result;

    /* Refuse re-entrant resumption of a generator that is executing. */
    if (gen->gi_running) {
        const char *msg = "generator already executing";
        if (PyCoro_CheckExact(gen)) {
            msg = "coroutine already executing";
        }
        else if (PyAsyncGen_CheckExact(gen)) {
            msg = "async generator already executing";
        }
        PyErr_SetString(PyExc_ValueError, msg);
        return NULL;
    }
    /* No frame, or a frame without a saved stack top: `gen` is exhausted. */
    if (f == NULL || f->f_stacktop == NULL) {
        if (PyCoro_CheckExact(gen) && !closing) {
            /* `gen` is an exhausted coroutine: raise an error,
               except when called from gen_close(), which should
               always be a silent method. */
            PyErr_SetString(
                PyExc_RuntimeError,
                "cannot reuse already awaited coroutine");
        }
        else if (arg && !exc) {
            /* `gen` is an exhausted generator:
               only set exception if called from send(). */
            if (PyAsyncGen_CheckExact(gen)) {
                PyErr_SetNone(PyExc_StopAsyncIteration);
            }
            else {
                PyErr_SetNone(PyExc_StopIteration);
            }
        }
        return NULL;
    }

    /* f_lasti == -1: the frame has not executed any instruction yet, so
       there is no pending yield expression that could receive a value. */
    if (f->f_lasti == -1) {
        if (arg && arg != Py_None) {
            const char *msg = "can't send non-None value to a "
                              "just-started generator";
            if (PyCoro_CheckExact(gen)) {
                msg = NON_INIT_CORO_MSG;
            }
            else if (PyAsyncGen_CheckExact(gen)) {
                msg = "can't send non-None value to a "
                      "just-started async generator";
            }
            PyErr_SetString(PyExc_TypeError, msg);
            return NULL;
        }
    } else {
        /* Push arg onto the frame's value stack */
        result = arg ? arg : Py_None;
        Py_INCREF(result);
        *(f->f_stacktop++) = result;
    }

    /* Generators always return to their most recent caller, not
     * necessarily their creator. */
    Py_XINCREF(tstate->frame);
    assert(f->f_back == NULL);
    f->f_back = tstate->frame;

    /* Mark the generator as running and make its exception state the
       thread's current one, remembering the previous one for later. */
    gen->gi_running = 1;
    gen->gi_exc_state.previous_item = tstate->exc_info;
    tstate->exc_info = &gen->gi_exc_state;

    if (exc) {
        assert(_PyErr_Occurred(tstate));
        _PyErr_ChainStackItem(NULL);
    }

    /* Run the frame until it yields, returns or raises, then undo the
       bookkeeping done above. */
    result = _PyEval_EvalFrame(tstate, f, exc);
    tstate->exc_info = gen->gi_exc_state.previous_item;
    gen->gi_exc_state.previous_item = NULL;
    gen->gi_running = 0;

    /* Don't keep the reference to f_back any longer than necessary.  It
     * may keep a chain of frames alive or it could create a reference
     * cycle. */
    assert(f->f_back == tstate->frame);
    Py_CLEAR(f->f_back);

    /* If the generator just returned (as opposed to yielding), signal
     * that the generator is exhausted. */
    if (result && f->f_stacktop == NULL) {
        if (result == Py_None) {
            /* Delay exception instantiation if we can */
            if (PyAsyncGen_CheckExact(gen)) {
                PyErr_SetNone(PyExc_StopAsyncIteration);
            }
            else {
                PyErr_SetNone(PyExc_StopIteration);
            }
        }
        else {
            /* Async generators cannot return anything but None */
            assert(!PyAsyncGen_CheckExact(gen));
            _PyGen_SetStopIterationValue(result);
        }
        /* The return value is reported through the exception just set,
           so the function itself returns NULL. */
        Py_CLEAR(result);
    }
    else if (!result && PyErr_ExceptionMatches(PyExc_StopIteration)) {
        /* code in `gen` raised StopIteration itself:
           replace it with a RuntimeError. */
        const char *msg = "generator raised StopIteration";
        if (PyCoro_CheckExact(gen)) {
            msg = "coroutine raised StopIteration";
        }
        else if (PyAsyncGen_CheckExact(gen)) {
            msg = "async generator raised StopIteration";
        }
        _PyErr_FormatFromCause(PyExc_RuntimeError, "%s", msg);

    }
    else if (!result && PyAsyncGen_CheckExact(gen) &&
             PyErr_ExceptionMatches(PyExc_StopAsyncIteration))
    {
        /* code in `gen` raised a StopAsyncIteration error:
           raise a RuntimeError.
        */
        const char *msg = "async generator raised StopAsyncIteration";
        _PyErr_FormatFromCause(PyExc_RuntimeError, "%s", msg);
    }

    if (!result || f->f_stacktop == NULL) {
        /* generator can't be rerun, so release the frame */
        /* first clean reference cycle through stored exception traceback */
        _PyErr_ClearExcState(&gen->gi_exc_state);
        gen->gi_frame->f_gen = NULL;
        gen->gi_frame = NULL;
        Py_DECREF(f);
    }

    return result;
}

/* C implementation registered as the "send" method in gen_methods:
   resume `gen` with `arg`, passing exc = 0 and closing = 0 to
   gen_send_ex and returning its result unchanged. */
PyObject *
_PyGen_Send(PyGenObject *gen, PyObject *arg)
{
    return gen_send_ex(gen, arg, 0, 0);
}

gen_send_ex 的核心功能就是调用 _PyEval_EvalFrame 对生成器进行求值.

恢复栈指针

_PyEval_EvalFrameDefault 函数有下面的代码:

// Start from the stack top recorded in the frame, then clear the record
// while the frame is executing.
stack_pointer = f->f_stacktop;
assert(stack_pointer != NULL);
f->f_stacktop = NULL;       /* remains NULL unless yield suspends frame */
f->f_executing = 1;

stack_pointer 是函数局部变量, 指向栈顶. 如果是普通函数, f->f_stacktop 的值是创建 PyFrameObject 时设置的初始值, 指向 f->f_valuestack. 如果是被恢复执行的生成器, 那么 f->f_stacktop 指向上一次 yield 时保存的栈顶.

YIELD_VALUE

// YIELD_VALUE: pop the value to yield, record the stack top in the frame
// and leave the evaluation loop through the `exiting` label.
case TARGET(YIELD_VALUE): {
    retval = POP();

    // Code flagged as an async generator yields a wrapped value instead.
    if (co->co_flags & CO_ASYNC_GENERATOR) {
        PyObject *w = _PyAsyncGenValueWrapperNew(retval);
        Py_DECREF(retval);
        if (w == NULL) {
            retval = NULL;
            goto error;
        }
        retval = w;
    }
    // Save the stack-top pointer
    f->f_stacktop = stack_pointer;
    goto exiting;
}

YIELD_VALUE 在返回之前将栈顶指针保存到 f->f_stacktop, 这样一来当重新执行生成器时, 才可以恢复之前的栈.

YIELD_FROM

// YIELD_FROM: send the popped value `v` to the receiver left on the stack.
// If the receiver produces a value, yield it and arrange for this same
// instruction to run again on resume; otherwise continue with the next
// instruction.
case TARGET(YIELD_FROM): {
    PyObject *v = POP();
    PyObject *receiver = TOP();
    int err;
    // Exact generators and coroutines are resumed directly; any other
    // receiver is advanced through tp_iternext (when v is None) or through
    // its send() method.
    if (PyGen_CheckExact(receiver) || PyCoro_CheckExact(receiver)) {
        retval = _PyGen_Send((PyGenObject *)receiver, v);
    } else {
        _Py_IDENTIFIER(send);
        if (v == Py_None)
            retval = Py_TYPE(receiver)->tp_iternext(receiver);
        else
            retval = _PyObject_CallMethodIdOneArg(receiver, &PyId_send, v);
    }
    Py_DECREF(v);
    if (retval == NULL) {
        // retval == NULL means the receiver of `yield from` has finished
        // (a failure is caught by the err check below)
        PyObject *val;
        if (tstate->c_tracefunc != NULL
                && _PyErr_ExceptionMatches(tstate, PyExc_StopIteration))
            call_exc_trace(tstate->c_tracefunc, tstate->c_traceobj, tstate, f);
        // Take the value attribute out of the StopIteration object and assign it to val
        err = _PyGen_FetchStopIterationValue(&val);
        if (err < 0)
            goto error;
        Py_DECREF(receiver);
        // Store val at the top of the stack. When the bytecode following YIELD_FROM runs,
        // the `ret` in `ret = yield from receiver` is equal to this val.
        SET_TOP(val);
        DISPATCH();
    }
    /* receiver remains on stack, retval is value to be yielded */
    f->f_stacktop = stack_pointer;
    /* and repeat... */
    assert(f->f_lasti >= (int)sizeof(_Py_CODEUNIT));
    // Move f->f_lasti back one step, onto YIELD_FROM again, so that YIELD_FROM
    // keeps being executed until the receiver is exhausted.
    f->f_lasti -= sizeof(_Py_CODEUNIT);
    goto exiting;
}

YIELD_FROM 的前一条字节码是 LOAD_CONST 0 (None), 这个 None 就是 YIELD_FROM 第一次执行时发送给 receiver 的初始值, 也就是说此时 PyObject *v = POP(); 的 v 等于 None.

YIELD_FROM 第二次及以后每次执行时, PyObject *v = POP(); 的 v 等于上层调用者调用 g.send(val) 发送的值.

throw 方法

throw 方法的基本原理是:

// Install (typ, val, tb) as the current exception, then resume the
// generator with exc = 1; gen_send_ex forwards that flag to
// _PyEval_EvalFrame and asserts that an exception is set.
PyErr_Restore(typ, val, tb);
return gen_send_ex(gen, Py_None, 1, 0);

具体查看我在代码中的注释.

close

具体查看我在代码中的注释.

总结

看完生成器的实现代码, 终于理解了生成器的实现原理, 以及如何在生成器的基础上实现协程.

参考