# CPython compilation & bytecode

So, you wrote some Python code. What needs to happen before it starts running? How is it represented before and during execution? Find answers here.

Note: Everything here is specific to CPython – the reference implementation. Other Python interpreters, like PyPy, Jython or Batavia, do things very differently.

In [1]:
import os
import sys
import dis
import time
import inspect
import datetime

In [2]:
print(sys.version)

3.6.0 (default, Jan 31 2017, 00:05:46) 
[GCC 6.3.1 20161221 (Red Hat 6.3.1-1)]


---
# Lexical analysis
(tokenization)

In [3]:
import tokenize

In [4]:
!cat -n module.py

     1	a = 3
     2	b = 'Hello '
     3	print(a * b)
     4	
     5	def func(a=1, *b, **c):
     6	    return 7 + 3


In [5]:
!python3 -m tokenize module.py

0,0-0,0:            ENCODING       'utf-8'        
1,0-1,1:            NAME           'a'            
1,2-1,3:            OP             '='            
1,4-1,5:            NUMBER         '3'            
1,5-1,6:            NEWLINE        '\n'           
2,0-2,1:            NAME           'b'            
2,2-2,3:            OP             '='            
2,4-2,12:           STRING         "'Hello '"     
2,12-2,13:          NEWLINE        '\n'           
3,0-3,5:            NAME           'print'        
3,5-3,6:            OP             '('            
3,6-3,7:            NAME           'a'            
3,8-3,9:            OP             '*'            
3,10-3,11:          NAME           'b'            
3,11-3,12:          OP             ')'            
3,12-3,13:          NEWLINE        '\n'           
4,0-4,1:            NL             '\n'           
5,0-5,3:            NAME           'def'          
5,4-5,8:            NAME           'func'         
5,8-5,9:    

In [6]:
 with open('module.py' ,'rb') as f:
    for token in tokenize.tokenize(f.readline):
        print(token)

TokenInfo(type=59 (ENCODING), string='utf-8', start=(0, 0), end=(0, 0), line='')
TokenInfo(type=1 (NAME), string='a', start=(1, 0), end=(1, 1), line='a = 3\n')
TokenInfo(type=53 (OP), string='=', start=(1, 2), end=(1, 3), line='a = 3\n')
TokenInfo(type=2 (NUMBER), string='3', start=(1, 4), end=(1, 5), line='a = 3\n')
TokenInfo(type=4 (NEWLINE), string='\n', start=(1, 5), end=(1, 6), line='a = 3\n')
TokenInfo(type=1 (NAME), string='b', start=(2, 0), end=(2, 1), line="b = 'Hello '\n")
TokenInfo(type=53 (OP), string='=', start=(2, 2), end=(2, 3), line="b = 'Hello '\n")
TokenInfo(type=3 (STRING), string="'Hello '", start=(2, 4), end=(2, 12), line="b = 'Hello '\n")
TokenInfo(type=4 (NEWLINE), string='\n', start=(2, 12), end=(2, 13), line="b = 'Hello '\n")
TokenInfo(type=1 (NAME), string='print', start=(3, 0), end=(3, 5), line='print(a * b)\n')
TokenInfo(type=53 (OP), string='(', start=(3, 5), end=(3, 6), line='print(a * b)\n')
TokenInfo(type=1 (NAME), string='a', start=(3, 6), end=(3, 7), l

### Summary

When Python reads source code, it first converts it to a stream of *tokens* – word-like units of a language.

Two of Python's tokens are fairly unique among programming languages: `INDENT` and `DEDENT`.

---

# Parsing
(Abstract Syntax Tree)

In [7]:
import ast

In [8]:
!cat -n module.py

     1	a = 3
     2	b = 'Hello '
     3	print(a * b)
     4	
     5	def func(a=1, *b, **c):
     6	    return 7 + 3


In [9]:
with open('module.py' ,'rb') as f:
    tree = ast.parse(f.read())

print(tree)

<_ast.Module object at 0x7fc7bc9142e8>


In [10]:
print(tree._fields)

('body',)


In [11]:
for node in tree.body:
    print(node.lineno, node)

1 <_ast.Assign object at 0x7fc7bc914320>
2 <_ast.Assign object at 0x7fc7bc914438>
3 <_ast.Expr object at 0x7fc7bc9144e0>
5 <_ast.FunctionDef object at 0x7fc7bc914630>


In [12]:
def dump_ast(node, indent=''):
    """Pretty-print an AST node and everything below it as an indented outline.

    Each node is shown as its class name, followed by one entry per field:
    child nodes are dumped recursively, lists are wrapped in brackets, and
    any other value is shown with ``repr``.

    Args:
        node: the ``ast.AST`` instance to print.
        indent: prefix written before every line of this node; recursive
            calls extend it by four spaces per nesting level.
    """
    print('{}{}'.format(indent, type(node).__name__))
    child_indent = indent + '    '
    for name, value in ast.iter_fields(node):
        if isinstance(value, ast.AST):
            print('{}  .{}:'.format(indent, name))
            dump_ast(value, child_indent)
        elif isinstance(value, list):
            if not value:
                print('{}  .{}: []'.format(indent, name))
            else:
                print('{}  .{}: ['.format(indent, name))
                for child in value:
                    if isinstance(child, ast.AST):
                        dump_ast(child, child_indent)
                    else:
                        # Not every list holds nodes: Global.names is a list
                        # of str, and Dict.keys contains None for `**x`.
                        # ast.iter_fields() would fail on those, so show
                        # them as plain values instead.
                        print('{}{!r}'.format(child_indent, child))
                print('{}  ]'.format(indent))
        else:
            print('{}  .{} = {}'.format(indent, name, repr(value)))

dump_ast(tree)

Module
  .body: [
    Assign
      .targets: [
        Name
          .id = 'a'
          .ctx:
            Store
      ]
      .value:
        Num
          .n = 3
    Assign
      .targets: [
        Name
          .id = 'b'
          .ctx:
            Store
      ]
      .value:
        Str
          .s = 'Hello '
    Expr
      .value:
        Call
          .func:
            Name
              .id = 'print'
              .ctx:
                Load
          .args: [
            BinOp
              .left:
                Name
                  .id = 'a'
                  .ctx:
                    Load
              .op:
                Mult
              .right:
                Name
                  .id = 'b'
                  .ctx:
                    Load
          ]
          .keywords: []
    FunctionDef
      .name = 'func'
      .args:
        arguments
          .args: [
            arg
              .arg = 'a'
              .annotation = None
          ]
          .vararg

See also: "Full Grammar Specification" in the Python docs

## Summary

The token stream is parsed, according to Python's grammar, into the Abstract Syntax Tree (AST) – a tree-shaped representation of the program's structure.

It is possible to modify the AST, or generate it programmatically (even though the tools in the standard library aren't too usable). People have used this to add Lisp-like macros to Python, for example.

---
# Code Objects

In [13]:
!cat module.py

a = 3
b = 'Hello '
print(a * b)

def func(a=1, *b, **c):
    return 7 + 3


In [14]:
filename = os.path.abspath('module.py')
print(filename)

/home/pviktori/dev/slides/2017-03-23-belgrade-bytecode/module.py


In [15]:
code = compile(tree, filename=filename, mode='exec')

print(code)

<code object <module> at 0x7fc7bd137e40, file "/home/pviktori/dev/slides/2017-03-23-belgrade-bytecode/module.py", line 1>


In [16]:
exec(code)

Hello Hello Hello 


In [17]:
def dump_code_attrs(code):
    """Print a code object, then each of its non-dunder attributes.

    The first line is the object itself; every following line has the form
    ``  <attribute name>: <value>``, in the order returned by ``dir()``.
    """
    print(code)
    for attr in dir(code):
        # Skip the generic object machinery (__class__, __repr__, ...).
        if attr.startswith('__'):
            continue
        print(f'  {attr}: {getattr(code, attr)}')

dump_code_attrs(code)

<code object <module> at 0x7fc7bd137e40, file "/home/pviktori/dev/slides/2017-03-23-belgrade-bytecode/module.py", line 1>
  co_argcount: 0
  co_cellvars: ()
  co_code: b'd\x00Z\x00d\x01Z\x01e\x02e\x00e\x01\x14\x00\x83\x01\x01\x00d\x06d\x03d\x04\x84\x01Z\x03d\x05S\x00'
  co_consts: (3, 'Hello ', 1, <code object func at 0x7fc7bd137ed0, file "/home/pviktori/dev/slides/2017-03-23-belgrade-bytecode/module.py", line 5>, 'func', None, (1,))
  co_filename: /home/pviktori/dev/slides/2017-03-23-belgrade-bytecode/module.py
  co_firstlineno: 1
  co_flags: 64
  co_freevars: ()
  co_kwonlyargcount: 0
  co_lnotab: b'\x04\x01\x04\x01\x0c\x02'
  co_name: <module>
  co_names: ('a', 'b', 'print', 'func')
  co_nlocals: 0
  co_stacksize: 3
  co_varnames: ()


In [18]:
print(dis.code_info(code))

Name:              <module>
Filename:          /home/pviktori/dev/slides/2017-03-23-belgrade-bytecode/module.py
Argument count:    0
Kw-only arguments: 0
Number of locals:  0
Stack size:        3
Flags:             NOFREE
Constants:
   0: 3
   1: 'Hello '
   2: 1
   3: <code object func at 0x7fc7bd137ed0, file "/home/pviktori/dev/slides/2017-03-23-belgrade-bytecode/module.py", line 5>
   4: 'func'
   5: None
   6: (1,)
Names:
   0: a
   1: b
   2: print
   3: func


In [19]:
func_code = code.co_consts[3]
dump_code_attrs(func_code)
print(dis.code_info(func_code))

<code object func at 0x7fc7bd137ed0, file "/home/pviktori/dev/slides/2017-03-23-belgrade-bytecode/module.py", line 5>
  co_argcount: 1
  co_cellvars: ()
  co_code: b'd\x03S\x00'
  co_consts: (None, 7, 3, 10)
  co_filename: /home/pviktori/dev/slides/2017-03-23-belgrade-bytecode/module.py
  co_firstlineno: 5
  co_flags: 79
  co_freevars: ()
  co_kwonlyargcount: 0
  co_lnotab: b'\x00\x01'
  co_name: func
  co_names: ()
  co_nlocals: 3
  co_stacksize: 2
  co_varnames: ('a', 'b', 'c')
Name:              func
Filename:          /home/pviktori/dev/slides/2017-03-23-belgrade-bytecode/module.py
Argument count:    1
Kw-only arguments: 0
Number of locals:  3
Stack size:        2
Flags:             OPTIMIZED, NEWLOCALS, VARARGS, VARKEYWORDS, NOFREE
Constants:
   0: None
   1: 7
   2: 3
   3: 10
Variable names:
   0: a
   1: b
   2: c


### Summary
The Abstract Syntax Tree can be compiled down to a *code object*, a structure that holds all details of the program in a compact form – ready to be executed or saved.

A code object contains, among other things, the variable names and constants used in the code. Functions defined in the code are also represented as constants: other code objects!

---
# Bytecode

In [20]:
!cat module.py

a = 3
b = 'Hello '
print(a * b)

def func(a=1, *b, **c):
    return 7 + 3


In [22]:
code.co_code

b'd\x00Z\x00d\x01Z\x01e\x02e\x00e\x01\x14\x00\x83\x01\x01\x00d\x06d\x03d\x04\x84\x01Z\x03d\x05S\x00'

In [21]:
print(list(b for b in code.co_code))

[100, 0, 90, 0, 100, 1, 90, 1, 101, 2, 101, 0, 101, 1, 20, 0, 131, 1, 1, 0, 100, 6, 100, 3, 100, 4, 132, 1, 90, 3, 100, 5, 83, 0]


In [23]:
dis.dis(code)

  1           0 LOAD_CONST               0 (3)
              2 STORE_NAME               0 (a)

  2           4 LOAD_CONST               1 ('Hello ')
              6 STORE_NAME               1 (b)

  3           8 LOAD_NAME                2 (print)
             10 LOAD_NAME                0 (a)
             12 LOAD_NAME                1 (b)
             14 BINARY_MULTIPLY
             16 CALL_FUNCTION            1
             18 POP_TOP

  5          20 LOAD_CONST               6 ((1,))
             22 LOAD_CONST               3 (<code object func at 0x7fc7bd137ed0, file "/home/pviktori/dev/slides/2017-03-23-belgrade-bytecode/module.py", line 5>)
             24 LOAD_CONST               4 ('func')
             26 MAKE_FUNCTION            1
             28 STORE_NAME               3 (func)
             30 LOAD_CONST               5 (None)
             32 RETURN_VALUE


In [24]:
!python3 -m dis module.py

  1           0 LOAD_CONST               0 (3)
              2 STORE_NAME               0 (a)

  2           4 LOAD_CONST               1 ('Hello ')
              6 STORE_NAME               1 (b)

  3           8 LOAD_NAME                2 (print)
             10 LOAD_NAME                0 (a)
             12 LOAD_NAME                1 (b)
             14 BINARY_MULTIPLY
             16 CALL_FUNCTION            1
             18 POP_TOP

  5          20 LOAD_CONST               6 ((1,))
             22 LOAD_CONST               3 (<code object func at 0x7fe649304e40, file "module.py", line 5>)
             24 LOAD_CONST               4 ('func')
             26 MAKE_FUNCTION            1
             28 STORE_NAME               3 (func)
             30 LOAD_CONST               5 (None)
             32 RETURN_VALUE


In [25]:
def print_dis(code):
    """Print a compact disassembly of *code*, one instruction per line.

    Each line shows the instruction's byte offset, numeric opcode, opcode
    name, raw argument (``-`` when the instruction has none) and, in
    parentheses, the human-readable form of the argument. A blank line is
    printed before every instruction that starts a new source line.

    Args:
        code: anything accepted by ``dis.findlinestarts`` and
            ``dis.get_instructions`` – here, a code object.
    """
    # Membership is tested once per instruction, so keep the offsets in a set.
    line_starts = {offset for offset, _lineno in dis.findlinestarts(code)}
    for instr in dis.get_instructions(code):
        if instr.offset in line_starts:
            print()
        print('{i.offset:2}    {i.opcode:3} {i.opname:20} {arg:>4} ({i.argrepr})'.format(
                i=instr,
                arg='-' if instr.arg is None else instr.arg))

print_dis(code)


 0    100 LOAD_CONST              0 (3)
 2     90 STORE_NAME              0 (a)

 4    100 LOAD_CONST              1 ('Hello ')
 6     90 STORE_NAME              1 (b)

 8    101 LOAD_NAME               2 (print)
10    101 LOAD_NAME               0 (a)
12    101 LOAD_NAME               1 (b)
14     20 BINARY_MULTIPLY         - ()
16    131 CALL_FUNCTION           1 ()
18      1 POP_TOP                 - ()

20    100 LOAD_CONST              6 ((1,))
22    100 LOAD_CONST              3 (<code object func at 0x7fc7bd137ed0, file "/home/pviktori/dev/slides/2017-03-23-belgrade-bytecode/module.py", line 5>)
24    100 LOAD_CONST              4 ('func')
26    132 MAKE_FUNCTION           1 ()
28     90 STORE_NAME              3 (func)
30    100 LOAD_CONST              5 (None)
32     83 RETURN_VALUE            - ()


In [26]:
print(list(b for b in code.co_code))

[100, 0, 90, 0, 100, 1, 90, 1, 101, 2, 101, 0, 101, 1, 20, 0, 131, 1, 1, 0, 100, 6, 100, 3, 100, 4, 132, 1, 90, 3, 100, 5, 83, 0]


In [27]:
print(dis.opmap['LOAD_CONST'])
print(dis.opname[100])

100
LOAD_CONST


In [28]:
len(dis.opmap)

118

### Summary
The *bytecode* is a set of instructions for the Python interpreter – a stack-based virtual machine. This is the most important part of a code object – it tells Python what to do.

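Since Python 3.6 (the version used here), each instruction takes exactly two bytes: a one-byte opcode followed by a one-byte argument. Instructions that take no argument simply ignore that byte, which is why the offsets above advance in steps of 2. In Python 3.5 and earlier, an instruction was either a single byte (instructions with opcodes under a certain number) or three bytes: a one-byte opcode and a two-byte argument.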

## Classes
Now it starts getting interesting

In [30]:
def silly_function():  # demo: only its __code__ is inspected in this cell
    print(1, 2, sep=', ', end='.\n')  # keyword arguments compile to CALL_FUNCTION_KW

    def foo():  # nested def: its body becomes a separate code object stored as a constant
        print()

#dump_code_attrs(silly_function.__code__)
print_dis(silly_function.__code__)


 0    116 LOAD_GLOBAL             0 (print)
 2    100 LOAD_CONST              1 (1)
 4    100 LOAD_CONST              2 (2)
 6    100 LOAD_CONST              3 (', ')
 8    100 LOAD_CONST              4 ('.\n')
10    100 LOAD_CONST              5 (('sep', 'end'))
12    141 CALL_FUNCTION_KW        4 ()
14      1 POP_TOP                 - ()

16    100 LOAD_CONST              6 (<code object foo at 0x7fc7bd137d20, file "<ipython-input-30-cfb485f9b4d8>", line 4>)
18    100 LOAD_CONST              7 ('silly_function.<locals>.foo')
20    132 MAKE_FUNCTION           0 ()
22    125 STORE_FAST              0 (foo)
24    100 LOAD_CONST              0 (None)
26     83 RETURN_VALUE            - ()


In [31]:
def make_class():  # executes the class statement, so each call builds a new class
    class ClassExample:  # the class body is compiled to its own code object
        "a docstring"
        def __init__(self, name):
            self.name = name  # store the constructor argument on the instance
    return ClassExample

#dump_code_attrs(make_class.__code__)
print_dis(make_class.__code__)


 0     71 LOAD_BUILD_CLASS        - ()
 2    100 LOAD_CONST              1 (<code object ClassExample at 0x7fc7bc9188a0, file "<ipython-input-31-794a3072d5cc>", line 2>)
 4    100 LOAD_CONST              2 ('ClassExample')
 6    132 MAKE_FUNCTION           0 ()
 8    100 LOAD_CONST              2 ('ClassExample')
10    131 CALL_FUNCTION           2 ()
12    125 STORE_FAST              0 (ClassExample)

14    124 LOAD_FAST               0 (ClassExample)
16     83 RETURN_VALUE            - ()


In [32]:
help(__build_class__)

Help on built-in function __build_class__ in module builtins:

__build_class__(...)
    __build_class__(func, name, *bases, metaclass=None, **kwds) -> class
    
    Internal helper function used by the class statement.



In [33]:
class_code = make_class.__code__.co_consts[1]
#dump_code_attrs(class_code)
print_dis(class_code)


 0    101 LOAD_NAME               0 (__name__)
 2     90 STORE_NAME              1 (__module__)
 4    100 LOAD_CONST              0 ('make_class.<locals>.ClassExample')
 6     90 STORE_NAME              2 (__qualname__)

 8    100 LOAD_CONST              1 ('a docstring')
10     90 STORE_NAME              3 (__doc__)

12    100 LOAD_CONST              2 (<code object __init__ at 0x7fc7bd137f60, file "<ipython-input-31-794a3072d5cc>", line 4>)
14    100 LOAD_CONST              3 ('make_class.<locals>.ClassExample.__init__')
16    132 MAKE_FUNCTION           0 ()
18     90 STORE_NAME              4 (__init__)
20    100 LOAD_CONST              4 (None)
22     83 RETURN_VALUE            - ()


In [34]:
print(make_class())

<class '__main__.make_class.<locals>.ClassExample'>


In [35]:
make_class().__module__

'__main__'

In [36]:
make_class().__qualname__

'make_class.<locals>.ClassExample'

In [37]:
make_class().__doc__

'a docstring'

In [38]:
make_class().__init__

<function __main__.make_class.<locals>.ClassExample.__init__>

---
# Serialization

In [39]:
!cat -n module.py

     1	a = 3
     2	b = 'Hello '
     3	print(a * b)
     4	
     5	def func(a=1, *b, **c):
     6	    return 7 + 3


In [40]:
with open('__pycache__/module.cpython-36.pyc', 'rb') as f:
    module_bytes = f.read()

print(list(module_bytes))

[51, 13, 13, 10, 119, 11, 212, 88, 74, 0, 0, 0, 227, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 64, 0, 0, 0, 115, 34, 0, 0, 0, 100, 0, 90, 0, 100, 1, 90, 1, 101, 2, 101, 0, 101, 1, 20, 0, 131, 1, 1, 0, 100, 6, 100, 3, 100, 4, 132, 1, 90, 3, 100, 5, 83, 0, 41, 7, 233, 3, 0, 0, 0, 122, 6, 72, 101, 108, 108, 111, 32, 233, 1, 0, 0, 0, 99, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 79, 0, 0, 0, 115, 4, 0, 0, 0, 100, 3, 83, 0, 41, 4, 78, 233, 7, 0, 0, 0, 114, 1, 0, 0, 0, 233, 10, 0, 0, 0, 169, 0, 41, 3, 218, 1, 97, 218, 1, 98, 218, 1, 99, 114, 5, 0, 0, 0, 114, 5, 0, 0, 0, 250, 64, 47, 104, 111, 109, 101, 47, 112, 118, 105, 107, 116, 111, 114, 105, 47, 100, 101, 118, 47, 115, 108, 105, 100, 101, 115, 47, 50, 48, 49, 55, 45, 48, 51, 45, 50, 51, 45, 98, 101, 108, 103, 114, 97, 100, 101, 45, 98, 121, 116, 101, 99, 111, 100, 101, 47, 109, 111, 100, 117, 108, 101, 46, 112, 121, 218, 4, 102, 117, 110, 99, 5, 0, 0, 0, 115, 2, 0, 0, 0, 0, 1, 114, 10, 0, 0, 0, 78, 41, 1, 114, 2, 0, 0, 0, 41,

In [41]:
import marshal

marshalled = marshal.dumps(code)
print(marshal.loads(marshalled))


<code object <module> at 0x7fc7bc918660, file "/home/pviktori/dev/slides/2017-03-23-belgrade-bytecode/module.py", line 1>


In [42]:
print(list(marshalled))


[227, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 64, 0, 0, 0, 243, 34, 0, 0, 0, 100, 0, 90, 0, 100, 1, 90, 1, 101, 2, 101, 0, 101, 1, 20, 0, 131, 1, 1, 0, 100, 6, 100, 3, 100, 4, 132, 1, 90, 3, 100, 5, 83, 0, 41, 7, 233, 3, 0, 0, 0, 250, 6, 72, 101, 108, 108, 111, 32, 233, 1, 0, 0, 0, 227, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 79, 0, 0, 0, 115, 4, 0, 0, 0, 100, 3, 83, 0, 41, 4, 78, 233, 7, 0, 0, 0, 114, 2, 0, 0, 0, 233, 10, 0, 0, 0, 169, 0, 41, 3, 218, 1, 97, 218, 1, 98, 218, 1, 99, 114, 8, 0, 0, 0, 114, 8, 0, 0, 0, 250, 64, 47, 104, 111, 109, 101, 47, 112, 118, 105, 107, 116, 111, 114, 105, 47, 100, 101, 118, 47, 115, 108, 105, 100, 101, 115, 47, 50, 48, 49, 55, 45, 48, 51, 45, 50, 51, 45, 98, 101, 108, 103, 114, 97, 100, 101, 45, 98, 121, 116, 101, 99, 111, 100, 101, 47, 109, 111, 100, 117, 108, 101, 46, 112, 121, 218, 4, 102, 117, 110, 99, 5, 0, 0, 0, 115, 2, 0, 0, 0, 0, 1, 114, 13, 0, 0, 0, 78, 169, 1, 114, 4, 0, 0, 0, 41, 4, 114, 9, 0, 0, 0, 114, 10, 0, 0, 0, 218, 5

In [43]:
print(list(module_bytes))

[51, 13, 13, 10, 119, 11, 212, 88, 74, 0, 0, 0, 227, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 64, 0, 0, 0, 115, 34, 0, 0, 0, 100, 0, 90, 0, 100, 1, 90, 1, 101, 2, 101, 0, 101, 1, 20, 0, 131, 1, 1, 0, 100, 6, 100, 3, 100, 4, 132, 1, 90, 3, 100, 5, 83, 0, 41, 7, 233, 3, 0, 0, 0, 122, 6, 72, 101, 108, 108, 111, 32, 233, 1, 0, 0, 0, 99, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 79, 0, 0, 0, 115, 4, 0, 0, 0, 100, 3, 83, 0, 41, 4, 78, 233, 7, 0, 0, 0, 114, 1, 0, 0, 0, 233, 10, 0, 0, 0, 169, 0, 41, 3, 218, 1, 97, 218, 1, 98, 218, 1, 99, 114, 5, 0, 0, 0, 114, 5, 0, 0, 0, 250, 64, 47, 104, 111, 109, 101, 47, 112, 118, 105, 107, 116, 111, 114, 105, 47, 100, 101, 118, 47, 115, 108, 105, 100, 101, 115, 47, 50, 48, 49, 55, 45, 48, 51, 45, 50, 51, 45, 98, 101, 108, 103, 114, 97, 100, 101, 45, 98, 121, 116, 101, 99, 111, 100, 101, 47, 109, 111, 100, 117, 108, 101, 46, 112, 121, 218, 4, 102, 117, 110, 99, 5, 0, 0, 0, 115, 2, 0, 0, 0, 0, 1, 114, 10, 0, 0, 0, 78, 41, 1, 114, 2, 0, 0, 0, 41,

In [44]:
marshal.loads(module_bytes[12:]) == code

True

---

In [45]:
print(module_bytes[0:4])
list(module_bytes[0:4])

b'3\r\r\n'


[51, 13, 13, 10]

In [46]:
import importlib.util
print(importlib.util.MAGIC_NUMBER)
list(importlib.util.MAGIC_NUMBER)

b'3\r\r\n'


[51, 13, 13, 10]

---

In [47]:
print(module_bytes[4:8])
list(module_bytes[4:8])

b'w\x0b\xd4X'


[119, 11, 212, 88]

In [48]:
stamp_value = int.from_bytes(module_bytes[4:8], 'little')
stamp_value

1490291575

In [49]:
datetime.datetime.fromtimestamp(stamp_value).isoformat(' ')

'2017-03-23 18:52:55'

---

In [50]:
print(module_bytes[8:12])
list(module_bytes[8:12])

b'J\x00\x00\x00'


[74, 0, 0, 0]

In [51]:
int.from_bytes(module_bytes[8:12], 'little')

74

In [52]:
with open('module.py', 'rb') as f:
    print(len(f.read()))

74


In [54]:
os.stat('module.py')

os.stat_result(st_mode=33204, st_ino=2529913, st_dev=64768, st_nlink=1, st_uid=1000, st_gid=1000, st_size=74, st_atime=1490379765, st_mtime=1490291575, st_ctime=1490291575)


### Summary
The `.pyc` file stores the result of the compiler's hard work, so it doesn't have to compile again if the source file is not changed. It is generated (and read) as part of Python's import machinery – i.e. from "import" statements, not when running a module directly using `python module.py` or `python -m module`.

The “meat” of a `.pyc` file is a code object serialized using the `marshal` module – an efficient format that only supports values that can appear as constants in Python files.
The `.pyc` file adds a header to that, leading to this overall structure:

* Bytes 0-3: Magic number (identifies a bytecode version)
* Bytes 4-7: Modification time of source file
* Bytes 8-11: Length of source file (in bytes)
* Bytes 12 and on: `marshal`-ed code object

---
# Functions, Code Objects and Signatures

In [55]:
def make_adder(a):  # factory: `a` is captured by the closure returned below
    def adder(b:int=2, *args, c: "(unused)"=3, **kwargs) -> int:  # positional-or-keyword, *args, keyword-only and **kwargs parameters
        "A silly function"
        result = a + b  # `a` is read from the enclosing make_adder call (see __closure__)
        print('debug:', result)
        return result
    return adder

adder = make_adder(1)

print(adder(2))
print(adder(3))

debug: 3
3
debug: 4
4


In [56]:
print('__code__: ', adder.__code__)
print('__defaults__: ', adder.__defaults__)
print('__kwdefaults__: ', adder.__kwdefaults__)
print('__qualname__: ', adder.__qualname__)
print('__doc__: ', adder.__doc__)
print('__annotations__: ', adder.__annotations__)
print('__globals__: a', type(adder.__globals__).__name__)
print('__closure__: ', adder.__closure__)

__code__:  <code object adder at 0x7fc7bc918540, file "<ipython-input-55-8ecfda3da029>", line 2>
__defaults__:  (2,)
__kwdefaults__:  {'c': 3}
__qualname__:  make_adder.<locals>.adder
__doc__:  A silly function
__annotations__:  {'b': <class 'int'>, 'c': '(unused)', 'return': <class 'int'>}
__globals__: a dict
__closure__:  (<cell at 0x7fc7c01c6378: int object at 0x7fc7d1d54640>,)


In [None]:
dump_code_attrs(adder.__code__)
dis.show_code(adder)

In [57]:
sig = inspect.signature(adder)
print(sig)

(b:int=2, *args, c:'(unused)'=3, **kwargs) -> int


In [58]:
sig.parameters

mappingproxy({'args': <Parameter "*args">,
              'b': <Parameter "b:int=2">,
              'c': <Parameter "c:'(unused)'=3">,
              'kwargs': <Parameter "**kwargs">})

In [59]:
for name, arg in sig.parameters.items():
    print('{a.name}: kind={a.kind}, default={a.default!r}, annotation={a.annotation!r}'.format(a=arg))

b: kind=1, default=2, annotation=<class 'int'>
args: kind=2, default=<class 'inspect._empty'>, annotation=<class 'inspect._empty'>
c: kind=3, default=3, annotation='(unused)'
kwargs: kind=4, default=<class 'inspect._empty'>, annotation=<class 'inspect._empty'>


In [60]:
sig.return_annotation

int

In [61]:
bound_args = sig.bind(1, c=2, d=3)
bound_args

<BoundArguments (b=1, c=2, kwargs={'d': 3})>

In [62]:
adder(*bound_args.args, **bound_args.kwargs)

debug: 2


2

### Summary
Python's *functions* are mutable objects, each of which holds an immutable code object. Data related to code execution, like variable names and constant values, are stored in the code object. Extra data that's only needed when calling the function, such as default argument values, is stored on the function.

Data related to arguments – their names, values, and annotations – are stored in a format that's convenient for calling and running the function. When you need to inspect them, you can use a helper called `inspect.signature`, which exposes them in a more usable way.

---
# Line Numbers

In [63]:
def oops():  # demo: raises ZeroDivisionError when called
    print(1/0)  # the division is evaluated (and fails) before print is called

print_dis(oops.__code__)


 0    116 LOAD_GLOBAL             0 (print)
 2    100 LOAD_CONST              1 (1)
 4    100 LOAD_CONST              2 (0)
 6     27 BINARY_TRUE_DIVIDE      - ()
 8    131 CALL_FUNCTION           1 ()
10      1 POP_TOP                 - ()
12    100 LOAD_CONST              0 (None)
14     83 RETURN_VALUE            - ()


In [64]:
oops()

ZeroDivisionError: division by zero

In [None]:
!cat module.py

In [65]:
dump_code_attrs(code)

<code object <module> at 0x7fc7bd137e40, file "/home/pviktori/dev/slides/2017-03-23-belgrade-bytecode/module.py", line 1>
  co_argcount: 0
  co_cellvars: ()
  co_code: b'd\x00Z\x00d\x01Z\x01e\x02e\x00e\x01\x14\x00\x83\x01\x01\x00d\x06d\x03d\x04\x84\x01Z\x03d\x05S\x00'
  co_consts: (3, 'Hello ', 1, <code object func at 0x7fc7bd137ed0, file "/home/pviktori/dev/slides/2017-03-23-belgrade-bytecode/module.py", line 5>, 'func', None, (1,))
  co_filename: /home/pviktori/dev/slides/2017-03-23-belgrade-bytecode/module.py
  co_firstlineno: 1
  co_flags: 64
  co_freevars: ()
  co_kwonlyargcount: 0
  co_lnotab: b'\x04\x01\x04\x01\x0c\x02'
  co_name: <module>
  co_names: ('a', 'b', 'print', 'func')
  co_nlocals: 0
  co_stacksize: 3
  co_varnames: ()


In [66]:
!python -m dis module.py

  1           0 LOAD_CONST               0 (3)
              2 STORE_NAME               0 (a)

  2           4 LOAD_CONST               1 ('Hello ')
              6 STORE_NAME               1 (b)

  3           8 LOAD_NAME                2 (print)
             10 LOAD_NAME                0 (a)
             12 LOAD_NAME                1 (b)
             14 BINARY_MULTIPLY
             16 CALL_FUNCTION            1
             18 POP_TOP

  5          20 LOAD_CONST               6 ((1,))
             22 LOAD_CONST               3 (<code object func at 0x7fa54fd8ae40, file "module.py", line 5>)
             24 LOAD_CONST               4 ('func')
             26 MAKE_FUNCTION            1
             28 STORE_NAME               3 (func)
             30 LOAD_CONST               5 (None)
             32 RETURN_VALUE


In [67]:
list(code.co_lnotab)

[4, 1, 4, 1, 12, 2]

In [68]:
list(zip(code.co_lnotab[::2], code.co_lnotab[1::2]))

[(4, 1), (4, 1), (12, 2)]

### Summary
When Python needs to determine which line number a particular bytecode offset belongs to, it uses the line number table, or `lnotab`. The lnotab holds pairs of bytes; each pair says, effectively, how many bytes of bytecode to advance and by how many lines the line number then increases.

A full explanation is in the file `Objects/lnotab_notes.txt` in Python sources.

---
# Frames
Code objects, functions, and frames

In [69]:
def inner():  # returns its own frame object so it can be inspected after the call
    a = 1  # a local variable; shows up later in frame.f_locals
    frame = inspect.currentframe()  # the frame of this running call to inner()
    print(frame.f_lineno)  # line being executed right now, counted from the top of the cell
    return frame
def outer():  # adds one call level so the returned frame has outer's frame as f_back
    return inner()

frame = outer()
frame

4


<frame at 0x7fc7bc913048>

In [70]:
print('f_code:', frame.f_code)
print('f_lasti:', frame.f_lasti)
print('f_lineno:', frame.f_lineno)
print('f_locals:', frame.f_locals)
print('f_back:', frame.f_back)

f_code: <code object inner at 0x7fc7bc05e6f0, file "<ipython-input-69-898de4c7057a>", line 1>
f_lasti: 24
f_lineno: 5
f_locals: {'frame': <frame object at 0x7fc7bc913048>, 'a': 1}
f_back: <frame object at 0x7fc7bc913408>


In [71]:
print('f_code:', frame.f_back.f_code)
print('f_lasti:', frame.f_back.f_lasti)
print('f_lineno:', frame.f_back.f_lineno)
print('f_locals:', frame.f_back.f_locals)
print('f_back:', frame.f_back.f_back)

f_code: <code object outer at 0x7fc7bc05e810, file "<ipython-input-69-898de4c7057a>", line 6>
f_lasti: 4
f_lineno: 7
f_locals: {}
f_back: <frame object at 0x7fc7bc9139a8>


In [72]:
def get_current_line(frame):
    """Return ``'<code name>: <source line>'`` for the line *frame* is on.

    The source line is taken verbatim from ``inspect.getsourcelines``, so it
    keeps its original indentation and trailing newline.
    """
    code = frame.f_code
    source_lines, _start = inspect.getsourcelines(code)
    # f_lineno is an absolute line number; the list begins at the code
    # object's first line, so convert to a list index.
    offset = frame.f_lineno - code.co_firstlineno
    return '{}: {}'.format(code.co_name, source_lines[offset])

print(get_current_line(frame))
print(get_current_line(frame.f_back))


inner:     return frame

outer:     return inner()



In [73]:
import traceback
def recurse(wait=10):
    """Call itself until *wait* drops below zero, then return the call stack.

    The result is the list of strings produced by
    ``traceback.format_stack()`` in the innermost call.
    """
    if wait < 0:
        return traceback.format_stack()
    return recurse(wait - 1)

print(''.join(recurse()))

  File "/usr/lib64/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib64/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/pviktori/dev/slides/2017-03-23-belgrade-bytecode/__venv__/lib64/python3.6/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/home/pviktori/dev/slides/2017-03-23-belgrade-bytecode/__venv__/lib64/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/pviktori/dev/slides/2017-03-23-belgrade-bytecode/__venv__/lib64/python3.6/site-packages/ipykernel/kernelapp.py", line 474, in start
    ioloop.IOLoop.instance().start()
  File "/home/pviktori/dev/slides/2017-03-23-belgrade-bytecode/__venv__/lib64/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/home/pviktori/dev/slides/2017-03-23-belgrade-bytecode/__venv__/lib64/python3.6/s

### Summary
When code is executed, there exists a `frame` object for each running function. This object contains the current instruction, values of local variables, and a pointer to the "parent" frame, from which the current function was called.

A traceback is generated by walking from the current frame to its parent, its parent's parent, and so on. For each frame, the current line is looked up from the current instruction. (This lookup only takes place when the traceback is actually printed.)

---
# Questions ?

<br>

> Petr Viktorin
>
> encukou@gmail.com
>
> @encukou

In [77]:
!ls

'Addendums for Python Code Internals.ipynb'  __pycache__
demo.py					     'Python Code Internals.ipynb'
module.py				     __venv__
notes.txt


In [75]:
!ls __pycache__

module.cpython-36.pyc
