In [3]:
import os
import sys
import dis

# The Future: No more bytecode!

In [4]:
print(sys.version)

3.6.0a2+ (default, Jun 22 2016, 13:01:41) 
[GCC 6.1.1 20160510 (Red Hat 6.1.1-2)]


In [5]:
!cat module.py

a = 3
b = 'Hello '
print(a * b)

def func(a=1, *b, **c):
    return 7 + 3


In [6]:
!python -m dis module.py

  1           0 LOAD_CONST               0 (3)
              2 STORE_NAME               0 (a)

  2           4 LOAD_CONST               1 ('Hello ')
              6 STORE_NAME               1 (b)

  3           8 LOAD_NAME                2 (print)
             10 LOAD_NAME                0 (a)
             12 LOAD_NAME                1 (b)
             14 BINARY_MULTIPLY
             16 CALL_FUNCTION            1 (1 positional, 0 keyword pair)
             18 POP_TOP

  5          20 LOAD_CONST               6 ((1,))
             22 LOAD_CONST               3 (<code object func at 0x7f3317b891c0, file "module.py", line 5>)
             24 LOAD_CONST               4 ('func')
             26 MAKE_FUNCTION            1
             28 STORE_NAME               3 (func)
             30 LOAD_CONST               5 (None)
             32 RETURN_VALUE


In [7]:
# Turn the text of module.py into a code object; nothing is executed here.
# The absolute path is passed to compile() as the code object's filename.
filename = os.path.abspath('module.py')
with open(filename) as f:
    source_text = f.read()
code = compile(source_text, filename, 'exec')
code  # last expression of the cell, so its repr is displayed

<code object <module> at 0x7fbc842cbd00, file "/home/pviktori/dev/slides/2016-06-22-pysilesia-bytecode/module.py", line 1>

In [8]:
print(list(code.co_code))

[100, 0, 90, 0, 100, 1, 90, 1, 101, 2, 101, 0, 101, 1, 20, 0, 131, 1, 1, 0, 100, 6, 100, 3, 100, 4, 132, 1, 90, 3, 100, 5, 83, 0]


In [9]:
def print_dis(code):
    """Print a compact disassembly of ``code``, one instruction per line.

    Each line shows the instruction's offset, its numeric opcode, its raw
    argument (``-`` when the instruction has none), and the opcode name with
    the human-readable argument in parentheses.  A blank line is printed
    before every instruction that starts a new source line.

    Args:
        code: the code object to disassemble.
    """
    # A set keeps the per-instruction membership test constant-time; the
    # offsets are only ever tested for membership, never iterated in order.
    line_starts = {offset for offset, _lineno in dis.findlinestarts(code)}
    for instr in dis.get_instructions(code):
        if instr.offset in line_starts:
            print()
        print('{i.offset:2}:  {i.opcode:3} {arg:>6}: {i.opname}({i.argrepr})'.format(
                i=instr,
                arg='-' if instr.arg is None else instr.arg))

print_dis(code)


 0:  100      0: LOAD_CONST(3)
 2:   90      0: STORE_NAME(a)

 4:  100      1: LOAD_CONST('Hello ')
 6:   90      1: STORE_NAME(b)

 8:  101      2: LOAD_NAME(print)
10:  101      0: LOAD_NAME(a)
12:  101      1: LOAD_NAME(b)
14:   20      -: BINARY_MULTIPLY()
16:  131      1: CALL_FUNCTION(1 positional, 0 keyword pair)
18:    1      -: POP_TOP()

20:  100      6: LOAD_CONST((1,))
22:  100      3: LOAD_CONST(<code object func at 0x7fbc85c51400, file "/home/pviktori/dev/slides/2016-06-22-pysilesia-bytecode/module.py", line 5>)
24:  100      4: LOAD_CONST('func')
26:  132      1: MAKE_FUNCTION()
28:   90      3: STORE_NAME(func)
30:  100      5: LOAD_CONST(None)
32:   83      -: RETURN_VALUE()


### Summary
Starting with Python 3.6, there will be no more bytecode. Python is switching to *wordcode*, where each instruction is two bytes long: a byte for the instruction, and a byte for the argument.
The benefit is that wordcode is faster to run: computers are apparently better at handling two-byte units than a mix of one- and three-byte ones.

The Python developers are using this opportunity to iron out some warts of the instruction set.

---
# Extra section I - extended arguments

In [10]:
def print_dis(code):
    """Print a compact disassembly of ``code``, one instruction per line.

    Each line shows the instruction's offset, its numeric opcode, its raw
    argument (``-`` when the instruction has none), and the opcode name with
    the human-readable argument in parentheses.  A blank line is printed
    before every instruction that starts a new source line.

    Args:
        code: the code object to disassemble.
    """
    # A set keeps the per-instruction membership test constant-time; the
    # offsets are only ever tested for membership, never iterated in order.
    line_starts = {offset for offset, _lineno in dis.findlinestarts(code)}
    for instr in dis.get_instructions(code):
        if instr.offset in line_starts:
            print()
        print('{i.offset:2}:  {i.opcode:3} {arg:>6}: {i.opname}({i.argrepr})'.format(
                i=instr,
                arg='-' if instr.arg is None else instr.arg))


In [11]:
# 260 assignments, each of a different integer, so that the constant indices
# used by LOAD_CONST run past 255 -- more than one argument byte can hold.
source = ''.join(map('a={}\n'.format, range(260)))
code = compile(source, '<file>', 'exec')

# Show the names behind the three opcode numbers printed here.
for opcode in (90, 100, 144):
    print(opcode, dis.opname[opcode])
print()

print(list(code.co_code))
print_dis(code)

90 STORE_NAME
100 LOAD_CONST
144 EXTENDED_ARG

[100, 0, 90, 0, 100, 1, 90, 0, 100, 2, 90, 0, 100, 3, 90, 0, 100, 4, 90, 0, 100, 5, 90, 0, 100, 6, 90, 0, 100, 7, 90, 0, 100, 8, 90, 0, 100, 9, 90, 0, 100, 10, 90, 0, 100, 11, 90, 0, 100, 12, 90, 0, 100, 13, 90, 0, 100, 14, 90, 0, 100, 15, 90, 0, 100, 16, 90, 0, 100, 17, 90, 0, 100, 18, 90, 0, 100, 19, 90, 0, 100, 20, 90, 0, 100, 21, 90, 0, 100, 22, 90, 0, 100, 23, 90, 0, 100, 24, 90, 0, 100, 25, 90, 0, 100, 26, 90, 0, 100, 27, 90, 0, 100, 28, 90, 0, 100, 29, 90, 0, 100, 30, 90, 0, 100, 31, 90, 0, 100, 32, 90, 0, 100, 33, 90, 0, 100, 34, 90, 0, 100, 35, 90, 0, 100, 36, 90, 0, 100, 37, 90, 0, 100, 38, 90, 0, 100, 39, 90, 0, 100, 40, 90, 0, 100, 41, 90, 0, 100, 42, 90, 0, 100, 43, 90, 0, 100, 44, 90, 0, 100, 45, 90, 0, 100, 46, 90, 0, 100, 47, 90, 0, 100, 48, 90, 0, 100, 49, 90, 0, 100, 50, 90, 0, 100, 51, 90, 0, 100, 52, 90, 0, 100, 53, 90, 0, 100, 54, 90, 0, 100, 55, 90, 0, 100, 56, 90, 0, 100, 57, 90, 0, 100, 58, 90, 0, 100, 59, 90, 0, 10

### Summary
When an argument doesn't fit in a single byte, a special `EXTENDED_ARG` instruction is placed in front of the instruction that needs it. It supplies one more byte of argument data (the higher-order bits) to the instruction that follows.

---
# Extra section II - Different Kinds of Variables

In [12]:
def print_dis(code):
    """Print a compact disassembly of ``code``, one instruction per line.

    Each line shows the instruction's offset, its numeric opcode, its raw
    argument (``-`` when the instruction has none), and the opcode name with
    the human-readable argument in parentheses.  A blank line is printed
    before every instruction that starts a new source line.

    Args:
        code: the code object to disassemble.
    """
    # A set keeps the per-instruction membership test constant-time; the
    # offsets are only ever tested for membership, never iterated in order.
    line_starts = {offset for offset, _lineno in dis.findlinestarts(code)}
    for instr in dis.get_instructions(code):
        if instr.offset in line_starts:
            print()
        print('{i.offset:2}:  {i.opcode:3} {arg:>6}: {i.opname}({i.argrepr})'.format(
                i=instr,
                arg='-' if instr.arg is None else instr.arg))

def dump_code_attrs(code):
    """Print ``code`` itself, then each of its non-dunder attributes.

    Attribute names are taken from ``dir(code)``; any name that begins with
    a double underscore is skipped.  Every remaining attribute is printed on
    its own indented line as ``name: value``.
    """
    print(code)
    shown_names = (name for name in dir(code) if name[:2] != '__')
    for name in shown_names:
        value = getattr(code, name)
        print('  {}: {}'.format(name, value))


In [13]:
!cat -n module.py

     1	a = 3
     2	b = 'Hello '
     3	print(a * b)
     4	
     5	def func(a=1, *b, **c):
     6	    return 7 + 3


In [14]:
# Compile module.py into a code object (nothing is executed), then show it
# three ways: raw attributes, the dis summary, and the instruction listing.
filename = os.path.abspath('module.py')
with open(filename) as f:
    source_text = f.read()
code = compile(source_text, filename, 'exec')

dump_code_attrs(code)
print()
summary = dis.code_info(code)
print(summary)
print_dis(code)

<code object <module> at 0x7fbc842cba00, file "/home/pviktori/dev/slides/2016-06-22-pysilesia-bytecode/module.py", line 1>
  co_argcount: 0
  co_cellvars: ()
  co_code: b'd\x00Z\x00d\x01Z\x01e\x02e\x00e\x01\x14\x00\x83\x01\x01\x00d\x06d\x03d\x04\x84\x01Z\x03d\x05S\x00'
  co_consts: (3, 'Hello ', 1, <code object func at 0x7fbc842cbc40, file "/home/pviktori/dev/slides/2016-06-22-pysilesia-bytecode/module.py", line 5>, 'func', None, (1,))
  co_filename: /home/pviktori/dev/slides/2016-06-22-pysilesia-bytecode/module.py
  co_firstlineno: 1
  co_flags: 64
  co_freevars: ()
  co_kwonlyargcount: 0
  co_lnotab: b'\x04\x01\x04\x01\x0c\x02'
  co_name: <module>
  co_names: ('a', 'b', 'print', 'func')
  co_nlocals: 0
  co_stacksize: 3
  co_varnames: ()

Name:              <module>
Filename:          /home/pviktori/dev/slides/2016-06-22-pysilesia-bytecode/module.py
Argument count:    0
Kw-only arguments: 0
Number of locals:  0
Stack size:        3
Flags:             NOFREE
Constants:
   0: 3
   1: '

In [15]:
n = 3  # module-level name; the function reads it with LOAD_GLOBAL
def repeat_string(a):  # the parameter `a` is a fast local (index 0)
    b = 'hello'  # assigned inside the function, so a fast local too (index 1)
    return a * n * b

# The explanatory comments above sit at the end of existing lines on purpose:
# a comment on a line of its own would shift the line numbers that the dump
# below reports (co_firstlineno, co_lnotab).
dump_code_attrs(repeat_string.__code__)
print()
print(dis.code_info(repeat_string.__code__))
print_dis(repeat_string.__code__)

<code object repeat_string at 0x7fbc842cb640, file "<ipython-input-15-3410d72e56a2>", line 2>
  co_argcount: 1
  co_cellvars: ()
  co_code: b'd\x01}\x01|\x00t\x00\x14\x00|\x01\x14\x00S\x00'
  co_consts: (None, 'hello')
  co_filename: <ipython-input-15-3410d72e56a2>
  co_firstlineno: 2
  co_flags: 67
  co_freevars: ()
  co_kwonlyargcount: 0
  co_lnotab: b'\x00\x01\x04\x01'
  co_name: repeat_string
  co_names: ('n',)
  co_nlocals: 2
  co_stacksize: 2
  co_varnames: ('a', 'b')

Name:              repeat_string
Filename:          <ipython-input-15-3410d72e56a2>
Argument count:    1
Kw-only arguments: 0
Number of locals:  2
Stack size:        2
Flags:             OPTIMIZED, NEWLOCALS, NOFREE
Constants:
   0: None
   1: 'hello'
Names:
   0: n
Variable names:
   0: a
   1: b

 0:  100      1: LOAD_CONST('hello')
 2:  125      1: STORE_FAST(b)

 4:  124      0: LOAD_FAST(a)
 6:  116      0: LOAD_GLOBAL(n)
 8:   20      -: BINARY_MULTIPLY()
10:  124      1: LOAD_FAST(b)
12:   20      -: BINARY_

In [None]:
# `a` is assigned at module level and read inside the class body below.
# make_class() is never called in this cell; only its compiled code is inspected.
a = 3
def make_class():
    class SomeClass:
        b = 'hello'
        print(a * b)

consts = make_class.__code__.co_consts
print(dict(enumerate(type(t).__name__ for t in consts)))

# Select the class body's code object by type rather than by a fixed index:
# where the compiler places it inside co_consts is an implementation detail.
# make_class contains exactly one nested code object, so the first match is it.
class_code = next(
    const for const in consts
    if isinstance(const, type(make_class.__code__))
)
print()
dump_code_attrs(class_code)
print()
print(dis.code_info(class_code))
print_dis(class_code)

### Summary
There are several different kinds of variables:

* Named variables, which are always looked up by name. Module-level code uses these, and they're also used by class code.
* Global variables, which are looked up by name in the global (module) namespace. Functions and classes use these when needed.
* "Fast local" variables, which are accessed by index, so the access is very fast. Functions use these. This means the compiler must know the names of all of a function's local variables at compile time, which is why `import *` is not allowed in functions.
* (There are also cell variables, which are used in closures to access the outer namespace.)

In [16]:
import module

Hello Hello Hello 


In [17]:
module.__dict__

{'__builtins__': {'ArithmeticError': ArithmeticError,
  'AssertionError': AssertionError,
  'AttributeError': AttributeError,
  'BaseException': BaseException,
  'BlockingIOError': BlockingIOError,
  'BrokenPipeError': BrokenPipeError,
  'BufferError': BufferError,
  'ChildProcessError': ChildProcessError,
  'ConnectionAbortedError': ConnectionAbortedError,
  'ConnectionError': ConnectionError,
  'ConnectionRefusedError': ConnectionRefusedError,
  'ConnectionResetError': ConnectionResetError,
  'EOFError': EOFError,
  'Ellipsis': Ellipsis,
  'EnvironmentError': OSError,
  'Exception': Exception,
  'False': False,
  'FileExistsError': FileExistsError,
  'FileNotFoundError': FileNotFoundError,
  'FloatingPointError': FloatingPointError,
  'GeneratorExit': GeneratorExit,
  'IOError': OSError,
  'ImportError': ImportError,
  'IndentationError': IndentationError,
  'IndexError': IndexError,
  'InterruptedError': InterruptedError,
  'IsADirectoryError': IsADirectoryError,
  'KeyError': KeyEr

In [21]:
module.func.__globals__['f'] = 3

In [22]:
module.f

3

In [23]:
module.__dict__['f']

3

In [27]:
class Foo:
    ...

# Attributes, special methods included, can be attached to the class after
# the class statement has run.
Foo.f = 123
# The body must not be `self + other`: that expression dispatches straight
# back to this same method and recurses until RecursionError. Adding to the
# attribute `f` gives the operator an actual result.
Foo.__add__ = lambda self, other: self.f + other

In [33]:
# Take whatever type Foo.__dict__ has and instantiate it around a plain dict.
dct = dict(a=1, b=2)
class_dict_type = type(Foo.__dict__)
yeah = class_dict_type(dct)

In [32]:
yeah['a']

1

In [34]:
dct[5] = 6
yeah[5]

6

In [36]:
type(4)("9")

9