# Object Mutability and Interning

Today's Topics

* Variables and Memory References
* Reference Counting
* Garbage Collection
* Dynamic vs Static Typing
* Variable Re-Assignment
* Object Mutability
* Function Arguments & Mutability
* Shared References & Mutability
* Variable Equality
* Everything is an Object
* Python Optimizations: Interning
* Python Optimizations: String Interning
* Python Optimizations: Peephole

# Variables and Memory References

In [5]:
my_var = 10 
print(f'my_var = {my_var}')
print(f'memory of the my_var (decimal) = {id(my_var)}')
print(f'memory of the my_var (hex) = {hex(id(my_var))}')

my_var = 10
memory of the my_var (decimal) = 139674772743520
memory of the my_var (hex) = 0x7f089136a560


In [6]:
a = 10
print(f'memory of the my_var (hex) = {hex(id(a))}')

memory of the my_var (hex) = 0x7f089136a560


In [7]:
a = 10000
print(f'memory of the my_var (hex) = {hex(id(a))}')
b = 10000
print(f'memory of the my_var (hex) = {hex(id(b))}')

memory of the my_var (hex) = 0x7f085f8b9bf0
memory of the my_var (hex) = 0x7f085f8b99d0


In [8]:
greeting = 'Hello' 
print(f'greeting = {greeting}')
print(f'memory of the greeting (decimal) = {id(greeting)}')
print(f'memory of the greeting (hex) = {hex(id(greeting))}')

greeting = Hello
memory of the greeting (decimal) = 139673939778736
memory of the greeting (hex) = 0x7f085f909cb0


### Reference Counting

In [26]:
import ctypes

def ref_count(address):
    """Return the reference count stored at the start of the object at ``address``.

    ``address`` is expected to be the ``id()`` of a CPython object. The value
    is read straight from memory, so no extra reference is created by the
    call itself (compare with ``sys.getrefcount``, which reports one more).
    If the object has already been freed, the number read is meaningless.
    """
    # The reference count field is a Py_ssize_t; c_ssize_t is its ctypes
    # equivalent. c_long is only 32 bits wide on 64-bit Windows and would
    # read just part of the field there.
    # Assumes the default CPython object layout (count at offset 0).
    return ctypes.c_ssize_t.from_address(address).value

In [27]:
my_var = [1, 2, 3, 4]
ref_count(id(my_var))

1

In [28]:
import sys

sys.getrefcount(my_var)

2

In [29]:
other_var = my_var

In [30]:
print(hex(id(my_var))), print(hex(id(other_var)))

0x7f085eee0640
0x7f085eee0640


(None, None)

In [31]:
ref_count(id(my_var))

2

In [32]:
sys.getrefcount(my_var)

3

In [33]:
other_var = None

In [34]:
ref_count(id(my_var))

1

In [35]:
sys.getrefcount(my_var)

2

In [36]:
my_var = None
ref_count(id(my_var))

47553

In [37]:
id(None)

139674772416240

In [38]:
id(my_var)

139674772416240

### Garbage Collection

In [39]:
import ctypes
import gc

In [40]:
def ref_count(address):
    """Return the reference count stored at the start of the object at ``address``.

    ``address`` is expected to be the ``id()`` of a CPython object. The value
    is read straight from memory, so it can still be called with the address
    of an object that no variable refers to any more; once the object has
    been freed, the number read is meaningless.
    """
    # The reference count field is a Py_ssize_t; c_ssize_t is its ctypes
    # equivalent. c_long is only 32 bits wide on 64-bit Windows and would
    # read just part of the field there.
    # Assumes the default CPython object layout (count at offset 0).
    return ctypes.c_ssize_t.from_address(address).value

In [42]:
def object_by_id(object_id):
    """Report whether the garbage collector currently tracks ``object_id``.

    Compares ``object_id`` with the ``id()`` of every object returned by
    ``gc.get_objects()``. Returns the string "Object exists" on the first
    match and "Not found" when nothing matches.
    """
    is_tracked = any(
        id(candidate) == object_id for candidate in gc.get_objects()
    )
    return "Object exists" if is_tracked else "Not found"

In [77]:
class B:
    """Child half of the demo cycle: keeps a reference back to its creator."""

    def __init__(self, a):
        # `a` is the A instance that is still being constructed; storing it
        # here closes the A -> B -> A reference cycle.
        self.a = a
        print(f'B: self: {hex(id(self)), hex(id(self.a))}')


class A:
    """Parent half of the demo cycle: creates a B and keeps it in ``self.b``."""

    def __init__(self):
        # B's constructor runs (and prints) before this constructor prints.
        child = B(self)
        self.b = child
        print(f'A: self: {hex(id(self)), hex(id(self.b))}')

In [78]:
gc.disable()

In [79]:
my_var = A()

B: self: ('0x7f085e416d90', '0x7f085e416cd0')
A: self: ('0x7f085e416cd0', '0x7f085e416d90')


In [80]:
hex(id(my_var))

'0x7f085e416cd0'

In [81]:
print(f'my_var: {hex(id(my_var))}') # 1
print(f'my_var.b: {hex(id(my_var.b))}') # 2
print(f'my_var.b.a: {hex(id(my_var.b.a))}') # 3

my_var: 0x7f085e416cd0
my_var.b: 0x7f085e416d90
my_var.b.a: 0x7f085e416cd0


In [82]:
a_id = id(my_var)
b_id = id(my_var.b)

In [83]:
print(f'refcount(a) = {ref_count(a_id)}')
print(f'refcount(b) = {ref_count(b_id)}')
print(f'a: {object_by_id(a_id)}')
print(f'b: {object_by_id(b_id)}')

refcount(a) = 2
refcount(b) = 1
a: Object exists
b: Object exists


In [84]:
my_var = None

In [85]:
print(f'refcount(a) = {ref_count(a_id)}')
print(f'refcount(b) = {ref_count(b_id)}')
print(f'a: {object_by_id(a_id)}')
print(f'b: {object_by_id(b_id)}')

refcount(a) = 1
refcount(b) = 1
a: Object exists
b: Object exists


In [86]:
gc.collect()
print(f'refcount(a) = {ref_count(a_id)}')
print(f'refcount(b) = {ref_count(b_id)}')
print(f'a: {object_by_id(a_id)}')
print(f'b: {object_by_id(b_id)}')

refcount(a) = 0
refcount(b) = 0
a: Not found
b: Not found


In [87]:
gc.enable()

### Dynamic Typing

In [88]:
a = "hello"
type(a)

str

In [89]:
a = 10
type(a)

int

In [90]:
a = lambda x: x ** 2
a(2)

4

In [91]:
type(a)

function

### Variable Re-assignment

In [92]:
a = 10
hex(id(a))

'0x7f089136a560'

In [93]:
a = 15
hex(id(a))

'0x7f089136a600'

In [94]:
a = 5
hex(id(a))

'0x7f089136a4c0'

In [95]:
a = a + 1
hex(id(a))

'0x7f089136a4e0'

In [96]:
a = 10
b = 10
hex(id(a)), hex(id(b))

('0x7f089136a560', '0x7f089136a560')

In [97]:
a = 10000
b = 10000
hex(id(a)), hex(id(b))

('0x7f085e4bed30', '0x7f085e4be470')

### Object Mutability

In [99]:
a = 10
print(hex(id(a)))
a = 20
print(hex(id(a)))

0x7f089136a560
0x7f089136a6a0


In [100]:
my_list = [1, 2, 3]
print(my_list)
print(hex(id(my_list)))

[1, 2, 3]
0x7f085ec7ad70


In [101]:
my_list.append(4)
print(my_list)
print(hex(id(my_list)))

[1, 2, 3, 4]
0x7f085ec7ad70


In [102]:
my_list_1 = [1, 2, 3]
print(my_list_1)
print(hex(id(my_list_1)))

[1, 2, 3]
0x7f085ecfa190


In [104]:
my_list_1 = my_list_1 + [4]
print(my_list_1)
print(hex(id(my_list_1)))

[1, 2, 3, 4]
0x7f085e659640


In [105]:
k = 10
l = 11
k, l = l, k
print(k, l)

11 10


In [106]:
my_dict = dict(key1 = 'value 1')
print(my_dict)
print(hex(id(my_dict)))

{'key1': 'value 1'}
0x7f085e406780


In [107]:
my_dict['key1'] = 'modified value 10'
print(my_dict)
print(hex(id(my_dict)))

{'key1': 'modified value 10'}
0x7f085e406780


In [108]:
my_dict['key2'] = 'value2'
print(my_dict)
print(hex(id(my_dict)))

{'key1': 'modified value 10', 'key2': 'value2'}
0x7f085e406780


Now consider the immutable sequence type: **tuple**

In [113]:
t = (1, 2, 3)

In [116]:
a = [1, 2]
b = [3, 4]
t = (a, b)


([1, 2], [3, 4])

In [118]:
a.append(3)
b.append(5)
print(t)

([1, 2, 3], [3, 4, 5])


### Function Arguments and Mutability

In [119]:
def process(s):
    """Print the id of ``s`` before and after rebinding it inside the function.

    The local name ``s`` is rebound to the result of ``s + 'world'``; nothing
    is returned.
    """
    print(f'initial s # = {hex(id(s))}')
    # Only the local name `s` is rebound here; in the recorded run the
    # caller's variable still reports its original id afterwards.
    s = s + 'world'
    print(f's after the change  # = {hex(id(s))}')

In [121]:
my_var = 'hello'
print(f'my_var #= {hex(id(my_var))}')

my_var #= 0x7f085e7c27f0


In [122]:
process(my_var)

initial s # = 0x7f085e7c27f0
s after the change  # = 0x7f085e8fa130


In [123]:
print(f'my_var #= {hex(id(my_var))}')

my_var #= 0x7f085e7c27f0


In [127]:
o = "world"
print(id(o))
p = "world"
# Print the id of `p` (the original printed `o` twice), so the two lines
# really compare the two names.
print(id(p))

139673923309168
139673923309168


In [132]:
# working with mutable objects

def modify_list(items):
    """Mutate ``items`` in place and print its id before and after.

    For a non-empty list the first element is squared and the last element
    is removed; ``5`` is then appended in every case. An empty list is no
    longer an error: the squaring and the ``pop()`` are both skipped instead
    of ``pop()`` raising ``IndexError``. Returns ``None``.
    """
    print(f'initial items #= {hex(id(items))}')
    if items:
        # Both operations need at least one element, so they share the guard.
        items[0] = items[0] ** 2
        items.pop()
    items.append(5)
    print(f'final items #= {hex(id(items))}')


In [134]:
my_list = [2, 3, 4]
print(f'my_list #= {hex(id(my_list))}')

modify_list(my_list)

print(f'my_list #= {hex(id(my_list))}')

my_list #= 0x7f085f01caf0
initial items #= 0x7f085f01caf0
final items #= 0x7f085f01caf0
my_list #= 0x7f085f01caf0


In [135]:
my_list

[4, 3, 5]

In [137]:
def modify_tuple(t):
    """Append 100 to the list held at ``t[0]``, printing ``id(t)`` around it.

    The tuple itself is never rebound; only the list stored in its first
    slot is mutated. Returns ``None``.
    """
    print(hex(id(t)))
    first_item = t[0]
    first_item.append(100)
    print(hex(id(t)))

my_tuple = ([1, 2], 'a', 45.09)

print(f'{hex(id(my_tuple))}')

modify_tuple(my_tuple)

print(f'{hex(id(my_tuple))}')

0x7f085edc7b40
0x7f085edc7b40
0x7f085edc7b40
0x7f085edc7b40


### Shared References and Mutability

In [138]:
my_var_1 = 'hello'

my_var_2 = my_var_1

print(my_var_1)
print(my_var_2)

hello
hello


In [139]:
print(hex(id(my_var_1)))
print(hex(id(my_var_2)))

0x7f085e7c27f0
0x7f085e7c27f0


In [140]:
my_var_2 = my_var_2 + ' world!'

print(hex(id(my_var_1)))
print(hex(id(my_var_2)))

0x7f085e7c27f0
0x7f085e4b9230


In [141]:
my_list_1 = [1, 2, 3]

my_list_2 = my_list_1

print(my_list_1)
print(my_list_2)

[1, 2, 3]
[1, 2, 3]


In [142]:
print(hex(id(my_list_1)))
print(hex(id(my_list_2)))

0x7f085edde500
0x7f085edde500


In [143]:
my_list_2.append(4)

print(my_list_1)
print(my_list_2)

print(hex(id(my_list_1)))
print(hex(id(my_list_2)))

[1, 2, 3, 4]
[1, 2, 3, 4]
0x7f085edde500
0x7f085edde500


In [144]:
a = 10 
b = 10

hex(id(a)), hex(id(b))

('0x7f089136a560', '0x7f089136a560')

In [145]:
b = 15
hex(id(a)), hex(id(b))

('0x7f089136a560', '0x7f089136a600')

In [146]:
my_list_1 = [1, 2, 3]
my_list_2 = [1, 2 , 3]

In [147]:
hex(id(my_list_1)), hex(id(my_list_2))

('0x7f085e4052d0', '0x7f085ef1c3c0')

### Variable Equality

In [148]:
a = 10 
b = 10

hex(id(a)), hex(id(b))

('0x7f089136a560', '0x7f089136a560')

In [149]:
print("a is b: ", a is b)

a is b:  True


In [152]:
a = 10
b = 10

hex(id(a)), hex(id(b))

print("a is b: ", a is b)
print("a is b: ", a == b)

a is b:  True
a is b:  True


In [153]:
a = 10
b = 10.0

hex(id(a)), hex(id(b))

print("a is b: ", a is b)
print("a == b: ", a == b)

a is b:  False
a == b:  True


In [154]:
print(type(a), type(b))

<class 'int'> <class 'float'>


In [155]:
hex(id(a)), hex(id(b))

('0x7f089136a560', '0x7f085e5a3470')

In [156]:
c = 10 + 0j
print(type(c))


print("a is c: ", a is c)
print("a == c: ", a == c)

<class 'complex'>
a is c:  False
a == c:  True


In [157]:
type(None )

NoneType

In [158]:
print(None)

None


In [161]:
print(hex(id(None)))

0x7f089131a6f0


In [162]:
a = None
print(type(a))
print(hex(id(a)))


<class 'NoneType'>
0x7f089131a6f0


In [163]:
a is None 

True

In [164]:
a == None

True

In [165]:
b = None
print(type(b))
print(hex(id(b)))

<class 'NoneType'>
0x7f089131a6f0


In [166]:
a is b

True

In [167]:
l = []
type(l), l is None

(list, False)

In [168]:
l == None

False

## Everything is an Object

In [169]:
a = 10

In [170]:
print(type(a))

<class 'int'>


In [171]:
a = lambda x:x**2
print(type(a))

<class 'function'>


In [172]:
b = int(10)
print(type(b))

<class 'int'>


In [174]:
help(int)

Help on class int in module builtins:

class int(object)
 |  int([x]) -> integer
 |  int(x, base=10) -> integer
 |  
 |  Convert a number or string to an integer, or return 0 if no arguments
 |  are given.  If x is a number, return x.__int__().  For floating point
 |  numbers, this truncates towards zero.
 |  
 |  If x is not a number or if base is given, then x must be a string,
 |  bytes, or bytearray instance representing an integer literal in the
 |  given base.  The literal can be preceded by '+' or '-' and be surrounded
 |  by whitespace.  The base defaults to 10.  Valid bases are 0 and 2-36.
 |  Base 0 means to interpret the base from the string as an integer literal.
 |  >>> int('0b100', base=0)
 |  4
 |  
 |  Methods defined here:
 |  
 |  __abs__(self, /)
 |      abs(self)
 |  
 |  __add__(self, value, /)
 |      Return self+value.
 |  
 |  __and__(self, value, /)
 |      Return self&value.
 |  
 |  __bool__(self, /)
 |      self != 0
 |  
 |  __ceil__(...)
 |      Ceiling of

In [175]:
b = int('10')
b

10

In [177]:
def square(a):
    """Return ``a`` raised to the power of two."""
    return pow(a, 2)

print(type(square))

<class 'function'>


In [179]:
f = square

type(f), f is square

(function, True)

In [180]:
f(2)

4

In [181]:
def cube(a):
    """Return ``a`` raised to the power of three."""
    return pow(a, 3)

def sel_function(fn_id):
    """Select one of the demo functions.

    Returns ``square`` when ``fn_id`` compares equal to 1 and ``cube`` for
    any other value. The function object itself is returned, not called.
    """
    return square if fn_id == 1 else cube

In [182]:
f = sel_function(1)
print(hex(id(f)))
print(hex(id(square)))
print(hex(id(cube)))
print(type(f))
print('f is square', f is square)
print('f is cube', f is cube)

0x7f085eab7440
0x7f085eab7440
0x7f085eb0b290
<class 'function'>
f is square True
f is cube False


In [183]:
f = sel_function(2)
print(hex(id(f)))
print(hex(id(square)))
print(hex(id(cube)))
print(type(f))
print('f is square', f is square)
print('f is cube', f is cube)

0x7f085eb0b290
0x7f085eab7440
0x7f085eb0b290
<class 'function'>
f is square False
f is cube True


In [184]:
sel_function(1)(5)

25

In [190]:
sel_function(1 + 0j)(5)

25

In [193]:
1 is 1 + 0j

False

In [194]:
def exec_function(fn, n):
    """Call ``fn`` with the single argument ``n`` and return its result."""
    outcome = fn(n)
    return outcome

In [197]:
result = exec_function(square, 2)
print(result)

4


### Python Interning


In [198]:
a = 10
b = 10
id(a), id(b)

(139674772743520, 139674772743520)

In [212]:
a = 256
b = 256
id(a), id(b) # -5 to 256

(139674772751392, 139674772751392)

This is because Python pre-caches (interns) the integers in the range -5 to 256.

In [214]:
a = -5
b = -5
id(a), id(b)

(139674772743040, 139674772743040)

The integers in the range [-5, 256] are essentially **singleton** objects.

In [215]:
a = 10
b = int(10)
c = int('10')
d = int('1010', 2)

print(a, b, c, d)
a is b, b is c, c is d

10 10 10 10


(True, True, True)

### Python Optimizations: String Interning

Identifiers (variable names, function names, class names, etc.) are interned.

Python will also intern string literals that _look_ like identifiers.

In [218]:
a = 'hello'
b = 'hello'
id(a), id(b)

(139673921660912, 139673921660912)

In [219]:
a = 'hello, world!'
b = 'hello, world!'
id(a), id(b)

(139673922490224, 139673922490288)

In [221]:
a = 'hello_world'
b = 'hello_world'
id(a), id(b)

(139673919156464, 139673919156464)

In [222]:
def func!!():
    pass

SyntaxError: invalid syntax (<ipython-input-222-6ff625680554>, line 1)

In [223]:
a = 'this_is_a_abrudly_long_sentense_that_doesnt_seem_to_end_today_or_may_be_tomorrow'
b = 'this_is_a_abrudly_long_sentense_that_doesnt_seem_to_end_today_or_may_be_tomorrow'
id(a), id(b)

(139673917316384, 139673917316384)

In [224]:
a = '1_hello_world'
b = '1_hello_world'
id(a), id(b)

(139673921826416, 139673921826416)

In [225]:
a = '1 hello_world'
b = '1 hello_world'
id(a), id(b)

(139673921825712, 139673921823984)

In [227]:
a = 'this_is_a_abrudly_long_sentense_that_doesnt_seem_to_end_today_or_may_be_tomorrow'
b = 'this_is_a_abrudly_long_sentense_that_doesnt_seem_to_end_today_or_may_be_tomorrow'
print('a==b', a == b)
print('a is b', a is b)

a==b True
a is b True


In [228]:
import sys

In [229]:
a = sys.intern('hello world')
b = sys.intern('hello world')
c = 'hello world'

id(a), id(b), id(c)

(139673922617840, 139673922617840, 139673922619952)

In [230]:
print('a==b', a == b)
print('a is b', a is b)

a==b True
a is b True


In [231]:
print('a==c', a == c)
print('a is c', a is c)

a==c True
a is c False


In [233]:
def compare_using_equals(n):
    """Compare two long, equal strings with ``==`` ``n`` times.

    The two strings are produced by two separate multiplications, so each
    comparison is made between two separately built values. Nothing is
    returned; the function exists only to be timed.
    """
    phrase = 'a long string that is not interned'
    left = phrase * 200
    right = phrase * 200
    for _ in range(n):
        if left == right:
            pass

In [234]:
def compare_using_interning(n):
    """Compare two interned long strings with ``is`` ``n`` times.

    Both strings are built separately and each is passed through
    ``sys.intern`` before the loop. Nothing is returned; the function
    exists only to be timed.
    """
    phrase = 'a long string that is not interned'
    left = sys.intern(phrase * 200)
    right = sys.intern(phrase * 200)
    for _ in range(n):
        if left is right:
            pass

In [237]:
import time

start = time.perf_counter()
compare_using_equals(100000000)
end = time.perf_counter()
print(f'Equality time: {end - start}')

Equality time: 14.166366203979123


In [238]:
import time

start = time.perf_counter()
compare_using_interning(100000000)
end = time.perf_counter()
print(f'Indentity time: {end - start}')

Indentity time: 3.004822214017622


### Python Optimizations: Peephole

#### Constant Expressions

In [251]:
def my_func():
    # Demo function: its body is never meant to do useful work. It is
    # inspected through my_func.__code__.co_consts to see which constant
    # expressions were pre-computed when the function was compiled.
    # Only `#` comments are used here; NOTE(review): a docstring would
    # presumably be stored in co_consts and alter the demo - confirm.
    a = 20 * 60  # recorded co_consts contains the product 1200
    b = (1, 2) * 5  # recorded co_consts contains the full 10-element tuple
    c = 'abc' * 3  # recorded co_consts contains 'abcabcabc'
    d = 'ad' * 11  # recorded co_consts contains the 22-character result
    # Recorded co_consts keeps 'the quick brown fox' and the multiplier as
    # separate entries, i.e. this product was not pre-computed.
    # NOTE(review): the recorded multiplier is 10000, not 10 - the output
    # appears to come from a different version of this cell; verify.
    e = 'the quick brown fox' * 10
    # Recorded co_consts lists 1, 2 and 5 individually: the list product
    # was not pre-computed.
    f = [1, 2] * 5


In [246]:
my_func.__code__.co_consts

(None,
 1200,
 (1, 2, 1, 2, 1, 2, 1, 2, 1, 2),
 'abcabcabc',
 'adadadadadadadadadadad',
 'the quick brown fox',
 10000,
 1,
 2,
 5)

#### Membership Tests



In [242]:
my_func()

In [247]:
def my_func():
    # Demo for membership tests against a list literal; inspect
    # my_func.__code__.co_consts afterwards. The recorded output shows the
    # list literal stored as the tuple (1, 2, 3).
    # `e` is neither a parameter nor a local, so it is looked up as a global
    # when the function is called.
    if e in [1, 2, 3]:
        pass

In [248]:
my_func.__code__.co_consts

(None, (1, 2, 3))

In [252]:
print(my_func())

None


In [253]:
def my_func():
    # Demo for membership tests against a set literal; the recorded
    # co_consts shows the set literal stored as frozenset({1, 2, 3}).
    # `e` is neither a parameter nor a local, so it is looked up as a global
    # when the function is called.
    if e in {1, 2, 3}:
        pass

my_func.__code__.co_consts

(None, frozenset({1, 2, 3}))

In [255]:
import string
import time

char_list = list(string.ascii_letters)
char_tuple = tuple(string.ascii_letters)
char_set = set(string.ascii_letters)

print(char_list)
print()
print(char_tuple)
print()
print(char_set)

['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']

('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z')

{'a', 'g', 'p', 'X', 'z', 'h', 'r', 'l', 'F', 'Q', 'A', 'j', 'K', 'M', 'N', 'J', 'y', 'm', 'H', 'P', 'e', 'I', 'Y', 'V', 'o', 'u', 'U', 'W', 'f', 'B', 'Z', 'v', 'k', 't', 'L', 'E', 'w', 'c', 'O', 'G', 'd', 'C', 'i', 'q', 'b', 'S', 'n', 's', 'R', 'D', 'T', 'x'}


In [256]:
def membership_test(n, container):
    """Evaluate ``'p' in container`` ``n`` times and discard the result.

    Used only for timing membership tests on different container types;
    nothing is returned.
    """
    for _ in range(n):
        found = 'p' in container
        if found:
            pass

In [259]:
start = time.perf_counter()
membership_test(10000000, char_list)
end = time.perf_counter()
print(f'list membership: {end - start}')

list membership: 2.8887458440149203


In [260]:
start = time.perf_counter()
membership_test(10000000, char_tuple)
end = time.perf_counter()
print(f'tuple membership: {end - start}')

tuple membership: 2.981632637005532


In [261]:
start = time.perf_counter()
membership_test(10000000, char_set)
end = time.perf_counter()
print(f'set membership: {end - start}')

set membership: 0.38675689199590124


In [262]:
dd = sys.intern(100)

TypeError: intern() argument 1 must be str, not int