## Inspecting the memory layout of a Python list

based on https://jakevdp.github.io/blog/2014/05/09/why-python-is-slow/

In [1]:
import sys
import ctypes
import numpy

In [2]:
x=[1,[2,3]]
print(type(x), type(x[0]))

<class 'list'> <class 'int'>


In [3]:
# what is the data type for integers?
N=10000
print(N, "has type", type(N))

10000 has type <class 'int'>


In [4]:
# size in bytes:
a=46560000000000000000000000000000000000000

print("a has type", type(a))

print("bytes used to store 1:", sys.getsizeof(1))
print("bytes used to store a:", sys.getsizeof(a))

a has type <class 'int'>
bytes used to store 1: 28
bytes used to store a: 44


In [5]:
class IntStruct(ctypes.Structure): # PyLongObject
    _fields_ = [("ob_refcnt", ctypes.c_long),
                ("ob_type", ctypes.c_void_p),
                ("ob_size", ctypes.c_ulong),
                ("ob_digit", ctypes.c_long)]

This is how an int is represented in memory:

In [6]:
x = 100 # change to a really large number
y = 100

print(id(x), id(y), id(100), id(1234567))

info=IntStruct.from_address(id(x))
print(info.ob_refcnt, info.ob_type, info.ob_size, info.ob_digit)
# In Python, the reference count for 'x' is 167 because other variables are using 100. Python
# saves 100 as a reference instead of creating and destroying x=100 when needed. 'x' is simply
# reference to 100.

1968646870480 1968646870480 1968646870480 1968755635632
167 140713268776912 1 0


How about a List?

In [7]:
class ListStruct(ctypes.Structure): # struct PyListObject
    _fields_ = [("ob_refcnt", ctypes.c_long),
                ("ob_type", ctypes.c_void_p),
                ("ob_size", ctypes.c_ulong),
                ("ob_item", ctypes.c_long),  # PyObject** pointer cast to long
                ("allocated", ctypes.c_ulong)]

x = []#[1,[2,3]]
x.append(1)
x.append(2)
x.append(3)
x.append(4)
info = ListStruct.from_address(id(x))
print(type(x), info.ob_refcnt, info.ob_size, info.allocated, info.ob_item)


<class 'list'> 1 4 1660614288 0


What happens if we append to the list x?

In [8]:
y = x # this increases reference count
x.append(4) # this will resize the array
#x.append(4)
info = ListStruct.from_address(id(x))
print(type(x), info.ob_refcnt, info.ob_size, info.allocated, info.ob_item)


<class 'list'> 2 5 1660774192 0


In [9]:
x

[1, 2, 3, 4, 4]