# CPU and Memory Profiling

Profiling CPU and memory with Python.

## Timing code

We can use `time.time()` to time code. Let's define a few different ways of determining whether a number is prime.

In [1]:
def is_prime_fn1(x):
    """Return True if ``x`` is a prime number, using naive trial division.

    Every integer from 2 up to ``x - 2`` is tried as a divisor, so the work
    grows linearly with ``x``.

    Args:
        x: Integer to test.

    Returns:
        True if ``x`` is prime, otherwise False. Values below 2 (0, 1 and
        negative numbers) are never prime.
    """
    if x < 2:
        # Without this guard the divisor range below is empty for 0, 1 and
        # negatives, and they would wrongly be reported as prime.
        return False
    for i in range(2, x-1):
        if x % i == 0:
            return False
    return True

def is_prime_fn2(x):
    """Return True if ``x`` is a prime number, testing divisors up to sqrt(x).

    Only integers from 2 up to ``int(x ** 0.5)`` are tried, because any
    composite number has a divisor no larger than its square root.

    Args:
        x: Integer to test.

    Returns:
        True if ``x`` is prime, otherwise False. Values below 2 (0, 1 and
        negative numbers) are never prime.
    """
    if x < 2:
        # Guard first: 0 and 1 would otherwise fall through an empty loop and
        # be reported as prime, and a negative x makes x**.5 complex, which
        # int() cannot convert.
        return False
    for i in range(2, int(x**.5)+1):
        if x % i == 0:
            return False
    return True

# Lambda variant of the square-root trial-division test. The leading
# ``x > 1`` check rejects 0, 1 and negative numbers, which would otherwise be
# reported as prime (empty divisor range) or raise (complex square root).
is_prime_lambda = lambda x: x > 1 and all(x % i != 0 for i in range(2, int(x**.5)+1))

def get_primes(y, func):
    """Collect the integers in ``range(y)`` accepted by ``func``.

    Args:
        y: Exclusive upper bound; candidates are 0, 1, ..., ``y - 1``.
        func: Callable taking one integer; a truthy result keeps the value.

    Returns:
        A list of the accepted integers in ascending order.
    """
    return [candidate for candidate in range(y) if func(candidate)]

We can use the `time` module to compare the different methods.

In [3]:
import time

# Run the same workload through each primality test and report the
# wall-clock time taken, as measured with time.time().
for label, prime_test in (
    ("is_prime_fn1", is_prime_fn1),
    ("is_prime_fn2", is_prime_fn2),
    ("is_prime_lambda", is_prime_lambda),
):
    start_time = time.time()
    get_primes(1000, prime_test)
    print(f"{label}: elapsed time = {time.time() - start_time} seconds")

is_prime_fn1: elapsed time = 0.018995285034179688 seconds
is_prime_fn2: elapsed time = 0.002004384994506836 seconds
is_prime_lambda: elapsed time = 0.004999637603759766 seconds


We can use `time.perf_counter()` for high-resolution, system-wide elapsed time. This includes time spent sleeping or waiting, and anything else going on in the system while the code runs.

In [9]:
# Same comparison using time.perf_counter().
# (time.clock() was deprecated in Python 3.3 and removed in Python 3.8.)
for label, prime_test in (
    ("is_prime_fn1", is_prime_fn1),
    ("is_prime_fn2", is_prime_fn2),
    ("is_prime_lambda", is_prime_lambda),
):
    start_time = time.perf_counter()
    get_primes(10000, prime_test)
    print(f"{label}: elapsed time = {time.perf_counter() - start_time} seconds")

is_prime_fn1: elapsed time = 0.9100839999737218 seconds
is_prime_fn2: elapsed time = 0.020037800015415996 seconds
is_prime_lambda: elapsed time = 0.046718800003873184 seconds


We can use `time.process_time()` if we only want the CPU time used by this process, excluding any time spent sleeping...

In [8]:
# Same comparison using time.process_time() as the clock.
for label, prime_test in (
    ("is_prime_fn1", is_prime_fn1),
    ("is_prime_fn2", is_prime_fn2),
    ("is_prime_lambda", is_prime_lambda),
):
    start_time = time.process_time()
    get_primes(10000, prime_test)
    print(f"{label}: elapsed time = {time.process_time() - start_time} seconds")

is_prime_fn1: elapsed time = 0.9375 seconds
is_prime_fn2: elapsed time = 0.03125 seconds
is_prime_lambda: elapsed time = 0.046875 seconds


We can use `time.perf_counter_ns()` (or `time.process_time_ns()`) for counting in nanoseconds...

In [10]:
# Time a single print() call using the integer nanosecond counter.
start_time = time.perf_counter_ns()
print("Hello!")
elapsed_ns = time.perf_counter_ns() - start_time
print(f"print elapsed time = {elapsed_ns}ns")

Hello!
print elapsed time = 1043000ns


We can use `timeit.timeit()` if you prefer...

In [11]:
import timeit

# Time five runs of the workload for each primality test. The statement is
# passed to timeit as source text and evaluated against this module's globals.
for prime_test_name in ("is_prime_fn1", "is_prime_fn2", "is_prime_lambda"):
    statement = f"get_primes(10000, {prime_test_name})"
    print(timeit.timeit(statement, globals=globals(), number=5))

4.405430500017246
0.10138859998551197
0.2076089000038337


We can create and use a decorator if we so wish...

In [12]:
import statistics       # in standard library since Python 3.4/PEP450

def time_primes(number_tests=1):
    """Build a decorator that times a function over repeated runs.

    The decorated function is called ``number_tests`` times with the same
    arguments. The CPU time of each run is measured with
    ``time.process_time()``, and the run count, mean and standard deviation
    are printed.

    Args:
        number_tests: How many times to run the wrapped function per call.
            Must be at least 1.

    Returns:
        A decorator. The function it produces returns the result of the last
        timed run, so decorating does not discard the original return value.

    Raises:
        ValueError: If ``number_tests`` is less than 1.
    """
    import functools  # local import so this block needs no new file-level import

    if number_tests < 1:
        raise ValueError("number_tests must be at least 1")

    def my_time_prime_decorator(func):
        @functools.wraps(func)  # keep the wrapped function's name and docstring
        def time_prime_execution(*args, **kwargs):
            _tests = []
            _result = None
            # range(number_tests) runs exactly number_tests times; the earlier
            # range(1, number_tests) ran one test too few.
            for _ in range(number_tests):
                _start_time = time.process_time()
                _result = func(*args, **kwargs)
                _end_time = time.process_time()
                _tests.append(_end_time - _start_time)
            print("Number of tests executed =", number_tests)
            print("Mean execution time =", statistics.mean(_tests))
            # The sample standard deviation is undefined for a single sample
            # and statistics.stdev() would raise, so report NaN instead.
            if len(_tests) > 1:
                _stdev = statistics.stdev(_tests)
            else:
                _stdev = float("nan")
            print("Standard deviation execution time =", _stdev)
            return _result
        return time_prime_execution
    return my_time_prime_decorator

@time_primes(number_tests=10)
def get_primes(y, func):
    """Collect the integers in ``range(y)`` accepted by ``func``.

    Decorated with ``time_primes`` so that each call is run and timed
    repeatedly.

    Args:
        y: Exclusive upper bound; candidates are 0, 1, ..., ``y - 1``.
        func: Callable taking one integer; a truthy result keeps the value.

    Returns:
        A list of the accepted integers in ascending order (before the
        decorator is applied).
    """
    return [candidate for candidate in range(y) if func(candidate)]

# Run the decorated function once per primality test; the decorator prints
# the timing statistics for each.
for prime_test in (is_prime_fn1, is_prime_fn2, is_prime_lambda):
    get_primes(10000, prime_test)

Number of tests executed = 10
Mean execution time = 0.8055555555555556
Standard deviation execution time = 0.08235098073355154
Number of tests executed = 10
Mean execution time = 0.022569444444444444
Standard deviation execution time = 0.01135129933213717
Number of tests executed = 10
Mean execution time = 0.041666666666666664
Standard deviation execution time = 0.015625


For large lists of data that are searched repeatedly, it is often much faster to build a dictionary once and look items up by key than to perform a linear search each time...

In [17]:
my_big_data_list = [(7263, 'bob'), (221333, 'sally'), (212892, 'simon')]

# Linear search: inspect every record and print those whose first field matches.
start_time = time.perf_counter_ns()
for record in my_big_data_list:
    if record[0] != 221333:
        continue
    print(record)
print(f"print elapsed time = {time.perf_counter_ns()- start_time}ns")

# Dictionary lookup: index the records by their first field, then fetch by key.
# NOTE: the timed region includes building the dictionary as well as the lookup.
start_time = time.perf_counter_ns()
my_big_data_list_dict = {record[0]: record for record in my_big_data_list}
print(my_big_data_list_dict[221333])
print(f"print elapsed time = {time.perf_counter_ns()- start_time}ns")


(221333, 'sally')
print elapsed time = 830700ns
(221333, 'sally')
print elapsed time = 466500ns


## Memory usage

Objects have a `__sizeof__()` method that reports the size of the object itself.

In [22]:
import sys

my_number_list = [1, 2, 3, 4]

# str(type(...)) looks like "<class '...'>"; the [1:-1] slice drops the angle brackets.
sizeof_type_text = str(type(my_number_list.__sizeof__))
print("type(my_number_list.__sizeof__) =", sizeof_type_text[1:-1])

# Size reported by the list object itself.
raw_list_size = my_number_list.__sizeof__()
print("my_number_list.__sizeof__() =", raw_list_size)

print("sys.getsizeof(1) =", sys.getsizeof(1))

# sys.getsizeof() is __sizeof__ plus garbage collector overhead.
print("sys.getsizeof(my_number_list) =", sys.getsizeof(my_number_list))

# Item count multiplied by the size of a single small int.
items_size = len(my_number_list) * (1).__sizeof__()
print("len(my_number_list) * (1).__sizeof__() =", items_size)

type(my_number_list.__sizeof__) = class 'builtin_function_or_method'
my_number_list.__sizeof__() = 72
sys.getsizeof(1) = 28
sys.getsizeof(my_number_list) = 88
len(my_number_list) * (1).__sizeof__() = 112


Note that the total size of the elements is greater than the size of the list object itself! What about a list of strings of the same length as the number list?

In [24]:
my_string_list = [
    'Hello world',
    'My name is Bob',
    'Quick brown fox',
    'Phillip the cat',
]

# Size reported by the list object itself.
raw_string_list_size = my_string_list.__sizeof__()
print("my_string_list.__sizeof__() =", raw_string_list_size)

# sys.getsizeof() is __sizeof__ plus garbage collector overhead.
gc_string_list_size = sys.getsizeof(my_string_list)
print("sys.getsizeof(my_string_list) =", gc_string_list_size)

my_string_list.__sizeof__() = 72
sys.getsizeof(my_string_list) = 88


The string list is the same size as the number list, because the size of a list does not include the items it refers to.

So now let's include the size of the actual items in each of the lists.

In [27]:
# Add the size of each item to the size of the list that holds it.
number_total = sys.getsizeof(my_number_list) + sum(
    map(sys.getsizeof, (1, 2, 3, 4))
)
print("sys.getsizeof(my_number_list) + sys.getsizeof(1) + ... =", number_total)

string_total = sys.getsizeof(my_string_list) + sum(
    map(
        sys.getsizeof,
        ('Hello world', 'My name is Bob', 'Quick brown fox', 'Phillip the cat'),
    )
)
print("sys.getsizeof(my_string_list) + sys.getsizeof('Hello world') + ... =", string_total)


sys.getsizeof(my_number_list) + sys.getsizeof(1) + ... = 200
sys.getsizeof(my_string_list) + sys.getsizeof('Hello world') + ... = 339


So the sizes do differ now that we are totalling everything. We can also use `asizeof` from the `pympler` package instead.

In [28]:
from pympler import asizeof
# Report the asizeof measurement for each list.
# NOTE(review): asizeof presumably includes the referenced items as well as
# the list itself; confirm against the pympler documentation.
for list_name, measured_list in (
    ("my_number_list", my_number_list),
    ("my_string_list", my_string_list),
):
    print(f"asizeof.asizeof({list_name}) =", asizeof.asizeof(measured_list))

asizeof.asizeof(my_number_list) = 216
asizeof.asizeof(my_string_list) = 344


Using `tracemalloc`...

In [None]:
import tracemalloc

# Trace allocations only while the list is built, then stop tracing so the
# tracing overhead does not affect code that runs afterwards. The snapshot
# stays usable after tracing has stopped.
tracemalloc.start()
try:
    trace_malloc_vector = list(range(1000))
    memory_snapshot = tracemalloc.take_snapshot()
finally:
    tracemalloc.stop()

# Group the recorded allocations by source line and show the first ten entries.
stats = memory_snapshot.statistics('lineno')
for stat in stats[:10]:
    print(stat)

Using `guppy`...

In [31]:
from guppy import hpy

# Create the guppy heap-inspection object.
h = hpy()
# NOTE(review): presumably marks the current heap as the baseline, so that
# later heap() calls report only objects allocated after this point; confirm
# against the guppy documentation.
h.setrelheap()
# Despite its name, this list holds strings.
my_number_heaped_list = ['Red', 'Brown', 'Green', 'Blue']
# Print the heap partition table (object counts and sizes grouped by kind).
print(h.heap())

Partition of a set of 17 objects. Total size = 2232 bytes.
 Index  Count   %     Size   % Cumulative  % Kind (class / dict of class)
     0      3  18     1328  59      1328  59 types.FrameType
     1      1   6      176   8      1504  67 types.CodeType
     2      2  12      152   7      1656  74 list
     3      2  12      112   5      1768  79 tuple
     4      1   6      104   5      1872  84 dict of ast.Interactive
     5      3  18       92   4      1964  88 int
     6      2  12       84   4      2048  92 bytes
     7      1   6       72   3      2120  95 weakref.ReferenceType
     8      1   6       64   3      2184  98 types.MethodType
     9      1   6       48   2      2232 100 ast.Interactive


Using `memory_profiler`...

In [None]:
from memory_profiler import profile

@profile
def my_func():
    """Allocate two large lists, delete the larger one, and return the other.

    Decorated with ``memory_profiler.profile``.

    Returns:
        A list containing one million references to the integer 1.
    """
    a = [1] * (10 ** 6)         # list of 1,000,000 elements
    b = [2] * (2 * 10 ** 7)     # list of 20,000,000 elements
    del b                       # drop the only reference to the larger list
    return a

# Call the decorated function once.
my_func()

Using `objgraph`...

In [32]:
import objgraph

# Print a table of type names with their current instance counts.
objgraph.show_most_common_types()
# NOTE(review): this first call presumably records the baseline counts that
# the later show_growth() call compares against; confirm against the objgraph
# documentation. limit=3 restricts the printed table to three rows.
objgraph.show_growth(limit=3)
# Allocate two new lists so there is something for the next call to report.
my_new_number_list = [1, 2, 3, 4]
my_new_string_list = ['Hello world', 'My name is Bob', 'Quick brown fox', 'Phillip the cat']
# Print the types whose instance counts grew since the previous call.
objgraph.show_growth()

dict                       27332
function                   22305
list                       15769
tuple                      15601
Operator                   7926
ReferenceType              7713
DFAPlan                    5418
Name                       5008
PythonNode                 4288
builtin_function_or_method 3815
dict        27321    +27321
function    22305    +22305
list        15766    +15766
list           15767        +1
Interactive        3        +1
