# Micro-benchmarks and timeit

In [None]:
import io

def build_concat(strings):
    '''Return the items of strings glued together by repeated ``+=``.

    This is the naive approach the benchmark compares against.
    '''
    joined = ''
    for piece in strings:
        joined += piece
    return joined

def build_join(strings):
    '''Return the items of strings combined by a single ``str.join`` call.'''
    separator = ''
    return separator.join(strings)

def build_sio(strings):
    '''Return the items of strings written one by one into an in-memory
    text buffer, then read back as a single string.
    '''
    buffer = io.StringIO()
    for piece in strings:
        buffer.write(piece)
    return buffer.getvalue()

# Benchmark input: the decimal strings '0' through '999'.
strings = list(map(str, range(1000)))

In [None]:
build_concat(strings) == build_join(strings) == build_sio(strings) 

In [None]:
import timeit

In [None]:
# Time 10,000 calls of each builder and print its short name next to the
# total elapsed seconds reported by timeit.
for fname in ('concat', 'join', 'sio'):
    elapsed = timeit.timeit(
        f'build_{fname}(strings)',
        number=10_000,
        globals=globals(),
    )
    print(fname, elapsed)

## Jupyter Notebook (and IPython) provide a nice helper

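We can use the `%timeit` line magic to time a single statement, or the `%%timeit` cell magic to time a whole cell. Both run a version of `timeit` inside IPython, and when IPython's automagic is on, a bare `timeit` works too.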

In [None]:
%%timeit 
build_concat(strings)

In [None]:
%timeit build_join(strings)

In [None]:
%timeit build_sio(strings)

## Results of our microbenchmark

- `''.join()` is the fastest, around 10x faster than concatenating strings
- You should use `''.join` when you need to build a big string from a list of strings

# Profiling Python Code

You can profile an entire script by running it via `python -m cProfile` to get summary information about the whole thing:

In [None]:
pwd

In [None]:
%%file data/profiling/profiletest.py
import re

text = '''The quick brown fox jumps over the lazy dog'''
# Call the module-level re.search() with the pattern string on every
# one of the 10,000 iterations.
for x in range(10_000):
    re.search('fox', text)

In [None]:
!python -m cProfile data/profiling/profiletest.py

In [None]:
!python -m cProfile --help

In [None]:
!python -m cProfile -s time data/profiling/profiletest.py

In [None]:
%%file data/profiling/profiletest.py
import re

text = '''The quick brown fox jumps over the lazy dog'''
# Compile the pattern once, outside the loop, and reuse the compiled
# object for each of the 10,000 searches.
my_regex = re.compile('fox')
for x in range(10_000):
    my_regex.search(text)

In [None]:
!python -m cProfile -s time data/profiling/profiletest.py 

In [None]:
%run -p data/profiling/profiletest.py

In [None]:
!python -m cProfile -s time -o profile-stats data/profiling/profiletest.py

### Direct profiling

We can also profile just a few Python statements or a function:

In [None]:
import re
import cProfile

In [None]:
cProfile.run('re.compile("foo|bar")', sort='time')

We can also save the statistics to a file for analysis later:

In [None]:
cProfile.run('re.compile("foo|bar")', 're-stats')

For analysis, we use the `pstats` module:

In [None]:
import pstats

In [None]:
p = pstats.Stats('profile-stats')

In [None]:
p.print_stats()

In [None]:
p.strip_dirs()
p.print_stats()

In [None]:
p.sort_stats('time')

In [None]:
p.print_stats(10)

Jupyter / IPython also has magic functions to help us here: `%prun` profiles a single statement and `%%prun` profiles a whole cell.

In [None]:
%prun for x in range(10000): re.compile('foo|bar')

In [None]:
%%prun 
for x in range(10000): 
    re.compile('foo|bar')

# Instrumentation 

In [None]:
p = cProfile.Profile()

In [None]:
# Manual instrumentation: turn the profiler on, run the code of interest,
# then turn it off again.
p.enable()
for x in range(10000):
    re.compile('re|foo')
    lst0 = range(100)  # creates only the range object
    lst1 = list(range(100))  # materializes the range into a list
# The collected stats stay on p and are printed in a later cell.
p.disable()

In [None]:
p.print_stats(sort='time')

In [None]:
# Use the Profile object itself as a context manager around the code to
# measure. Unlike the loop cells around it, this body runs only once.
with p:
    re.compile('re|foo')
    lst0 = range(100)
    lst1 = list(range(100))
p.print_stats(sort='time')

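(`cProfile.Profile` objects can be used directly as context managers as of Python 3.8. On earlier versions, we can write an equivalent context manager ourselves:)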

In [None]:
from contextlib import contextmanager

@contextmanager
def profiler(p):
    '''Keep the profiler p enabled for the duration of a with block.

    Yields p itself, so it is available as the "as" target.
    '''
    p.enable()
    try:
        yield p
    finally:
        # Runs whether the with body finishes normally or raises.
        p.disable()

In [None]:
# Same workload as the manual enable()/disable() cell, but the profiler()
# helper now handles turning p on and off.
with profiler(p):
    for x in range(10000):
        re.compile('re|foo')
        lst0 = range(100)
        lst1 = list(range(100))

In [None]:
p.print_stats(sort='time')

# Instrumenting high-performance code

There are times when we want to profile, but we don't want to incur the performance penalty. For instance, we might want to see the profile of a running production system, without impacting its performance in a major way.

For that, we can profile a _sample_ of the calls to a function.

In [None]:
import random
import functools

def instrument(profiler, probability=0.10):
    '''Build a decorator that profiles a random sample of calls.

    profiler is entered as a context manager around each sampled call
    (cProfile.Profile supports this on Python 3.8+).  probability is
    compared against random.random() on every call to decide whether that
    call is sampled; the default is 0.10, i.e. roughly 10% of calls.
    '''
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            sampled = random.random() < probability
            if not sampled:
                # Common case: call straight through without profiling.
                return func(*args, **kwargs)
            with profiler:
                return func(*args, **kwargs)
        return wrapper
    return decorator

In [None]:
import cProfile
prof = cProfile.Profile()

In [None]:
@instrument(prof, 0.2)
def build_join(strings):
    '''Join strings with ``str.join``; a sample of the calls is profiled
    into prof by the instrument decorator (probability argument 0.2).
    '''
    separator = ''
    return separator.join(strings)

In [None]:
for x in range(10_000): 
    build_join(strings)

In [None]:
prof.print_stats(sort='time')

In [None]:
from contextlib import contextmanager

@contextmanager
def profiling(profiler, probability=0.10):
    '''Context manager that profiles its with block only some of the time.

    On each use, random.random() is compared against probability.  When
    the block is sampled, profiler is entered as a context manager and
    yielded as the "as" value; otherwise the block runs unprofiled and
    the "as" value is None.
    '''
    sampled = random.random() < probability
    if not sampled:
        yield None
        return
    with profiler:
        yield profiler

In [None]:
%timeit random.random() < 0.10

In [None]:
# Two independent profilers, sampled at different rates around the same
# statement: prof with probability 0.02 and prof1 with probability 0.5.
# NOTE(review): on Python 3.12+, enabling a second cProfile.Profile while
# another is active appears to raise ValueError ("Another profiling tool is
# already active") -- confirm on the target Python version.
prof = cProfile.Profile()
prof1 = cProfile.Profile()

# Counts the iterations in which the outer sampler (prof) was active.
num_profiles = 0
for x in range(10_000):
    with profiling(prof, 0.02) as as_value:
        with profiling(prof1, 0.5) as as_value1:
            # If profiling, as_value == prof
            # If NOT profiling, as_value == None
            if as_value:
                num_profiles += 1
            ''.join(strings)

prof.print_stats(sort='time')

In [None]:
prof1.print_stats(sort='time')

In [None]:
num_profiles

# Lab

Open the [profiling lab][profiling_lab]

[profiling_lab]: ./profiling-lab.ipynb