In [None]:
# Show all output values: display the result of every top-level expression
# in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [None]:
# Configuration for the slideshow, DO NOT RUN if you aren't presenting this
import os
import sys

from traitlets.config.manager import BaseJSONConfigManager

# Locate the nbconfig directory of the environment this kernel runs in instead
# of hard-coding one user's absolute path, so the cell works on any machine.
# NOTE(review): assumes the kernel runs in the same environment as the
# notebook server (previously the "cogsys-python-intro" conda env) -- confirm.
path = os.path.join(sys.prefix, "etc", "jupyter", "nbconfig")
cm = BaseJSONConfigManager(config_dir=path)

# Persist the setting in the "livereveal" config section: begin the slideshow
# at the currently selected cell.
cm.update("livereveal", {
    "start_slideshow_at": "selected",
})

# Why is My Program Slow?

- Python is slow
- I'm doing **a lot**

## What is *doing a lot*?

In [None]:
# Baseline: one addition of two small integers.
3 + 4

In [None]:
# Time the same small addition.
# NOTE(review): CPython may fold a literal expression like this into a constant
# at compile time, in which case very little is actually timed -- confirm that
# this is the intended demonstration.
%timeit 3 + 4

In [None]:
# Still a single addition, now with operands of one million each.
1000000 + 1000000

In [None]:
# Time the addition of the two larger integers.
# NOTE(review): as a literal expression this may also be constant-folded at
# compile time -- confirm the measurement shows what the slide intends.
%timeit 1000000 + 1000000

## What is *doing a lot*?

In [None]:
# `+` on two lists concatenates them into a new six-element list.
[1, 2, 3] + [4, 5, 6]

In [None]:
# Concatenate two one-million-element lists into a new list.
# The trailing semicolon suppresses the cell output, so the notebook does not
# try to render two million integers.
list(range(1000000)) + list(range(1000000));

In [None]:
import numpy as np
# `+` on two NumPy arrays adds them element-wise (unlike `+` on lists, which
# concatenates).
np.array([1, 2, 3]) + np.array([4, 5, 6])

In [None]:
# Element-wise addition of two arrays of one million elements each.
np.arange(1000000) + np.arange(1000000)

In [None]:
# Two 2x2 integer matrices and their matrix product.
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])
# The `@` operator performs matrix multiplication on NumPy arrays.
a @ b

motivation: give graphs that demonstrate

try comparing numpy vs `math` performance

don't define loops inside functions, explicit looping is expensive

use `tqdm`!

for debugging, use small initial batches

`for` loops don't create their own scope: the variables from the last iteration remain available afterwards, which is handy for debugging

`cProfile` (recursive!!!) = use the 80/20 rule

if vs try/except

rule of thumb: look at loops

COUNT iterations! (simply say number of states times number of timesteps for viterbi)

## Complexity

- we care not about the *exact number* of steps, but about how that number *grows* with the input
- always consider the *worst case scenario*
- nerds use this notation for it: $O(n)$, $O(n^2)$, $O(n \times m)$

## How Many Steps?

In [None]:
# Append one element to the end of a three-element list.
# `append` returns None, so this cell displays nothing.
[1, 2, 3].append(4)

In [None]:
# Membership test for a value that is not in the list (evaluates to False).
5 in [1, 2, 3]

In [None]:
# Position of the value stored first in the list (evaluates to 0).
[1, 2, 3].index(1)

In [None]:
# Position of the value stored second in the list (evaluates to 1).
[1, 2, 3].index(2)

In [None]:
# Position of the value stored last in the list (evaluates to 2).
[1, 2, 3].index(3)

## Group Exercise :)

use %timeit here!!!

two lists, find collisions "meet in the middle"

Find two numbers that sum to S (a given target number)

In [None]:
# TODO: make a big alphabet -- for now it is a list holding only four symbols.
alphabet = ["a", "b", "c", " "]
# Print, for every character of the sentence, whether it occurs in the list.
for letter in "This is a very very very very long sentence but don't read too much into it!":
    print(letter in alphabet)

In [None]:
# The same four symbols, this time stored in a set instead of a list.
alphabet = {"a", "b", "c", " "}
# Print, for every character of the sentence, whether it occurs in the set.
for letter in "This is a very very very very long sentence but don't read too much into it!":
    print(letter in alphabet)

## Group Exercise :)

time this!

In [None]:
def word_counts(count_src, words_to_check):
    """Count how often each word of ``words_to_check`` occurs in ``count_src``.

    Calls ``count_src.count(word)`` once per word, so ``count_src`` is
    searched again for every word.

    Args:
        count_src: Object with a ``count`` method (e.g. a list of words).
        words_to_check: Iterable of the words to look up.

    Returns:
        A list of counts, in the same order as ``words_to_check``.
    """
    tallies = []
    for word in words_to_check:
        tallies.append(count_src.count(word))
    return tallies

In [None]:
from collections import Counter
def word_counts(count_src, words_to_check):
    """Count how often each word of ``words_to_check`` occurs in ``count_src``.

    Builds a single ``Counter`` over ``count_src`` and then looks each word
    up in it; words that never occur yield 0.

    Args:
        count_src: Iterable of hashable items (e.g. a list of words).
        words_to_check: Iterable of the words to look up.

    Returns:
        A list of counts, in the same order as ``words_to_check``.
    """
    frequencies = Counter(count_src)
    return list(frequencies[word] for word in words_to_check)

## Don't Run Custom Functions Inside Loops!

Just kidding :)

In [1]:
def no_loop(x):
    """Return ``str(x)``.

    A thin wrapper around ``str`` with no loop of its own; the timing cells
    call it once per element from a loop outside the function.
    """
    return str(x)

In [2]:
# Time a list comprehension that calls no_loop once for each of the
# 1,000,000 values.
%timeit l = [no_loop(x) for x in range(1000000)]

376 ms ± 6.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [3]:
def loop_inside(seq):
    """Return a list containing ``str(x)`` for every ``x`` in ``seq``.

    The iteration happens inside the function, so converting a whole
    sequence takes a single call to ``loop_inside``.
    """
    return [str(x) for x in seq]

In [4]:
# Time one call to loop_inside, which iterates over the 1,000,000 values
# internally.
%timeit l = loop_inside(range(1000000))

281 ms ± 1.37 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [5]:
%%timeit l = []
# The `l = []` on the magic line is setup code: it is executed but not timed.
# Timed body: an explicit for-loop that appends one no_loop(x) result per value.
for x in range(1000000):
    l.append(no_loop(x))

536 ms ± 7.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [6]:
# Time the comprehension again, calling the built-in str directly instead of
# going through a wrapper function.
%timeit l = [str(x) for x in range(1000000)]

290 ms ± 5.73 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [5]:
# Time map() with the built-in str; list() consumes the map object to build
# the full result.
%timeit l = list(map(str, range(1000000)))

226 ms ± 3.31 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [9]:
%%timeit l = []
for x in range(1000000):
    l.append(dummy(x))

527 ms ± 8.29 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [11]:
%timeit l = list(map(dummy, range(1000000)))

391 ms ± 6.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


Viterbi vs naive (use itertools for naive -> code so simple, why so slow?)