# High Performance Python


* How and what to measure
* Set vs List
* Append vs Comprehension
* Namespace
* Iterators vs Generators

## How and what to measure

* memory profiler
* timeit


In [None]:
%load_ext memory_profiler
import timeit


## Append vs Comprehension
It's all about memory allocation

In [None]:
arraySize = 100_00
%memit [i*i for i in range(0,arraySize)] 
%timeit [i*i for i in range(0,arraySize)] 

In [None]:
%%memit array = [] 
for i in range(arraySize):
    array.append(i*i)

In [None]:
%%timeit array = [] 
for i in range(arraySize):
    array.append(i*i)

In [None]:
%timeit tup = (1,2,3,4,5,6,7,8)
tup = (1,2,3,4,5,6,7,8)
%timeit tup[3]

In [None]:
%timeit lis = [1,2,3,4,5,6,7,8]
lis = [1,2,3,4,5,6,7,8]
%timeit lis[3]

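- Lists are dynamic arrays: they are mutable and can be resized
- Tuples are static arrays: they are immutable and cannot be resized
- Lists over-allocate when they grow, so their memory use does not increase linearly with the number of elements
- Tuples are cached by the Python runtime, so creating one often does not require requesting new memory from the operating system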
- High Performance Python P.69
 - https://pepa.holla.cz/wp-content/uploads/2016/08/High-Performance-Python.pdf

## List & Set

In [None]:
import names
import random
def listUniqueNames(phoneBook):
    """Count the distinct first names in a phone book, tracking them in a list.

    This is the deliberately slow variant used for the list-vs-set comparison:
    every membership check scans the list of first names seen so far.

    Args:
        phoneBook: Iterable of ``(name, phone)`` pairs. The first name is the
            part of ``name`` before the first space.

    Returns:
        The number of distinct first names.
    """
    uniqueNames = []
    for name, _phone in phoneBook:
        # Take the text before the first space. A name without any space is
        # treated as a first name rather than failing on tuple unpacking.
        firstName = name.split(" ", 1)[0]
        # Linear scan on purpose: this cost is what the benchmark measures.
        if firstName not in uniqueNames:
            uniqueNames.append(firstName)
    return len(uniqueNames)

def setUniqueNames(phoneBook):
    """Count the distinct first names in a phone book, tracking them in a set.

    Args:
        phoneBook: Iterable of ``(name, phone)`` pairs. The first name is the
            part of ``name`` before the first space.

    Returns:
        The number of distinct first names.
    """
    # Take the text before the first space. A name without any space is
    # treated as a first name rather than failing on tuple unpacking.
    uniqueNames = {name.split(" ", 1)[0] for name, _phone in phoneBook}
    return len(uniqueNames)

phoneBook = [("Jonatan Chow", "1234567")]

# Number of random entries to generate. listUniqueNames rescans the names it
# has already collected for every entry, so keep this at 10_000 or below.
size = 10_000
for i in range(size):
    phoneBook.append((names.get_full_name(), "12344566545"))

# NOTE: this is a second name for the same list object, not a copy.
largePhoneBook = phoneBook

In [None]:
%timeit -r 5 -n 100 listUniqueNames(largePhoneBook)
%timeit -r 5 -n 100 setUniqueNames(largePhoneBook)

## Namespace

In [None]:
import math 
from math import sin

In [None]:
def test1(x):
    """Add ``math.sin(x)`` to a running total 1000 times and return the total.

    The call is written as an attribute access, so every iteration resolves
    the name ``math`` and then its ``sin`` attribute.
    """
    res = 1  # the total starts at 1, not 0
    for _ in range(1000):
        res += math.sin(x)
    return res


def test2(x):
    """Add ``sin(x)`` to a running total 1000 times and return the total.

    ``sin`` is not assigned inside the function, so it refers to the
    module-level name bound by ``from math import sin``.
    """
    res = 1  # the total starts at 1, not 0
    for _ in range(1000):
        res += sin(x)
    return res


def test3(x, sin=math.sin):
    """Add ``sin(x)`` to a running total 1000 times and return the total.

    ``sin`` is a parameter whose default, ``math.sin``, is evaluated once when
    the function is defined, so inside the loop it is a local variable.
    Callers may pass a different callable to use in its place.
    """
    res = 1  # the total starts at 1, not 0
    for _ in range(1000):
        res += sin(x)
    return res

In [None]:
%timeit -r 10 -n 1000 test1(0.05)
%timeit -r 10 -n 1000 test2(0.05)
%timeit -r 10 -n 1000 test3(0.05)

### Python name lookup hierarchy
 1) locals array
 
 2) globals dictionary
 
 3) `builtins` module (`__builtin__` in Python 2)

In [None]:
def test1(x):
    """Return 1 plus 1000 successive additions of ``math.sin(x)``.

    Each iteration resolves the name ``math`` and then looks up its ``sin``
    attribute before making the call.
    """
    res = 1  # the total starts at 1, not 0
    for _ in range(1000):
        res += math.sin(x)
    return res


def test2(x):
    """Return 1 plus 1000 successive additions of ``sin(x)``.

    ``sin`` is not a local here; it is the module-level name bound by
    ``from math import sin``.
    """
    res = 1  # the total starts at 1, not 0
    for _ in range(1000):
        res += sin(x)
    return res


def test3(x, sin=math.sin):
    """Return 1 plus 1000 successive additions of ``sin(x)``.

    The default for ``sin`` (``math.sin``) is evaluated once at definition
    time; within the loop ``sin`` is an ordinary local variable.
    """
    res = 1  # the total starts at 1, not 0
    for _ in range(1000):
        res += sin(x)
    return res

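- test1 : looks up `math` in the global namespace, then looks up the `sin` attribute on the module, on every iteration
- test2 : looks up `sin` directly in the global namespace
- test3 : `sin` is bound as a default argument, so it is read as a local variable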

## Iterators vs Generators

In [None]:
def fibonacci_list(num_items):
    """Build and return a list of the first ``num_items`` Fibonacci numbers.

    The sequence starts at 0. The whole list is held in memory before it is
    returned. A count of zero or less gives an empty list.
    """
    sequence = []
    current, upcoming = 0, 1
    while True:
        # Stop as soon as the list is no longer shorter than requested.
        if not (len(sequence) < num_items):
            return sequence
        sequence.append(current)
        current, upcoming = upcoming, current + upcoming

def fibonacci_gen(num_items):
    """Lazily yield the first ``num_items`` Fibonacci numbers, starting at 0.

    Only the two most recent values are kept, so memory use does not grow
    with ``num_items``.

    Args:
        num_items: How many values to produce. Zero or a negative count
            yields nothing.

    Yields:
        The Fibonacci numbers 0, 1, 1, 2, 3, ... in order.
    """
    a, b = 0, 1
    # Compare against zero explicitly: a bare truthiness test would never
    # become false for a negative count, making the generator endless.
    while num_items > 0:
        yield a
        a, b = b, a + b
        num_items -= 1

In [None]:
def test_fib_list(size):
    """Walk every value of ``fibonacci_list(size)`` and discard it."""
    for _ in fibonacci_list(size):
        continue
    
def test_fib_gen(size):
    """Walk every value of ``fibonacci_gen(size)`` and discard it."""
    for _ in fibonacci_gen(size):
        continue

In [None]:
# Keep size small 
size = 100_0

%timeit -r 10 -n 1000 test_fib_list(size)
%timeit -r 10 -n 1000 test_fib_gen(size)

In [None]:
size = 100_000
%memit test_fib_list(size)
%memit test_fib_gen(size)

In [None]:
size = 100_000
%memit len([n for n in fibonacci_list(size) if n%3 ==0])
%memit len([n for n in fibonacci_gen(size) if n%3 ==0])

## Anything else you could do? 

A lot. 
- Collections 
- Async
- Compiling to C
- Multiprocessing
  - threads
  - processes
- Cluster