# Optimization in Python

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from timeit import timeit

# Number of sample points shared by the timing experiments in this cell.
n = 10**5
# n evenly spaced points covering the closed interval [0, 1].
x = np.linspace(0.0, 1.0, num=n)
# Output buffer of n float zeros; the element-wise loop below writes into it.
y = np.zeros(x.shape)

def f(x):
    """Return ``x`` multiplied by itself (element-wise for a NumPy array)."""
    squared = x * x
    return squared


# n zeros (float data type)
%timeit for i in xrange(n): y[i] = f(x[i])
# %timeit fn(x)
%timeit y = x*x

100 loops, best of 3: 17.6 ms per loop
10000 loops, best of 3: 67.9 µs per loop


In [2]:
# Same grid as in the first cell: n evenly spaced points on [0, 1].
# NOTE: y is not re-created here; the loops below reuse the buffer allocated earlier.
n = 10**5
x = np.linspace(0.0, 1.0, num=n)

from math import sin

%timeit for i in xrange(len(x)):  y[i] = sin(x[i])

%timeit for i in xrange(len(x)):  y[i] =np.sin(x[i])

%timeit y = np.sin(x)


100 loops, best of 3: 15.8 ms per loop
10 loops, best of 3: 54.3 ms per loop
1000 loops, best of 3: 1.06 ms per loop


## Numba
http://numba.pydata.org/numba-doc/0.31.0/user/jit.html

In [3]:
from timeit import timeit
from numba import jit
@jit
def f(x, y):
    """Numba-jitted addition: return ``x + y``.

    NOTE: this rebinds the name ``f`` that the first cell used for the
    squaring helper.
    """
    total = x + y
    return total

def fp(x, y):
    """Plain-Python counterpart of the jitted ``f``: return ``x + y``."""
    total = x + y
    return total


In [4]:
# Time a scalar call of the Numba-jitted f against the identical plain-Python fp.
# NOTE(review): in the recorded run the jitted call is the slower one; presumably
# per-call overhead dominates for such a trivial body — confirm.
%timeit f(1,2)
%timeit fp(1,2)

The slowest run took 636718.18 times longer than the fastest. This could mean that an intermediate result is being cached.
1000000 loops, best of 3: 156 ns per loop
The slowest run took 25.71 times longer than the fastest. This could mean that an intermediate result is being cached.
10000000 loops, best of 3: 56.4 ns per loop


## SWIG
http://www.swig.org/Doc1.3/Python.html

## Parallel tasks with joblib

In [9]:
from joblib import Parallel, delayed
import time
from os import system

# Parameter grid: run() pairs every value of a with every value of b.
a = range(0, 5)
b = range(0, 10, 2)


def run_command(i):
    """Print the job argument, sleep for one second, then run ``./a.out`` with it.

    Parameters
    ----------
    i : int or list/tuple of ints
        Argument(s) placed on the ``./a.out`` command line. ``run()`` passes
        ``[i, j]`` pairs; a bare integer is still accepted and yields the
        same command string as before.
    """
    # print(...) with a single argument gives the same output on Python 2 and 3,
    # unlike the Python 2-only ``print i`` statement.
    print(i)
    time.sleep(1)
    # '%d' % [0, 2] raises TypeError (a list is not a number), so each element
    # is formatted on its own and the pieces are joined with spaces.
    # NOTE(review): assumes ./a.out takes the values as separate positional
    # arguments — confirm against the executable.
    values = i if isinstance(i, (list, tuple)) else [i]
    command = './a.out ' + ' '.join('%d' % value for value in values)
    system(command)


def run():
    """Call ``run_command`` on every ``[i, j]`` pair of ``a`` x ``b`` via joblib (``n_jobs=2``)."""
    # Outer iteration over a, inner over b: same ordering as a nested for-loop.
    pairs = [[i, j] for i in a for j in b]

    jobs = (delayed(run_command)(pair) for pair in pairs)
    Parallel(n_jobs=2)(jobs)


# Dispatch the whole a x b grid; each job prints its [i, j] pair before running.
run()



[0, 2]
[0, 0]
[0, 4]
[0, 6]
[0, 8]
[1, 0]
[1, 2]
[1, 4]
[1, 6]
[1, 8]
[2, 2]
[2, 0]
[2, 4]
[2, 6]
[3, 0]
[2, 8]
[3, 2]
[3, 4]
[3, 6]
[3, 8]
[4, 2]
[4, 0]
[4, 4]
[4, 6]
[4, 8]
