# Fast calculations with numpy
------------------------------------------------

See Jake VanderPlas's PyCon 2015 talk, "Losing your Loops: Fast Numerical Computing with NumPy".

In [78]:
import numpy as np

# ufuncs are fast

In [79]:
a = list(range(100000))

In [80]:
%timeit b =[val+5 for val in a]

100 loops, best of 3: 3.72 ms per loop


In [81]:
a = np.array(a)
%timeit a+5

10000 loops, best of 3: 74.4 µs per loop


# Aggregations

In [82]:
from random import random

In [83]:
c = [random() for i in range(10000000)]
%timeit min(c)

1 loops, best of 3: 202 ms per loop


In [84]:
# NOTE: the list -> ndarray conversion (np.array(c)) is inside the timed
# statement, so this measures conversion + min(), not min() alone. For a
# like-for-like comparison with min(c), build the array once outside %timeit.
%timeit np.array(c).min()

1 loops, best of 3: 261 ms per loop


In [85]:
m = np.random.randint(0, 10, (4,4))
m

array([[0, 3, 2, 7],
       [3, 3, 5, 7],
       [9, 7, 0, 3],
       [5, 9, 5, 3]])

In [86]:
# Grand total of all entries, then per-column sums (axis=0 collapses the rows)
# and per-row sums (axis=1 collapses the columns).
(m.sum(), m.sum(axis=0),m.sum(axis=1) )

(71, array([17, 22, 12, 20]), array([12, 18, 19, 22]))

In [87]:
# Without an axis, argmin() returns the index of the minimum in the flattened
# array; with axis=0 / axis=1 it returns the position of the minimum within
# each column / each row.
(m.argmin(), m.argmin(axis=0),m.argmin(axis=1) )

(0, array([0, 0, 2, 2]), array([0, 0, 2, 3]))

# Broadcasting

In [88]:
a = np.array([1,2,3,4])
b = np.array([1,1]) 
b.shape

(2,)

In [89]:
# c is not used by the expression evaluated at the end of this cell.
c = a.reshape(2,2)
# d has shape (2, 1) and b has shape (2,), so d + b broadcasts to shape (2, 2).
d = b.reshape(2,1)
d + b

array([[2, 2],
       [2, 2]])

# Masking, slicing and indexing



In [90]:
a = np.random.randint(0,10, 10)
a

array([7, 2, 3, 0, 3, 1, 2, 4, 1, 2])

In [91]:
a < 3

array([False,  True, False,  True, False,  True,  True, False,  True,  True], dtype=bool)

In [92]:
a[a<3]

array([2, 0, 1, 2, 1, 2])

In [93]:
m = np.random.randint(0, 10, (5,5))

In [94]:
m[m<4]

array([0, 1, 2, 0, 2, 0, 1, 0, 0, 0, 0])

In [95]:
mask = (a < 2) | (a >5)
mask

array([ True, False, False,  True, False,  True, False, False,  True, False], dtype=bool)

In [96]:
a[mask]

array([7, 0, 1, 1])

In [97]:
ind = [0,3,1]  # Fancy indexing
a[ind]

array([7, 0, 2])

In [98]:
m

array([[8, 0, 9, 8, 8],
       [6, 4, 1, 2, 0],
       [2, 0, 9, 1, 4],
       [6, 0, 6, 6, 0],
       [0, 0, 9, 8, 6]])

In [99]:
(m[1,1], m[:,2], m[2,:])

(4, array([9, 1, 9, 6, 9]), array([2, 0, 9, 1, 4]))

In [100]:
m[[1,3],0:4 ]  #mixing fancy indexing and slicing

array([[6, 4, 1, 2],
       [6, 0, 6, 6]])

In [101]:
m.sum(axis=1)

array([33, 13, 16, 18, 23])

In [102]:
m[m.sum(axis=1)>25, 3:] # mixing masking and slicing

array([[8, 8]])

In [103]:
m.sum(axis=0)

array([22,  4, 34, 25, 18])

In [104]:
m[ :3, m.sum(axis=0)>25] # mixing masking and slicing

array([[9],
       [1],
       [9]])

# Application: nearest neighbors

In [105]:
# Draw N random points in 3-D, one point per row, coordinates in [0, 1).
N = 5
X = np.random.random(size=(N, 3))

In [106]:
X.shape

(5, 3)

In [107]:
# Pairwise coordinate differences via broadcasting: inserting a length-1 axis
# gives X the shape (N, 1, d); subtracting X of shape (N, d) then yields an
# (N, N, d) array with diff[p, q] = X[p] - X[q]. Using np.newaxis rather than
# reshape(N, 1, 3) avoids hard-coding the number of points and dimensions.
diff = X[:, np.newaxis, :] - X
diff.shape

(5, 5, 3)

In [108]:
# Squared Euclidean distance for every pair of points: square each coordinate
# difference and add them up along the last axis (axis 2).
D = (diff * diff).sum(axis=2)

In [109]:
D.shape

(5, 5)

In [110]:
# Each point's distance to itself (the diagonal of D) is 0 and would always
# win the argmin taken next; overwrite the diagonal with +inf so the minimum
# in each row is a *different* point. Indexing with the same index sequence
# on both axes selects the entries (0, 0), (1, 1), ..., (N-1, N-1).
ii = range(N)
D[ii, ii] = np.inf 

In [111]:
# For every row of D, the column index of its smallest entry, i.e. the index
# of the closest point to that row's point.
i = D.argmin(axis=1)

In [112]:
# Nearest-neighbour indices for the first four points. The parenthesised form
# works on both Python 2 and Python 3 (the bare print statement is Python 2 only).
print(i[:4])

[4 3 0 2]


In [113]:
# Coordinates of each point's nearest neighbour, selected by fancy-indexing X
# with the index array i. print(...) works on both Python 2 and Python 3.
print(X[i])

[[ 0.43435167  0.3714329   0.99719557]
 [ 0.97590093  0.62046408  0.09732212]
 [ 0.61958283  0.52541987  0.80435426]
 [ 0.93824918  0.8543004   0.78775989]
 [ 0.61958283  0.52541987  0.80435426]]
