# Fast calculations with numpy
------------------------------------------------

See Jake VanderPlas's talk at PyCon 2015.

In [1]:
import numpy as np

# ufuncs are fast

In [2]:
a = list(range(100000))

In [3]:
%timeit b =[val+5 for val in a]

100 loops, best of 3: 3.8 ms per loop


In [4]:
a = np.array(a)
%timeit a+5

10000 loops, best of 3: 67 µs per loop


# Aggregations

In [5]:
from random import random

In [6]:
c = [random() for i in range(10000000)]
%timeit min(c)

1 loops, best of 3: 201 ms per loop


In [7]:
# NOTE: np.array(c) is evaluated inside the timed statement, so this
# measurement includes converting the Python list to an array, not only
# the .min() reduction.
%timeit np.array(c).min()

1 loops, best of 3: 259 ms per loop


In [8]:
m = np.random.randint(0, 10, (4,4))
m

array([[5, 2, 9, 8],
       [4, 6, 7, 8],
       [9, 5, 8, 8],
       [2, 5, 5, 4]])

In [9]:
(m.sum(), m.sum(axis=0),m.sum(axis=1) )

(95, array([20, 18, 29, 28]), array([24, 25, 30, 16]))

In [10]:
(m.argmin(), m.argmin(axis=0),m.argmin(axis=1) )

(1, array([3, 0, 3, 3]), array([1, 0, 1, 0]))

# Broadcasting

In [11]:
a = np.array([1,2,3,4])
b = np.array([1,1]) 
b.shape

(2,)

In [12]:
# c is built here but is not used in this cell.
c = a.reshape(2,2)
# d has shape (2, 1) and b has shape (2,); adding them broadcasts both
# operands to shape (2, 2).
d = b.reshape(2,1)
d + b

array([[2, 2],
       [2, 2]])

# Masking, slicing and indexing



In [13]:
a = np.random.randint(0,10, 10)
a

array([6, 8, 8, 9, 0, 0, 1, 3, 9, 3])

In [14]:
a < 3

array([False, False, False, False,  True,  True,  True, False, False, False], dtype=bool)

In [15]:
a[a<3]

array([0, 0, 1])

In [16]:
m = np.random.randint(0, 10, (5,5))

In [17]:
m[m<4]

array([3, 0, 1, 1, 1, 2, 3, 0, 0])

In [18]:
mask = (a < 2) | (a >5)
mask

array([ True,  True,  True,  True,  True,  True,  True, False,  True, False], dtype=bool)

In [19]:
a[mask]

array([6, 8, 8, 9, 0, 0, 1, 9])

In [20]:
ind = [0,3,1]  # Fancy indexing
a[ind]

array([6, 9, 8])

In [21]:
m

array([[6, 6, 3, 9, 5],
       [4, 0, 1, 1, 9],
       [4, 1, 8, 2, 8],
       [3, 8, 0, 6, 0],
       [4, 6, 6, 8, 8]])

In [22]:
(m[1,1], m[:,2], m[2,:])

(0, array([3, 1, 8, 0, 6]), array([4, 1, 8, 2, 8]))

In [23]:
m[[1,3],0:4 ]  #mixing fancy indexing and slicing

array([[4, 0, 1, 1],
       [3, 8, 0, 6]])

In [24]:
m.sum(axis=1)

array([29, 15, 23, 17, 32])

In [25]:
m[m.sum(axis=1)>25, 3:] # mixing masking and slicing

array([[9, 5],
       [8, 8]])

In [26]:
m.sum(axis=0)

array([21, 21, 18, 26, 30])

In [27]:
m[ :3, m.sum(axis=0)>25] # mixing masking and slicing

array([[9, 5],
       [1, 9],
       [2, 8]])

In [50]:
# Boolean mask of the entries of m that lie within 2 of 1.0.
mask = abs(m - 1.0) < 2
# np.where on a 2-D boolean mask returns a tuple (row_indices, col_indices)
# locating the True entries.
idx = np.where(mask)
idx

(array([1, 1, 1, 2, 2, 3, 3]), array([1, 2, 3, 1, 3, 2, 4]))

In [48]:
mask

array([[False, False, False, False, False],
       [False,  True,  True,  True, False],
       [False,  True, False,  True, False],
       [False, False,  True, False,  True],
       [False, False, False, False, False]], dtype=bool)

In [51]:
m[mask]

array([0, 1, 1, 1, 2, 0, 0])

# Application: nearest neighbors

In [29]:
N = 5
X = np.random.random((N,3))

In [30]:
X.shape

(5, 3)

In [31]:
# X.reshape(N,1,3) - X broadcasts shape (N, 1, 3) against (N, 3), so
# diff[j, k] holds X[j] - X[k] for every pair of rows: shape (N, N, 3).
diff = X.reshape(N,1,3) -X
diff.shape

(5, 5, 3)

In [32]:
# Sum the squared differences over the last axis: D[j, k] is the squared
# Euclidean distance between X[j] and X[k].
D = (diff**2).sum(2)

In [33]:
D.shape

(5, 5)

In [34]:
# Set the diagonal (each point's distance to itself, which is 0) to
# infinity so that the argmin over each row cannot pick the point itself.
ii = range(N)
D[ii, ii] = np.inf 

In [35]:
# For each row of D, take the column index of the smallest entry; with the
# diagonal set to inf this is the index of the closest other point.
i = np.argmin(D,1)

In [36]:
# Show the nearest-neighbour index for the first four points.
# print(...) with a single argument prints the same on Python 2 and 3.
print(i[:4])

[2 4 0 4]


In [37]:
# Row k of X[i] is the coordinates of the point nearest to X[k].
# print(...) with a single argument prints the same on Python 2 and 3.
print(X[i])

[[ 0.04510519  0.57428923  0.03939071]
 [ 0.92162063  0.40820185  0.72900728]
 [ 0.15751968  0.22234212  0.5180393 ]
 [ 0.92162063  0.40820185  0.72900728]
 [ 0.98030052  0.50475691  0.83134635]]
