### Vectorization

In [4]:
import sys
sys.path.append("./code")

In [5]:
# Example random walk

In [6]:
# Object Oriented Approach
import random
import numpy as np

class RandomWalker:
    def __init__(self):
        self.position = 0

    def walk(self, n):
        self.position = 0
        for i in range(n):
            yield self.position
            self.position += 2*random.randint(0, 1) - 1


In [7]:
walker = RandomWalker()
walk = [position for position in walker.walk(100)]
print(walk)

[0, -1, 0, -1, -2, -1, -2, -3, -4, -3, -2, -3, -2, -1, 0, -1, -2, -1, -2, -3, -4, -3, -4, -3, -4, -3, -2, -3, -4, -5, -4, -3, -4, -3, -2, -1, -2, -3, -4, -5, -4, -3, -2, -1, -2, -3, -2, -3, -4, -3, -2, -1, -2, -3, -2, -3, -4, -5, -6, -5, -4, -3, -2, -3, -2, -3, -4, -3, -4, -5, -4, -3, -4, -5, -6, -5, -6, -5, -6, -5, -4, -3, -2, -1, -2, -3, -2, -3, -4, -3, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11]


In [8]:
# Benchmark
from tools import timeit
walker = RandomWalker()
timeit("[position for position in walker.walk(n=10000)]", globals=globals())

10 loops, best of 3: 13.1 msec per loop


In [9]:
# Procedural Approach

def random_walk(n):
    position = 0
    walk = [position]
    for i in range(n):
        step = 2*random.randint(0, 1)-1
        position += step
        walk.append(position)
    return walk


In [10]:
timeit("random_walk(n=10000)", globals())

10 loops, best of 3: 12.1 msec per loop


### Iterators

In [11]:
iter([1, 2, 3, 4])

<list_iterator at 0x7f49f28c2780>

In [12]:
list(map(len, ['abc', 'de', 'fghi']))

[3, 2, 4]

In [13]:
list(map(sum, zip([1, 2, 3], [4, 5, 6])))

[5, 7, 9]

In [14]:
import itertools
import operator

In [16]:
data = ['a','b','c']
list(itertools.accumulate(data))



['a', 'ab', 'abc']

In [17]:
shapes = ['square', 'rectangle', 'circle']
list(itertools.combinations(shapes,2))

[('square', 'rectangle'), ('square', 'circle'), ('rectangle', 'circle')]

In [19]:
# Given a list of values inputs and a positive integer n, write a function that splits inputs into groups of length n. 
# For simplicity, assume that the length of the input list is divisible by n. 
# For example, if inputs = [1, 2, 3, 4, 5, 6] and n = 2, your function should return [(1, 2), (3, 4), (5, 6)].



In [20]:
def naive_grouper(inputs, n):
    num_groups = len(inputs) // n
    return [tuple(inputs[i*n:(i+1)*n]) for i in range(num_groups)]

In [21]:
nums = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
naive_grouper(nums, 2)

[(1, 2), (3, 4), (5, 6), (7, 8), (9, 10)]

In [22]:
# time -f "Memory used (kB): %M\nUser time (seconds): %U" python3 naive.py


In [23]:
# Now with iterator
def better_grouper(inputs, n):
    iters = [iter(inputs)] * n
    return zip(*iters)

In [24]:
iters = [iter(nums)] * 2
list(id(itr) for itr in iters)


[139955578613656, 139955578613656]

In [27]:
iters = [iter(nums), iter(nums)]
list(id(itr) for itr in iters)

[139955942594992, 139955942593928]

In [26]:
list(better_grouper(nums, 2))

[(1, 2), (3, 4), (5, 6), (7, 8), (9, 10)]

In [26]:
# time -f "Memory used (kB): %M\nUser time (seconds): %U" python3 better.py


In [28]:
help(random.choices)

Help on method choices in module random:

choices(population, weights=None, *, cum_weights=None, k=1) method of random.Random instance
    Return a k sized list of population elements chosen with replacement.
    
    If the relative weights or cumulative weights are not specified,
    the selections are made with equal probability.



### Back to Random Walk

In [29]:
# Random walk with itertools
def random_walk_faster(n=1000):
    from itertools import accumulate
    # Only available from Python 3.6
    steps = random.choices([-1,+1], k=n)
    return [0]+list(accumulate(steps))

walk = random_walk_faster(1000)

In [30]:
timeit("random_walk_faster(n=10000)", globals())

10 loops, best of 3: 2.22 msec per loop


In [31]:
# Random walk with numpy
def random_walk_fastest(n=1000):
    # No 's' in numpy choice (Python offers choice & choices)
    steps = np.random.choice([-1,+1], 1000)
    return np.cumsum(steps)

walk = random_walk_fastest(1000)

In [32]:
timeit("random_walk_fastest(n=10000)", globals())

100 loops, best of 3: 19.5 usec per loop


### Numpy Arrays

In [42]:
import numpy as np

a = np.array([1, 2, 3])
print(type(a))
print(a.shape)
print(a[0], a[1], a[2])

<class 'numpy.ndarray'>
(3,)
1 2 3


In [43]:
b = np.array([[1,2,3],[4,5,6]])
print(b.shape)
print(b[0, 0], b[0, 1], b[1, 0])

(2, 3)
1 2 4


### Creating Arrays

In [44]:
a = np.zeros((2,2))
print(a)

b = np.ones((1,2))
print(b)

# Constant Matrix
c = np.full((2,2), 7)
print(c)

# Identity Matrix
d = np.eye(2)
print(d)

e = np.random.random((2,2))
print(e)

[[0. 0.]
 [0. 0.]]
[[1. 1.]]
[[7 7]
 [7 7]]
[[1. 0.]
 [0. 1.]]
[[0.20333973 0.93431575]
 [0.95725993 0.16194613]]


### Array Indexing

In [46]:
# Create the following rank 2 array with shape (3, 4)
# [[ 1  2  3  4]
#  [ 5  6  7  8]
#  [ 9 10 11 12]]
a = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])

b = a[:2, 1:3]
print(b)

print(a[0,1])
b[0,0] = 77
print(a)

[[2 3]
 [6 7]]
2
[[ 1 77  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [48]:
# Mixing indexing with slicing
a = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])

row_r1 = a[1, :]
row_r2 = a[1:2, :]
print(row_r1, row_r1.shape)
print(row_r2, row_r2.shape)

col_r1 = a[:, 1]
col_r2 = a[:, 1:2]
print(col_r1, col_r1.shape)
print(col_r2, col_r2.shape)

[5 6 7 8] (4,)
[[5 6 7 8]] (1, 4)
[ 2  6 10] (3,)
[[ 2]
 [ 6]
 [10]] (3, 1)


In [49]:
# More integer indexing

a = np.array([[1,2], [3, 4], [5, 6]])

print(a[[0, 1, 2], [0, 1, 0]])
print(np.array([a[0, 0], a[1, 1], a[2, 0]]))

print(a[[0, 0], [1, 1]])
print(np.array([a[0, 1], a[0, 1]]))


[1 4 5]
[1 4 5]
[2 2]
[2 2]


In [50]:
# Mutating a single element from each row of a matrix

a = np.array([[1,2,3], [4,5,6], [7,8,9], [10, 11, 12]])
print(a)

# array of indexes
b = np.array([0, 2, 0, 1])
print(a[np.arange(4), b])

# mutate
a[np.arange(4), b] += 10

print(a)

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
[ 1  6  7 11]
[[11  2  3]
 [ 4  5 16]
 [17  8  9]
 [10 21 12]]


In [3]:
# Boolean Array Indexing

a = np.array([[1,2], [3,4], [5,6]])

bool_idx = (a > 2)

print (bool_idx)

print (a[bool_idx])

print (a[a > 2])

[[False False]
 [ True  True]
 [ True  True]]
[3 4 5 6]
[3 4 5 6]


In [4]:
# Datatypes

x = np.array([1,2])
print (x.dtype)

x = np.array([1.0, 2.0])
print (x.dtype)

x = np.array([1,2], dtype=np.int64)
print (x.dtype)

int64
float64
int64


In [5]:
# Array elementwise arithmetic

x = np.array([[1,2],[3,4]], dtype=np.float64)
y = np.array([[5,6],[7,8]], dtype=np.float64)

print(x + y)
print(np.add(x, y))

print(x - y)
print(np.subtract(x, y))

print(x * y)
print(np.multiply(x, y))

print(x / y)
print(np.divide(x, y))

print(np.sqrt(x))

[[ 6.  8.]
 [10. 12.]]
[[ 6.  8.]
 [10. 12.]]
[[-4. -4.]
 [-4. -4.]]
[[-4. -4.]
 [-4. -4.]]
[[ 5. 12.]
 [21. 32.]]
[[ 5. 12.]
 [21. 32.]]
[[0.2        0.33333333]
 [0.42857143 0.5       ]]
[[0.2        0.33333333]
 [0.42857143 0.5       ]]
[[1.         1.41421356]
 [1.73205081 2.        ]]


In [7]:
# sum

x = np.array([[1,2],[3,4]])

print (x)
print (np.sum(x))
print (np.sum(x, axis=0))  # sum along columns
print (np.sum(x, axis=1))  # sum along rows

[[1 2]
 [3 4]]
10
[4 6]
[3 7]


In [8]:
# Transpose 

x = np.array([[1,2], [3,4]])
print (x)
print (x.T)

v = np.array([1,2,3])
print (v)
print (v.T)

[[1 2]
 [3 4]]
[[1 3]
 [2 4]]
[1 2 3]
[1 2 3]


In [10]:
# Add a constant vector to each row of a matrix

x = np.array([[1,2,3], [4,5,6], [7,8,9], [10, 11, 12]])
v = np.array([1, 0, 1])
y = np.empty_like(x)

# Add the vector v to each row of the matrix x with an explicit loop
for i in range(4):
    y[i, :] = x[i, :] + v

print (y)

[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]


In [11]:
# Tile and add
vv = np.tile(v, (4, 1))
print (vv)

print(x + vv)

[[1 0 1]
 [1 0 1]
 [1 0 1]
 [1 0 1]]
[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]


In [12]:
# Now with Numpy broadcasting

print (x + v)

[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]


In [13]:
# More broadcasting

v = np.array([1, 2, 3])
w = np.array([4, 5])

print(np.reshape(v, (3,1)) * w)

x = np.array([[1,2,3], [4,5,6]])

print (x + v)

print ((x.T + w).T)

print (x + np.reshape(w, (2, 1)))

print (x * 2)

[[ 4  5]
 [ 8 10]
 [12 15]]
[[2 4 6]
 [5 7 9]]
[[ 5  6  7]
 [ 9 10 11]]
[[ 5  6  7]
 [ 9 10 11]]
[[ 2  4  6]
 [ 8 10 12]]
