# Problem 1

In [17]:
import numpy as np
import random

In [19]:
# pure Python implementation of the walk
def drunkard_walk_py(steps = 100, simulations = 1000):
    min_dist = 100
    max_dist = 0
    avg_dist = 0
    for sim in range(simulations):
        dist = 0
        for step in range(steps):
            dist += random.randint(0,1)
        avg_dist += dist
        min_dist = min(dist,min_dist)
        max_dist = max(dist,max_dist)
    avg_dist /= simulations
    return avg_dist, min_dist, max_dist

drunkard_walk_py()

(50.121, 26, 66)

In [20]:
%timeit drunkard_walk_py()

391 ms ± 6.26 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Problem 2

In [23]:
# pure Python implementation of the walk
def drunkard_walk_numpy(steps = 100, simulations = 1000):
    min_dist = 100
    max_dist = 0
    avg_dist = 0
    for sim in range(simulations):
        walk = np.random.randint(0,2,steps)
        dist = np.sum(walk) 
        avg_dist += dist
        min_dist = min(dist,min_dist)
        max_dist = max(dist,max_dist)
    avg_dist /= simulations
    return avg_dist, min_dist, max_dist

drunkard_walk_numpy()

(50.134999999999998, 32, 71)

In [24]:
%timeit drunkard_walk_numpy()

20.3 ms ± 87.7 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


Numpy implementation is ~20 times faster

# Extra credit problem

In [25]:
def drunkard_walk_extra(steps = 100, simulations = 1000):
    walks = np.random.randint(0,2,[steps, simulations])
    dist = np.sum(walks, axis = 0)
    min_dist = np.min(dist)
    max_dist = np.max(dist)
    avg_dist = np.mean(dist)
    return avg_dist, min_dist, max_dist

drunkard_walk_extra()

(50.045000000000002, 33, 64)

In [26]:
%timeit drunkard_walk_extra()

1.38 ms ± 6.14 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


NumPy implementation, without Python loops is ~20 times as fast as the previous (Problem 2). It is ~400 times as fast as pure Python.  

# Remarks & Comments

In [28]:
# Calling array's method sum without specifying axis (incorrect)
arr = np.random.randint(0,2,[1000,100])
arr.sum()

49937

In [None]:
arr.sum(axis = 1) # eliminate axis = 1, correct

In [31]:
# Using Python sum function:
sum(arr) # accidentally incorrect, since it sums over the first axis (incorrect)

array([496, 507, 499, 510, 511, 486, 495, 476, 490, 484, 519, 514, 507,
       496, 496, 474, 489, 502, 488, 521, 487, 496, 512, 482, 509, 510,
       508, 493, 508, 515, 487, 484, 477, 495, 503, 508, 504, 501, 502,
       518, 502, 491, 488, 500, 510, 501, 502, 491, 490, 524, 532, 475,
       489, 490, 499, 496, 496, 485, 500, 483, 525, 468, 478, 483, 496,
       498, 493, 503, 498, 490, 517, 512, 513, 502, 499, 507, 516, 532,
       528, 504, 517, 485, 475, 484, 523, 502, 527, 504, 498, 465, 516,
       525, 481, 487, 491, 500, 498, 484, 504, 506])

In [33]:
sum(arr, axis = 1) # this should have been the solution, but it is not possible (correct, but won't work)

TypeError: sum() takes no keyword arguments

In [None]:
sum(arr.T) # transpose the array first, then use sum function (correct, but maybe less efficient)

In [None]:
del avgArray # what if avgArray does not exist
#avgArray = np.zeros(1000) # not needed
# avgArray[:] = np.sum(arr, axis = 1) # indices are not needed
avgArray = np.sum(arr, axis = 1) # this is sufficient
avgArray

In [76]:
np.atan(10,10)

AttributeError: module 'numpy' has no attribute 'atan'