# Arrays and Vectorized Computation

In [9]:
import matplotlib as plt
%matplotlib inline
from __future__ import division
from numpy.random import randn
import numpy as np
np.set_printoptions(precision=4, suppress=True)

## The NumPy ndarray: A Multidimensional Array Object

In [4]:
data = randn(2,3)

In [5]:
data

array([[ 0.2392, -1.2595,  0.1063],
       [-0.8541,  0.5128,  0.9099]])

In [6]:
data * 10

array([[  2.392 , -12.5955,   1.063 ],
       [ -8.5411,   5.1281,   9.099 ]])

In [8]:
data + data

array([[ 0.4784, -2.5191,  0.2126],
       [-1.7082,  1.0256,  1.8198]])

In [9]:
data.dtype

dtype('float64')

### Creating ndarrays

In [10]:
# Build a one-dimensional ndarray from a plain Python list. The list mixes
# ints and a float, and the resulting array stores every element as a float
# (see the output and the float64 dtype reported further down).
data1 = [6,7.5,8,0,1]
arr1 = np.array(data1)
arr1

array([ 6. ,  7.5,  8. ,  0. ,  1. ])

In [11]:
# A list of two equal-length lists becomes a two-dimensional array:
# one row per inner list, as the output below shows.
data2 = [[1,2,3,4],[5,6,7,8]]
arr2 = np.array(data2)
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [12]:
arr2.ndim

2

In [14]:
arr2.shape

(2, 4)

In [15]:
arr1.dtype

dtype('float64')

In [16]:
arr2.dtype

dtype('int64')

In [17]:
np.zeros(10)

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [18]:
np.zeros((3,6))

array([[ 0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.]])

In [19]:
np.empty((2,3,2))

array([[[  2.6816e+154,  -2.6868e+154],
        [  6.9277e-310,   6.9277e-310],
        [  6.9277e-310,   0.0000e+000]],

       [[  2.6816e+154,   2.6816e+154],
        [  1.4946e-154,   2.6816e+154],
        [  9.8813e-324,   8.3440e-309]]])

In [20]:
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

### Data Types for ndarrays

In [21]:
arr1 = np.array([1,2,3], dtype=np.float64)

In [22]:
arr2 = np.array([1,2,3], dtype=np.int32)

In [23]:
arr1.dtype

dtype('float64')

In [24]:
arr2.dtype

dtype('int32')

In [25]:
# Casting example: start from an array built from Python ints, with no
# explicit dtype, so NumPy picks an integer dtype on its own.
arr = np.array([1,2,3,4])

In [26]:
arr.dtype

dtype('int64')

In [27]:
# astype converts the integer array to float64; the result is bound to a new
# name and `arr` itself is left as it was.
float_arr = arr.astype(np.float64)

In [28]:
float_arr.dtype

dtype('float64')

In [29]:
arr = np.array([3.7,-1.2,-2,6,0.5,12.9,10.1])
arr

array([  3.7,  -1.2,  -2. ,   6. ,   0.5,  12.9,  10.1])

In [30]:
# Casting floats to an integer dtype drops the fractional part rather than
# rounding (3.7 -> 3, -1.2 -> -1, 12.9 -> 12 in the output below).
arr.astype(np.int32)

array([ 3, -1, -2,  6,  0, 12, 10], dtype=int32)

In [32]:
# Fixed-width byte-string array holding numeric text. np.bytes_ is used instead
# of np.string_: both name the same type on older NumPy, but the np.string_
# alias no longer exists in NumPy 2.0.
numeric_strings = np.array(['1.25','-9.6','42'], dtype=np.bytes_)
numeric_strings

array(['1.25', '-9.6', '42'], 
      dtype='|S4')

In [33]:
# Strings that spell out numbers can be parsed into floats with astype;
# the Python type `float` is accepted as the target dtype.
numeric_strings.astype(float)

array([  1.25,  -9.6 ,  42.  ])

In [35]:
int_array = np.arange(10)
int_array

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [36]:
calibers = np.array([.22,.270,.357,.380,.44,.50], dtype=np.float64)

In [37]:
# The target type can be taken from another array's dtype attribute instead
# of being named explicitly; here the ints are cast to calibers' float64.
int_array.astype(calibers.dtype)

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.])

In [38]:
# Allocate 8 unsigned 32-bit integers ('u4') without initializing them; the
# contents are arbitrary until assigned. The keyword must be spelled `dtype`.
empty_uint32 = np.empty(8, dtype='u4')

### Operations between Arrays and Scalars

In [40]:
arr = np.array([[1.,2.,3.],[4.,5.,6.]])

In [42]:
arr

array([[ 1.,  2.,  3.],
       [ 4.,  5.,  6.]])

In [43]:
arr * arr

array([[  1.,   4.,   9.],
       [ 16.,  25.,  36.]])

In [44]:
arr - arr

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [45]:
1/arr

array([[ 1.    ,  0.5   ,  0.3333],
       [ 0.25  ,  0.2   ,  0.1667]])

In [46]:
arr ** 0.5

array([[ 1.    ,  1.4142,  1.7321],
       [ 2.    ,  2.2361,  2.4495]])

In [48]:
arr ** 2

array([[  1.,   4.,   9.],
       [ 16.,  25.,  36.]])

### Basic Indexing and Slicing

In [49]:
arr = np.arange(10)

In [50]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [51]:
arr[5]

5

In [52]:
arr[5:8]

array([5, 6, 7])

In [53]:
# Assigning a scalar to a slice writes that value into every selected
# position (indices 5, 6 and 7), modifying arr in place.
arr[5:8]=12

In [54]:
arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

In [55]:
# A slice of an ndarray is a view, not a copy: the writes made through
# arr_slice in the following cells show up in arr as well.
arr_slice = arr[5:8]

In [56]:
arr_slice[1] = 12345

In [57]:
arr

array([    0,     1,     2,     3,     4,    12, 12345,    12,     8,     9])

In [58]:
arr_slice[:] = 64

In [59]:
arr_slice

array([64, 64, 64])

In [60]:
arr

array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])

In [61]:
arr2d = np.array([[1,2,3],[4,5,6],[7,8,9]])

In [62]:
arr2d[2]

array([7, 8, 9])

In [63]:
arr2d[0][2]

3

In [64]:
arr2d[0,2]

3

In [65]:
arr3d= np.array([[[1,2,3],[4,5,6]],[[7,8,9],[10,11,12]]])

In [66]:
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [67]:
arr3d[0]

array([[1, 2, 3],
       [4, 5, 6]])

In [68]:
# Take an explicit copy so the saved values survive the overwrite of
# arr3d[0] in the next cell and can be assigned back afterwards.
old_values=arr3d[0].copy()

In [69]:
arr3d[0]=42

In [70]:
arr3d

array([[[42, 42, 42],
        [42, 42, 42]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [71]:
arr3d[0]=old_values

In [72]:
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [73]:
arr3d[1,0]

array([7, 8, 9])

#### Indexing with slices

In [74]:
arr[1:6]

array([ 1,  2,  3,  4, 64])

In [75]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [76]:
arr2d[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [77]:
arr2d[:2,1:]

array([[2, 3],
       [5, 6]])

In [78]:
arr2d[1,:2]

array([4, 5])

In [79]:
arr2d[2,:1]

array([7])

In [80]:
arr2d[:,:1]

array([[1],
       [4],
       [7]])

In [82]:
# Assigning to a multi-axis slice writes to the whole selected sub-array:
# rows 0-1, columns 1 onward are set to 0 and the last row is untouched.
arr2d[:2,1:]=0
arr2d

array([[1, 0, 0],
       [4, 0, 0],
       [7, 8, 9]])

### Boolean Indexing

In [83]:
names = np.array(['Bob','Joe','Will','Bob','Will','Joe','Joe'])

In [84]:
data = np.random.randn(7,4)

In [85]:
names

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], 
      dtype='|S4')

In [86]:
data

array([[-0.0203, -1.1111,  0.2017,  0.3496],
       [ 0.039 ,  1.7775, -0.1774,  1.2332],
       [-0.1407, -1.3713, -1.4609,  0.0667],
       [ 0.2618,  1.4181,  0.346 ,  0.5331],
       [ 0.6729, -1.0771,  0.0002,  1.1378],
       [ 1.1981,  0.0388, -0.3735,  0.6734],
       [-0.0038,  0.2204, -1.3909, -0.0294]])

In [87]:
names == 'Bob'

array([ True, False, False,  True, False, False, False], dtype=bool)

In [89]:
data[names=='Bob']

array([[-0.0203, -1.1111,  0.2017,  0.3496],
       [ 0.2618,  1.4181,  0.346 ,  0.5331]])

In [90]:
data[names=='Bob',2:]

array([[ 0.2017,  0.3496],
       [ 0.346 ,  0.5331]])

In [91]:
data[names=='Bob',3]

array([ 0.3496,  0.5331])

In [92]:
names != 'Bob'

array([False,  True,  True, False,  True,  True,  True], dtype=bool)

In [93]:
# Select every row that does NOT belong to Bob by inverting the boolean mask
# with ~ (element-wise logical NOT). Unary minus on a boolean array was
# deprecated and is rejected by current NumPy releases.
data[~(names=='Bob')]

array([[ 0.039 ,  1.7775, -0.1774,  1.2332],
       [-0.1407, -1.3713, -1.4609,  0.0667],
       [ 0.6729, -1.0771,  0.0002,  1.1378],
       [ 1.1981,  0.0388, -0.3735,  0.6734],
       [-0.0038,  0.2204, -1.3909, -0.0294]])

In [94]:
# Combine two element-wise comparisons with | (element-wise OR). Each
# comparison is parenthesized because | binds more tightly than ==.
mask = (names=='Bob') | (names == 'Will')

In [95]:
mask

array([ True, False,  True,  True,  True, False, False], dtype=bool)

In [96]:
data[mask]

array([[-0.0203, -1.1111,  0.2017,  0.3496],
       [-0.1407, -1.3713, -1.4609,  0.0667],
       [ 0.2618,  1.4181,  0.346 ,  0.5331],
       [ 0.6729, -1.0771,  0.0002,  1.1378]])

In [97]:
data

array([[-0.0203, -1.1111,  0.2017,  0.3496],
       [ 0.039 ,  1.7775, -0.1774,  1.2332],
       [-0.1407, -1.3713, -1.4609,  0.0667],
       [ 0.2618,  1.4181,  0.346 ,  0.5331],
       [ 0.6729, -1.0771,  0.0002,  1.1378],
       [ 1.1981,  0.0388, -0.3735,  0.6734],
       [-0.0038,  0.2204, -1.3909, -0.0294]])

In [98]:
# Boolean-mask assignment: every entry of data that is below zero is
# overwritten with 0, in place.
data[data < 0] = 0

In [99]:
data

array([[ 0.    ,  0.    ,  0.2017,  0.3496],
       [ 0.039 ,  1.7775,  0.    ,  1.2332],
       [ 0.    ,  0.    ,  0.    ,  0.0667],
       [ 0.2618,  1.4181,  0.346 ,  0.5331],
       [ 0.6729,  0.    ,  0.0002,  1.1378],
       [ 1.1981,  0.0388,  0.    ,  0.6734],
       [ 0.    ,  0.2204,  0.    ,  0.    ]])

## Example: Random Walks

In [5]:
import random

# Pure-Python random walk: start at 0 and move +1 or -1 on each coin flip,
# recording the position after every step, so walk ends up with steps + 1
# entries (the starting position plus one per step).
position = 0
walk = [position]
steps = 1000
# range() runs on both Python 2 and Python 3; xrange exists only on Python 2.
for i in range(steps):
    step = 1 if random.randint(0,1) else -1
    position += step
    walk.append(position)

In [15]:
# Vectorized single walk: draw all coin flips at once, map each flip to a
# -1/+1 step, then accumulate the steps to get the position after each one.
nsteps = 1000
draws = np.random.randint(0, 2, size=nsteps)  # each draw is 0 or 1
steps = np.where(draws == 0, -1, 1)           # 0 -> -1, 1 -> +1
walk = np.cumsum(steps)

In [16]:
walk.min()

-36

In [17]:
walk.max()

10

In [18]:
# First crossing time: argmax on a boolean array returns the index of the
# first True, i.e. the first step at which the walk is at least 10 away from
# the origin. NOTE: it would also return 0 if the walk never got that far.
(np.abs(walk) >= 10).argmax()

167

### Simulating Many Random Walks at Once

In [19]:
# Simulate many walks at once: one row per walk, one column per step.
nwalks, nsteps = 5000, 1000
draws = np.random.randint(0, 2, size=(nwalks, nsteps))  # coin flips: 0 or 1
steps = np.where(draws == 0, -1, 1)                     # 0 -> -1, 1 -> +1
walks = np.cumsum(steps, axis=1)                        # running position within each walk
walks

array([[   1,    2,    1, ...,  -46,  -47,  -48],
       [   1,    2,    1, ..., -102, -103, -104],
       [   1,    2,    3, ...,   22,   21,   20],
       ..., 
       [  -1,    0,    1, ...,    4,    5,    6],
       [  -1,   -2,   -3, ...,  -24,  -25,  -24],
       [   1,    2,    3, ...,  -68,  -67,  -66]])

In [20]:
walks.shape

(5000, 1000)

In [21]:
walks.max()

128

In [23]:
walks.min()

-114

In [24]:
# One boolean per walk (row): True when the walk's absolute position reaches
# 30 at any step; any(1) reduces along the step axis.
hits30 = (np.abs(walks)>=30).any(1)

In [25]:
hits30

array([ True,  True,  True, ..., False,  True,  True], dtype=bool)

In [26]:
hits30.sum() # the number that hit 30 or -30

3404

In [27]:
# Keep only the walks that did reach 30, then take argmax along the step
# axis: on a boolean row it gives the index of the first True, i.e. the first
# step at which |position| >= 30 for that walk.
crossing_times = (np.abs(walks[hits30])>=30).argmax(1)

In [28]:
crossing_times

array([447, 221, 671, ..., 475, 161, 433])

In [29]:
crossing_times.mean()

497.74030552291424