In [3]:
# Numpy provides:
## ndarray - fast and space-efficient multidimensional array providing vectorized arithmetic operations
##    and sophisticated broadcasting capabilities
## standard mathematical functions for fast operations on entire arrays of data without having to write loops
## tools for reading / writing array data to disk and working with memory-mapped files
## linear algebra, random number generation, and fourier transform capabilities
## tools for integrating code written in C, C++, and Fortran

In [4]:
# ndarray - n-dimensional arrays
import random
import numpy as np
# draw a 2 x 3 array (2 rows of 3 elements) of samples from the uniform distribution on [0, 1)
data = np.random.uniform(low=0, high=1, size=[2, 3])
data

array([[ 0.44958203,  0.25808899,  0.88854051],
       [ 0.57967269,  0.49456955,  0.87619026]])

In [5]:
data * 10

array([[ 4.49582035,  2.58088994,  8.88540513],
       [ 5.79672693,  4.94569553,  8.76190261]])

In [6]:
data + data

array([[ 0.89916407,  0.51617799,  1.77708103],
       [ 1.15934539,  0.98913911,  1.75238052]])

In [7]:
# array is generic multidimensional container for homogeneous data
# every array has a 
## shape - a tuple indicating the size of each dimension
## dtype - an object describing the data type of the array

In [8]:
data.shape

(2, 3)

In [9]:
data.dtype

dtype('float64')

In [10]:
# create arrays using the array function
# takes any sequence-like object (including other arrays)
# produces a new NumPy array containing the passed data
## try it on lists
data1 = [6, 7.5, 8, 0, 1]

In [11]:
arr1 = np.array(data1)

In [12]:
arr1

array([ 6. ,  7.5,  8. ,  0. ,  1. ])

In [13]:
# nested sequences are converted into multidimensional arrays
data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]

In [14]:
arr2 = np.array(data2)

In [15]:
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [16]:
# ndim tells you how many dimensions there are
arr2.ndim

2

In [17]:
arr2.shape

(2, 4)

In [18]:
arr2.dtype

dtype('int64')

In [19]:
# other functions also create arrays
## zeros
## ones
## empty
## pass an integer length, or a tuple giving the shape
np.zeros(10)

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [20]:
np.zeros((3, 6))

array([[ 0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.]])

In [21]:
# np.empty allocates the array without initializing it: the contents are arbitrary, so you may see zeros or garbage values
np.empty((2, 3, 2))

array([[[  0.00000000e+000,   0.00000000e+000],
        [  2.18131428e-314,   2.18131689e-314],
        [  2.17825190e-314,   2.18131786e-314]],

       [[  2.16987836e-314,   0.00000000e+000],
        [  2.17586540e-314,   2.16990480e-314],
        [  0.00000000e+000,   8.34402831e-309]]])

In [22]:
# arange is an array-valued version of the built-in Python range function
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [23]:
# standard array creation functions include:
## array             - convert input data to an ndarray
## asarray           - convert input to ndarray, but don't copy if the input is already an ndarray
## arange            - like built-in range but returns an array instead of a list
## ones, ones_like   - ones creates an array of 1's with the given shape and dtype; ones_like takes another array and produces a ones array of the same shape and dtype
## zeros, zeros_like - like ones and ones_like, but producing arrays of 0's instead
## empty, empty_like - create new arrays by allocating new memory, but don't populate with values
## eye, identity     - create a square N x N identity matrix (1's on the diagonal, 0's elsewhere)

In [24]:
# data types for arrays
arr1 = np.array([1, 2, 3], dtype = np.float64)
arr2 = np.array([1, 2, 3], dtype = np.int32)
arr1.dtype

dtype('float64')

In [25]:
arr2.dtype

dtype('int32')

In [26]:
# dtypes have a type name, and a number indicating the number of bits per element
## standard double-precision floating point values take up 8 bytes / 64 bits

In [27]:
# general data types:
## float   - floating point
## complex - complex
## int     - integer
## bool    - boolean
## string_ - fixed-length byte string (np.bytes_; the np.string_ alias was removed in NumPy 2.0)
## object  - object

In [28]:
# cast arrays using astype
arr = np.array([1, 2, 3, 4, 5])
arr.dtype

dtype('int64')

In [29]:
# cast arrays using astype
float_arr = arr.astype(np.float64)
float_arr

array([ 1.,  2.,  3.,  4.,  5.])

In [30]:
# when casting floating point to integer, decimals are truncated
arr = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])
arr

array([  3.7,  -1.2,  -2.6,   0.5,  12.9,  10.1])

In [31]:
arr.astype(np.int32)

array([ 3, -1, -2,  0, 12, 10], dtype=int32)

In [32]:
# can do this with strings too
# np.bytes_ is the fixed-width byte-string type (dtype 'S4' here); the older
# np.string_ alias for it was removed in NumPy 2.0
numeric_strings = np.array(['1.25', '-9.6', '42'], dtype = np.bytes_)
numeric_strings.astype(float)

array([  1.25,  -9.6 ,  42.  ])

In [33]:
numeric_strings.dtype

dtype('S4')

In [34]:
numeric_strings.shape

(3,)

In [35]:
# an existing array's dtype can be handed straight to astype
calibers = np.array([.22, .270, .357, .380, .44, .50], dtype = np.float64)
int_array = np.arange(10)
int_array.astype(calibers.dtype)

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.])

In [36]:
# astype always creates a new array, even if the new dtype is the same as the old dtype

In [37]:
# operations between arrays and scalars
# you can vectorize arrays
# arithmetic operations between equal-size arrays applies the operation element-wise
arr = np.array([[1., 2., 3.], [4., 5., 6.]])
arr

array([[ 1.,  2.,  3.],
       [ 4.,  5.,  6.]])

In [38]:
arr * arr

array([[  1.,   4.,   9.],
       [ 16.,  25.,  36.]])

In [39]:
arr - arr

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [40]:
# arrays with scalars operate as you would expect
1 / arr

array([[ 1.        ,  0.5       ,  0.33333333],
       [ 0.25      ,  0.2       ,  0.16666667]])

In [41]:
arr ** 0.5

array([[ 1.        ,  1.41421356,  1.73205081],
       [ 2.        ,  2.23606798,  2.44948974]])

In [42]:
# operations between differently-sized arrays are called broadcasting - discussed in Chapter 12.
# indexing works similarly to lists
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [43]:
arr[5]

5

In [44]:
arr[5:8]

array([5, 6, 7])

In [45]:
arr[5:8] = 12

In [46]:
arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

In [47]:
# array slices are views on the original array, so if you change the slice, you change the array
arr_slice = arr[5:8]
arr_slice[1] = 12345
arr

array([    0,     1,     2,     3,     4,    12, 12345,    12,     8,     9])

In [48]:
arr_slice[:] = 64
arr

array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])

In [49]:
# to copy an array slice, you need to explicitly copy it
arr[5:8].copy()

array([64, 64, 64])

In [50]:
# higher-dimensional arrays, you have many more options
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

In [51]:
arr2d[2]

array([7, 8, 9])

In [52]:
arr2d[0]

array([1, 2, 3])

In [53]:
# access individual elements multiple ways
arr2d[0][2]

3

In [54]:
arr2d[0, 2]

3

In [55]:
# in multidimensional arrays, if you omit later indices, the returned object will be a 
## lower-dimensional ndarray consisting of all the data along the higher dimensions.
## so in the 2 x 2 x 3 array arr3d
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])

In [56]:
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [57]:
arr3d[0]

array([[1, 2, 3],
       [4, 5, 6]])

In [58]:
arr3d[1]

array([[ 7,  8,  9],
       [10, 11, 12]])

In [59]:
# both scalars and arrays can be assigned to arr3d[0]
old_values = arr3d[0].copy()
arr3d[0] = 42

In [60]:
arr3d

array([[[42, 42, 42],
        [42, 42, 42]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [61]:
arr3d[0] = old_values
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [62]:
# arr3d[1, 0] gives you all the values whose indices start with (1, 0)
arr3d[1, 0]

array([7, 8, 9])

In [63]:
arr[1:6]

array([ 1,  2,  3,  4, 64])

In [64]:
# you can slice one or more axes, and mix integers
# how slicing works
## slicing end indices takes everything UP TO BUT NOT INCLUDING the end index
## ALL start indices start from 0.
##
## a[start:end] # items start through end - 1
## a[start:]    # items start through the rest of the array
## a[:end]      # items from the beginning through end - 1
## a[:]         # the whole array (a copy for Python lists, but a view for ndarrays)
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [65]:
arr2d[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [66]:
arr2d[:2, 1:]

array([[2, 3],
       [5, 6]])

In [67]:
# colons mean take the entire axis
arr2d[:, :1]

array([[1],
       [4],
       [7]])

In [68]:
arr2d[:, :2]

array([[1, 2],
       [4, 5],
       [7, 8]])

In [69]:
arr2d[:, :3]

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [70]:
# boolean indexing
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(7, 4)

In [71]:
names

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], 
      dtype='|S4')

In [72]:
data

array([[ 1.37263431,  0.71197426, -1.41788934,  0.00379994],
       [ 1.70670085,  2.33952857,  0.08065459, -1.66178864],
       [-0.58019049,  1.23781958,  0.23753376,  0.67056391],
       [ 0.61108414, -0.1135595 , -0.00279405, -0.11884423],
       [-0.26795884, -1.10547075, -0.13532957,  0.95213094],
       [-1.29675718,  0.01208061,  0.4949801 ,  0.73600728],
       [ 0.76854256,  0.69810549, -0.97031274,  0.57379679]])

In [73]:
names == 'Bob'

array([ True, False, False,  True, False, False, False], dtype=bool)

In [74]:
# now use that boolean to get data out of the array
## this creates a copy of the data 
data[names == 'Bob']

array([[ 1.37263431,  0.71197426, -1.41788934,  0.00379994],
       [ 0.61108414, -0.1135595 , -0.00279405, -0.11884423]])

In [75]:
# The boolean array must be the same length as the axis that it's indexing. You can mix them with indices or slices:
data[names == 'Bob', 2:]

array([[-1.41788934,  0.00379994],
       [-0.00279405, -0.11884423]])

In [76]:
data[names == 'Bob', 3]

array([ 0.00379994, -0.11884423])

In [77]:
# selecting everything but the matches:
## use != or invert the mask with ~ (unary - on a boolean array is an error in current NumPy)
names != 'Bob'

array([False,  True,  True, False,  True,  True,  True], dtype=bool)

In [78]:
# ~ inverts a boolean mask; unary minus on a boolean array is rejected by
# current NumPy with a TypeError
data[~(names == 'Bob')]

array([[ 1.70670085,  2.33952857,  0.08065459, -1.66178864],
       [-0.58019049,  1.23781958,  0.23753376,  0.67056391],
       [-0.26795884, -1.10547075, -0.13532957,  0.95213094],
       [-1.29675718,  0.01208061,  0.4949801 ,  0.73600728],
       [ 0.76854256,  0.69810549, -0.97031274,  0.57379679]])

In [79]:
# create filters with & and |
mask = (names == 'Bob') | (names == 'Will')
mask

array([ True, False,  True,  True,  True, False, False], dtype=bool)

In [80]:
data[mask]

array([[ 1.37263431,  0.71197426, -1.41788934,  0.00379994],
       [-0.58019049,  1.23781958,  0.23753376,  0.67056391],
       [ 0.61108414, -0.1135595 , -0.00279405, -0.11884423],
       [-0.26795884, -1.10547075, -0.13532957,  0.95213094]])

In [81]:
# setting values works logically
data[data < 0] = 0

In [82]:
data

array([[ 1.37263431,  0.71197426,  0.        ,  0.00379994],
       [ 1.70670085,  2.33952857,  0.08065459,  0.        ],
       [ 0.        ,  1.23781958,  0.23753376,  0.67056391],
       [ 0.61108414,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.95213094],
       [ 0.        ,  0.01208061,  0.4949801 ,  0.73600728],
       [ 0.76854256,  0.69810549,  0.        ,  0.57379679]])

In [83]:
data[names != 'Joe'] = 7

In [84]:
data

array([[ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 1.70670085,  2.33952857,  0.08065459,  0.        ],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 0.        ,  0.01208061,  0.4949801 ,  0.73600728],
       [ 0.76854256,  0.69810549,  0.        ,  0.57379679]])

In [85]:
# fancy indexing uses integer arrays
## using an 8 x 4 array:
arr = np.empty((8, 4))

In [86]:
for i in range(8):
    arr[i] = i

In [87]:
arr

array([[ 0.,  0.,  0.,  0.],
       [ 1.,  1.,  1.,  1.],
       [ 2.,  2.,  2.,  2.],
       [ 3.,  3.,  3.,  3.],
       [ 4.,  4.,  4.,  4.],
       [ 5.,  5.,  5.,  5.],
       [ 6.,  6.,  6.,  6.],
       [ 7.,  7.,  7.,  7.]])

In [88]:
# you can select a subset of rows, passing a list or ndarray of integers specifying the order:
arr[[4, 3, 0, 6]]

array([[ 4.,  4.,  4.,  4.],
       [ 3.,  3.,  3.,  3.],
       [ 0.,  0.,  0.,  0.],
       [ 6.,  6.,  6.,  6.]])

In [89]:
# using negative indices selects rows from the end:
arr[[-3, -5, -7]]

array([[ 5.,  5.,  5.,  5.],
       [ 3.,  3.,  3.,  3.],
       [ 1.,  1.,  1.,  1.]])

In [90]:
# passing multiple index arrays does something different - selects a 
# 1D array of elements corresponding to each tuple of indices

In [91]:
arr = np.arange(32).reshape((8, 4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [92]:
# passing two index arrays pairs them up element by element: this selects the elements at (1, 0), (5, 3), (7, 1) and (2, 2)
arr[[1, 5, 7, 2], [0, 3, 1, 2]]

array([ 4, 23, 29, 10])

In [93]:
# if you want to get a rectangle by selecting a subset of the matrix's rows and columns:
arr[[1, 5, 7, 2]][:, [0, 3, 1, 2]]

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

In [94]:
# np.ix_ also does this
arr[np.ix_([1, 5, 7, 2], [0, 3, 1, 2])]

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

In [95]:
# transpose returns a view without copying. use the transpose() method or the T attribute
arr = np.arange(15).reshape((3, 5))
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [96]:
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [97]:
# transpose is a method and has to be called; without the parentheses the cell
# only displays the method object. arr.transpose() gives the same result as arr.T
arr.transpose()

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [98]:
# you'll use this a lot, especially in matrix computations
arr = np.random.randn(6, 3)

In [99]:
arr

array([[-1.25252172,  1.07298317, -0.42742947],
       [ 0.21925949,  0.1051042 ,  0.44833288],
       [ 0.23743815, -0.56696518, -0.39657852],
       [ 1.02416924, -1.44911007, -0.74786527],
       [ 0.1823076 , -0.87206285,  0.14749626],
       [ 0.43560394,  0.04872232,  1.07227098]])

In [100]:
np.dot(arr.T, arr)

array([[ 2.94517175, -3.07740281,  0.26753761],
       [-3.07740281,  4.34657678,  0.82069988],
       [ 0.26753761,  0.82069988,  2.27179552]])

In [101]:
# ndarray also has a method, swapaxes - which takes a pair of axis numbers
arr = np.arange(16).reshape((2, 2, 4))
arr

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [102]:
arr.swapaxes(1, 2)

array([[[ 0,  4],
        [ 1,  5],
        [ 2,  6],
        [ 3,  7]],

       [[ 8, 12],
        [ 9, 13],
        [10, 14],
        [11, 15]]])

In [103]:
arr.swapaxes(2, 1)

array([[[ 0,  4],
        [ 1,  5],
        [ 2,  6],
        [ 3,  7]],

       [[ 8, 12],
        [ 9, 13],
        [10, 14],
        [11, 15]]])

In [104]:
arr.swapaxes(0,1)

array([[[ 0,  1,  2,  3],
        [ 8,  9, 10, 11]],

       [[ 4,  5,  6,  7],
        [12, 13, 14, 15]]])

In [105]:
arr.swapaxes(0,2)

array([[[ 0,  8],
        [ 4, 12]],

       [[ 1,  9],
        [ 5, 13]],

       [[ 2, 10],
        [ 6, 14]],

       [[ 3, 11],
        [ 7, 15]]])

In [106]:
# Universal functions - fast element-wise operations on data in ndarrays.
# They are fast vectorized wrappers for simple functions that take one or more scalar values, and produce one or more scalar results
arr = np.arange(10)
# unary ufunc
np.sqrt(arr)

array([ 0.        ,  1.        ,  1.41421356,  1.73205081,  2.        ,
        2.23606798,  2.44948974,  2.64575131,  2.82842712,  3.        ])

In [107]:
# unary ufunc
np.exp(arr)

array([  1.00000000e+00,   2.71828183e+00,   7.38905610e+00,
         2.00855369e+01,   5.45981500e+01,   1.48413159e+02,
         4.03428793e+02,   1.09663316e+03,   2.98095799e+03,
         8.10308393e+03])

In [108]:
# binary ufuncs take 2 arrays and return a single array
x = np.random.randn(8)
y = np.random.randn(8)
x

array([ 0.56985216, -0.12273709,  0.3482354 , -0.36572913,  0.43518796,
        0.25909505,  0.04437445,  0.36234959])

In [109]:
y

array([ 0.11383927, -0.51595035, -0.46252442,  1.51795817, -1.09738555,
       -0.72697697,  0.55572301, -0.12099813])

In [110]:
np.maximum(x, y) # element-wise maximum

array([ 0.56985216, -0.12273709,  0.3482354 ,  1.51795817,  0.43518796,
        0.25909505,  0.55572301,  0.36234959])

In [111]:
# for fun, compare these values to each of the arrays
z = np.maximum(x, y)
z == x

array([ True,  True,  True, False,  True,  True, False,  True], dtype=bool)

In [112]:
z == y

array([False, False, False,  True, False, False,  True, False], dtype=bool)

In [113]:
# some ufuncs return multiple arrays. modf - returns the fractional and integral parts of a floating point array
arr = np.random.randn(7) * 5
arr

array([-0.09131136,  2.6537395 ,  6.86864098,  4.28564953,  7.70417022,
        5.86245626, -6.86509378])

In [114]:
np.modf(arr)

(array([-0.09131136,  0.6537395 ,  0.86864098,  0.28564953,  0.70417022,
         0.86245626, -0.86509378]), array([-0.,  2.,  6.,  4.,  7.,  5., -6.]))

In [115]:
# Unary ufuncs list
## abs, fabs               - absolute value
## sqrt                    - square root
## square                  - square
## exp                     - e^x of each element
## log, log10, log2, log1p - natural log, other logs
## sign                    - sign of each element (1, 0, or -1)
## ceil                    - smallest integer greater than or equal to each element
## floor                   - largest integer less than or equal to each element
## rint                    - round elements to the nearest integer, preserving the dtype
## modf                    - return the fractional and the integer parts of array as separate arrays
## isnan                   - boolean array indicating whether each value is Not a Number
## isfinite, isinf         - boolean array indicating whether each element is finite or infinite
## cos, cosh, sin, sinh    
## tan, tanh,              
## arccos, arccosh,        - regular and inverse trigonometric functions
## arcsin, arcsinh,        
## arctan, arctanh
## logical_not             - compute the truth value of not x element-wise. Equivalent to ~arr for boolean arrays

In [116]:
# Binary ufuncs list
## add                  - add corresponding elements
## subtract             - subtract elements in the second array from the first array
## multiply             - multiply
## divide, floor_divide - divide or floor divide (truncate the remainder)
## power                - raise elements in the first array to powers indicated in the second array
## maximum, fmax        - element-wise maximum. fmax ignores NaN
## minimum, fmin        - element-wise minimum. fmin ignores NaN
## mod                  - element-wise modulus (remainder of division)
## copysign             - copy sign of values in second argument to values in first argument
## greater, greater_equal
## less, less_equal,    - element-wise comparisons, returning a boolean array
## equal, not_equal,
## logical_and,         
## logical_or,          - 
## logical_xor

In [117]:
# Vectorization makes operations 1-2 orders of magnitude faster than their pure Python equivalents.
# example - we want to evaluate a function across a grid of values
# np.meshgrid takes two 1D arrays and produces two 2D matrices corresponding to all pairs of x, y in the arrays
points = np.arange(-5, 5, 0.01) # array of 1000 equally-spaced points
xs, ys = np.meshgrid(points, points)
ys

array([[-5.  , -5.  , -5.  , ..., -5.  , -5.  , -5.  ],
       [-4.99, -4.99, -4.99, ..., -4.99, -4.99, -4.99],
       [-4.98, -4.98, -4.98, ..., -4.98, -4.98, -4.98],
       ..., 
       [ 4.97,  4.97,  4.97, ...,  4.97,  4.97,  4.97],
       [ 4.98,  4.98,  4.98, ...,  4.98,  4.98,  4.98],
       [ 4.99,  4.99,  4.99, ...,  4.99,  4.99,  4.99]])

In [118]:
# you can evaluate it the same way you would write it with two points
import matplotlib.pyplot as plt
z = np.sqrt(xs ** 2 + ys ** 2)
z

array([[ 7.07106781,  7.06400028,  7.05693985, ...,  7.04988652,
         7.05693985,  7.06400028],
       [ 7.06400028,  7.05692568,  7.04985815, ...,  7.04279774,
         7.04985815,  7.05692568],
       [ 7.05693985,  7.04985815,  7.04278354, ...,  7.03571603,
         7.04278354,  7.04985815],
       ..., 
       [ 7.04988652,  7.04279774,  7.03571603, ...,  7.0286414 ,
         7.03571603,  7.04279774],
       [ 7.05693985,  7.04985815,  7.04278354, ...,  7.03571603,
         7.04278354,  7.04985815],
       [ 7.06400028,  7.05692568,  7.04985815, ...,  7.04279774,
         7.04985815,  7.05692568]])

In [119]:
# instantiate a plot object
plt.imshow(z, cmap = plt.cm.gray); plt.colorbar()

<matplotlib.colorbar.Colorbar instance at 0x10efa03f8>

In [120]:
# raw string so the backslash in the LaTeX \sqrt is not parsed as a string escape
# (an unrecognized escape like "\s" triggers a warning on modern Python)
plt.title(r"Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")

<matplotlib.text.Text at 0x10edadb50>

In [121]:
# now display the plot
plt.show()

In [125]:
# conditional logic in arrays
# use numpy.where for if-else conditions
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])

In [126]:
# goal: pick the value from xarr where cond is True, otherwise the one from yarr
# written here as a plain list comprehension over the three arrays in lockstep
result = [xv if flag else yv for xv, yv, flag in zip(xarr, yarr, cond)]
result

[1.1000000000000001, 2.2000000000000002, 1.3, 1.3999999999999999, 2.5]

In [127]:
# but list comprehensions are slow for large arrays.
# they also don't work with multidimensional arrays.
# you can do this with np.where:
result = np.where(cond, xarr, yarr)
result

array([ 1.1,  2.2,  1.3,  1.4,  2.5])

In [128]:
# The second and third arguments to np.where don't need to be arrays - either or both can be scalars
# format:
# np.where(boolean_array, scalar_or_array, scalar_or_array)

In [129]:
# practical: matrix of randomly generated data, 
# want to replace all positive values with 2 and all negative values with -2
arr = np.random.randn(4, 4)
arr

array([[ 0.0979479 , -1.05720569, -1.10985086, -0.61117555],
       [ 0.21839026, -0.81199063,  0.41690479, -0.47834149],
       [-0.99941526,  1.60685265, -0.25413731, -0.82272472],
       [ 0.17434054,  0.68515228,  0.35949819,  0.8351983 ]])

In [130]:
# replace all positive values with 2 and all negative values with -2

np.where(arr > 0, 2, -2)

array([[ 2, -2, -2, -2],
       [ 2, -2,  2, -2],
       [-2,  2, -2, -2],
       [ 2,  2,  2,  2]])

In [131]:
# replace all positive values with 2, but keep all negative values
np.where(arr > 0, 2, arr)

array([[ 2.        , -1.05720569, -1.10985086, -0.61117555],
       [ 2.        , -0.81199063,  2.        , -0.47834149],
       [-0.99941526,  2.        , -0.25413731, -0.82272472],
       [ 2.        ,  2.        ,  2.        ,  2.        ]])

In [132]:
# can also be used for more complicated situations
# sample boolean arrays covering every combination of the two conditions,
# so that the loop below actually runs
cond1 = np.array([True, True, False, False])
cond2 = np.array([True, False, True, False])
n = len(cond1)

# classify each position: 0 = both true, 1 = only cond1, 2 = only cond2, 3 = neither
result = []
for i in range(n):
    if cond1[i] and cond2[i]:
        result.append(0)
    elif cond1[i]:
        result.append(1)
    elif cond2[i]:
        result.append(2)
    else:
        result.append(3)

In [133]:
# we can replace this with a much more concise np.where statement:
# sample boolean arrays covering every combination, so this cell runs on its own
cond1 = np.array([True, True, False, False])
cond2 = np.array([True, False, True, False])
# nested np.where: 0 = both true, 1 = only cond1, 2 = only cond2, 3 = neither
result = np.where(cond1 & cond2, 0,
                  np.where(cond1, 1,
                           np.where(cond2, 2, 3)))
result

In [134]:
# we can also take advantage of the fact that booleans behave as
# 1s and 0s in calculations, and just do it as a formula
# sample boolean arrays covering every combination, so this cell runs on its own
cond1 = np.array([True, True, False, False])
cond2 = np.array([True, False, True, False])
# each term is non-zero for exactly one case (only cond1 -> 1, only cond2 -> 2,
# neither -> 3), leaving 0 when both are true; ~ negates a boolean array
# (unary minus is not allowed on boolean arrays)
result = 1 * (cond1 & ~cond2) + 2 * (cond2 & ~cond1) + 3 * ~(cond1 | cond2)

In [135]:
# aggregations (reductions) are also available
# sum, mean, std
arr = np.random.randn(5, 4)
arr.mean()

-0.094679893435377804

In [140]:
arr

array([[-0.43006826, -1.2348602 ,  1.77250531,  0.62421627],
       [-1.29385987,  0.75365476, -1.49737822,  1.18388283],
       [ 0.55928252, -0.85252726, -1.04229256, -1.84237925],
       [ 0.83149508, -0.55445209,  0.68171811, -1.32748669],
       [ 1.18412355,  0.24416333,  0.7299672 , -0.38330244]])

In [136]:
np.mean(arr)

-0.094679893435377804

In [137]:
arr.sum()

-1.8935978687075561

In [138]:
np.sum(arr)

-1.8935978687075561

In [142]:
# both mean and sum take an optional axis argument
arr.mean(axis=0)

array([ 0.17019461, -0.32880429,  0.12890397, -0.34901386])

In [143]:
arr.mean(axis=1)

array([ 0.18294828, -0.21342512, -0.79447913, -0.0921814 ,  0.44373791])

In [145]:
# cumsum and cumprod do not reduce to a single value: they return an array of
# the running (cumulative) results along the given axis
arr = np.array([
    [0, 1, 2],
    [3, 4, 5],
    [6, 7, 8],
])
arr.cumsum(axis=0)

array([[ 0,  1,  2],
       [ 3,  5,  7],
       [ 9, 12, 15]])

In [147]:
arr.cumprod(1)

array([[  0,   0,   0],
       [  3,  12,  60],
       [  6,  42, 336]])

In [None]:
# basic array statistical methods
# sum - sum all the elements in the array or along an axis. Zero-length arrays have sum 0
# mean - arithmetic mean. Zero-length arrays have NaN mean

