In [2]:
# NumPy provides:
## ndarray - fast and space-efficient multidimensional array providing vectorized arithmetic operations
##    and sophisticated broadcasting capabilities
## standard mathematical functions for fast operations on entire arrays of data without having to write loops
## tools for reading / writing array data to disk and working with memory-mapped files
## linear algebra, random number generation, and Fourier transform capabilities
## tools for integrating code written in C, C++, and Fortran

In [22]:
# ndarray: NumPy's n-dimensional array type
import random
import numpy as np
# draw a 2 x 3 array (2 rows of 3 elements) of uniform samples from the half-open interval [0, 1)
data = np.random.uniform(low=0, high=1, size=(2, 3))
data

array([[ 0.36266433,  0.57161027,  0.31522562],
       [ 0.2206973 ,  0.52578877,  0.96649052]])

In [27]:
data * 10

array([[ 3.62664326,  5.71610275,  3.15225624],
       [ 2.20697302,  5.25788767,  9.66490518]])

In [28]:
data + data

array([[ 0.72532865,  1.14322055,  0.63045125],
       [ 0.4413946 ,  1.05157753,  1.93298104]])

In [29]:
# an ndarray is a generic multidimensional container for homogeneous data
# every array has a
## shape - a tuple indicating the size of each dimension
## dtype - an object describing the data type of the array
data.shape

(2, 3)

In [31]:
data.shape

(2, 3)

In [32]:
data.dtype

dtype('float64')

In [33]:
# create arrays using the array function
# takes any sequence-like object (including other arrays)
# produces a new NumPy array containing the passed data
## try it on lists
data1 = [6, 7.5, 8, 0, 1]

In [34]:
arr1 = np.array(data1)

In [35]:
arr1

array([ 6. ,  7.5,  8. ,  0. ,  1. ])

In [36]:
# nested sequences are converted into multidimensional arrays
data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]

In [37]:
arr2 = np.array(data2)

In [38]:
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [39]:
# ndim tells you how many dimensions there are
arr2.ndim

2

In [40]:
arr2.shape

(2, 4)

In [41]:
arr2.dtype

dtype('int64')

In [42]:
# other functions also create arrays
## zeros
## ones
## empty
## pass an integer length (for a 1-D array) or a tuple giving the full shape
np.zeros(10)

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [44]:
np.zeros((3, 6))

array([[ 0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.]])

In [48]:
# np.empty returns uninitialized memory: the contents are arbitrary (they may happen to be zeros, or leftover garbage values)
np.empty((2, 3, 2))

array([[[ 0.,  0.],
        [ 0.,  0.],
        [ 0.,  0.]],

       [[ 0.,  0.],
        [ 0.,  0.],
        [ 0.,  0.]]])

In [50]:
# arange is an array-valued version of the built-in Python range function
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [51]:
# standard array creation functions include:
## array             - convert input data to an ndarray
## asarray           - convert input to ndarray, but don't copy if the input is already an ndarray
## arange            - like built-in range but returns an array instead of a list
## ones, ones_like   - ones creates an array of 1's with the given shape and dtype; ones_like takes another array and produces a ones array of the same shape and dtype
## zeros, zeros_like - like ones and ones_like, but producing arrays of 0's instead
## empty, empty_like - create new arrays by allocating new memory, but don't populate with values
## eye, identity     - create a square N x N identity matrix (1's on the diagonal, 0's elsewhere)

In [53]:
# data types for arrays: a dtype can be requested explicitly when the array is created
arr1 = np.array([1, 2, 3], dtype=np.float64)
arr2 = np.array([1, 2, 3], dtype=np.int32)
arr1.dtype

dtype('float64')

In [54]:
arr2.dtype

dtype('int32')

In [55]:
# dtypes have a type name, and a number indicating the number of bits per element
## standard double-precision floating point values take up 8 bytes / 64 bits

In [None]:
# general data types:
## float   - floating point
## complex - complex
## int     - integer
## bool    - boolean
## string_ - fixed-width byte string; spelled bytes_ in current NumPy (the string_ alias was removed in NumPy 2.0)
## object  - object

In [56]:
# build an integer array to cast in the next cell; no dtype is given, so NumPy infers one from the data
arr = np.array([1, 2, 3, 4, 5])
arr.dtype

dtype('int64')

In [58]:
# cast arrays using astype
float_arr = arr.astype(np.float64)
float_arr

array([ 1.,  2.,  3.,  4.,  5.])

In [59]:
# when casting floating point to integer, decimals are truncated
arr = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])
arr

array([  3.7,  -1.2,  -2.6,   0.5,  12.9,  10.1])

In [60]:
arr.astype(np.int32)

array([ 3, -1, -2,  0, 12, 10], dtype=int32)

In [63]:
# strings that represent numbers can be converted to a numeric dtype too
## np.bytes_ is the fixed-width byte-string type (dtype kind 'S'); the older np.string_
## alias for the same type was removed in NumPy 2.0
numeric_strings = np.array(['1.25', '-9.6', '42'], dtype=np.bytes_)
numeric_strings.astype(float)

array([  1.25,  -9.6 ,  42.  ])

In [64]:
numeric_strings.dtype

dtype('S4')

In [65]:
numeric_strings.shape

(3,)

In [67]:
# astype also accepts the dtype attribute of an existing array
int_array = np.arange(10)
calibers = np.array(
    [.22, .270, .357, .380, .44, .50],
    dtype=np.float64,
)
int_array.astype(calibers.dtype)

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.])

In [None]:
# by default astype creates a new array (a copy of the data), even if the new dtype is the same as the old dtype

In [68]:
# operations between arrays and scalars
# arrays let you express batch operations on data without writing for loops - this is called vectorization
# any arithmetic operation between equal-size arrays applies the operation element-wise
arr = np.array([[1., 2., 3.], [4., 5., 6.]])
arr

array([[ 1.,  2.,  3.],
       [ 4.,  5.,  6.]])

In [69]:
arr * arr

array([[  1.,   4.,   9.],
       [ 16.,  25.,  36.]])

In [70]:
arr - arr

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [71]:
# arrays with scalars operate as you would expect
1 / arr

array([[ 1.        ,  0.5       ,  0.33333333],
       [ 0.25      ,  0.2       ,  0.16666667]])

In [72]:
arr ** 0.5

array([[ 1.        ,  1.41421356,  1.73205081],
       [ 2.        ,  2.23606798,  2.44948974]])

In [78]:
# operations between differently-sized arrays are called broadcasting - discussed in Chapter 12.
# indexing works similarly to lists
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [79]:
arr[5]

5

In [80]:
arr[5:8]

array([5, 6, 7])

In [81]:
arr[5:8] = 12

In [82]:
arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

In [83]:
# array slices are views on the original array, so if you change the slice, you change the array
arr_slice = arr[5:8]
arr_slice[1] = 12345
arr

array([    0,     1,     2,     3,     4,    12, 12345,    12,     8,     9])

In [85]:
arr_slice[:] = 64
arr

array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])

In [86]:
# to copy an array slice, you need to explicitly copy it
arr[5:8].copy()

array([64, 64, 64])

In [89]:
# higher-dimensional arrays, you have many more options
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

In [90]:
arr2d[2]

array([7, 8, 9])

In [91]:
arr2d[0]

array([1, 2, 3])

In [92]:
# access individual elements multiple ways
arr2d[0][2]

3

In [93]:
arr2d[0, 2]

3

In [95]:
# in multidimensional arrays, if you omit later indices, the returned object will be a 
## lower-dimensional ndarray consisting of all the data along the higher dimensions.
## so in the 2 x 2 x 3 array arr3d
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])

In [96]:
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [98]:
arr3d[0]

array([[1, 2, 3],
       [4, 5, 6]])

In [99]:
arr3d[1]

array([[ 7,  8,  9],
       [10, 11, 12]])

In [100]:
# both scalars and arrays can be assigned to arr3d[0]
old_values = arr3d[0].copy()
arr3d[0] = 42

In [101]:
arr3d

array([[[42, 42, 42],
        [42, 42, 42]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [102]:
arr3d[0] = old_values
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [103]:
# arr3d[1, 0] gives you all the values whose indices start with (1, 0), as a 1-D array
arr3d[1, 0]

array([7, 8, 9])

In [104]:
arr[1:6]

array([ 1,  2,  3,  4, 64])

In [105]:
# you can slice one or more axes, and mix integer indices with slices
# how slicing works
## slicing end indices takes everything UP TO BUT NOT INCLUDING the end index
## ALL start indices start from 0.
##
## a[start:end] # items start through end - 1
## a[start:]    # items start through the rest of the array
## a[:end]      # items from the beginning through end - 1
## a[:]         # the whole array - for an ndarray this is a view, not a copy (for a Python list it is a copy)
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [106]:
arr2d[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [108]:
arr2d[:2, 1:]

array([[2, 3],
       [5, 6]])

In [109]:
# colons mean take the entire axis
arr2d[:, :1]

array([[1],
       [4],
       [7]])

In [110]:
arr2d[:, :2]

array([[1, 2],
       [4, 5],
       [7, 8]])

In [111]:
arr2d[:, :3]

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [116]:
# boolean indexing
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(7, 4)

In [117]:
names

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], 
      dtype='|S4')

In [118]:
data

array([[-0.74065054, -0.87785273,  0.65480898, -0.20168362],
       [-0.22339213,  0.96250635, -0.53827651, -0.82261589],
       [-0.10761089, -2.5279502 , -0.24560855,  0.0462044 ],
       [-0.37359666, -0.56466215,  2.86107452, -0.25744252],
       [ 0.19848771, -0.01437371, -0.61132477, -1.79237388],
       [-0.7026414 ,  1.16721399,  0.8571313 , -2.17193371],
       [ 0.14312547, -0.35158962,  0.02882958,  0.65001196]])

In [119]:
names == 'Bob'

array([ True, False, False,  True, False, False, False], dtype=bool)

In [120]:
# now use that boolean to get data out of the array
## this creates a copy of the data 
data[names == 'Bob']

array([[-0.74065054, -0.87785273,  0.65480898, -0.20168362],
       [-0.37359666, -0.56466215,  2.86107452, -0.25744252]])

In [121]:
# The boolean array must be the same length as the axis that it's indexing. You can mix them with indices or slices:
data[names == 'Bob', 2:]

array([[ 0.65480898, -0.20168362],
       [ 2.86107452, -0.25744252]])

In [122]:
data[names == 'Bob', 3]

array([-0.20168362, -0.25744252])

In [123]:
# selecting everything but the matches:
## use != or negate the mask with ~ (unary - on a boolean array, seen in older NumPy code, now raises TypeError)
names != 'Bob'

array([False,  True,  True, False,  True,  True,  True], dtype=bool)

In [124]:
# ~ inverts a boolean mask; unary - on a boolean array raises TypeError in current NumPy
data[~(names == 'Bob')]

array([[-0.22339213,  0.96250635, -0.53827651, -0.82261589],
       [-0.10761089, -2.5279502 , -0.24560855,  0.0462044 ],
       [ 0.19848771, -0.01437371, -0.61132477, -1.79237388],
       [-0.7026414 ,  1.16721399,  0.8571313 , -2.17193371],
       [ 0.14312547, -0.35158962,  0.02882958,  0.65001196]])

In [125]:
# combine boolean conditions with & (and) and | (or); the Python keywords and / or do not work on boolean arrays
mask = (names == 'Bob') | (names == 'Will')
mask

array([ True, False,  True,  True,  True, False, False], dtype=bool)

In [126]:
data[mask]

array([[-0.74065054, -0.87785273,  0.65480898, -0.20168362],
       [-0.10761089, -2.5279502 , -0.24560855,  0.0462044 ],
       [-0.37359666, -0.56466215,  2.86107452, -0.25744252],
       [ 0.19848771, -0.01437371, -0.61132477, -1.79237388]])

In [127]:
# setting values works logically
data[data < 0] = 0

In [128]:
data

array([[ 0.        ,  0.        ,  0.65480898,  0.        ],
       [ 0.        ,  0.96250635,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.0462044 ],
       [ 0.        ,  0.        ,  2.86107452,  0.        ],
       [ 0.19848771,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  1.16721399,  0.8571313 ,  0.        ],
       [ 0.14312547,  0.        ,  0.02882958,  0.65001196]])

In [129]:
data[names != 'Joe'] = 7

In [130]:
data

array([[ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 0.        ,  0.96250635,  0.        ,  0.        ],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 0.        ,  1.16721399,  0.8571313 ,  0.        ],
       [ 0.14312547,  0.        ,  0.02882958,  0.65001196]])

In [131]:
# fancy indexing uses integer arrays
## using an 8 x 4 array:
arr = np.empty((8, 4))

In [132]:
# assigning a scalar to arr[i] sets every element of row i to that value
for i in range(8):
    arr[i] = i

In [133]:
arr

array([[ 0.,  0.,  0.,  0.],
       [ 1.,  1.,  1.,  1.],
       [ 2.,  2.,  2.,  2.],
       [ 3.,  3.,  3.,  3.],
       [ 4.,  4.,  4.,  4.],
       [ 5.,  5.,  5.,  5.],
       [ 6.,  6.,  6.,  6.],
       [ 7.,  7.,  7.,  7.]])

In [134]:
# you can select a subset of rows, passing a list or ndarray of integers specifying the order:
arr[[4, 3, 0, 6]]

array([[ 4.,  4.,  4.,  4.],
       [ 3.,  3.,  3.,  3.],
       [ 0.,  0.,  0.,  0.],
       [ 6.,  6.,  6.,  6.]])

In [135]:
# using negative indices selects rows from the end:
arr[[-3, -5, -7]]

array([[ 5.,  5.,  5.,  5.],
       [ 3.,  3.,  3.,  3.],
       [ 1.,  1.,  1.,  1.]])

In [136]:
# passing multiple index arrays does something different - selects a 
# 1D array of elements corresponding to each tuple of indices

In [137]:
arr = np.arange(32).reshape((8, 4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [138]:
# passing two index arrays pairs them up element by element: this selects the elements at (1, 0), (5, 3), (7, 1), and (2, 2)
arr[[1, 5, 7, 2], [0, 3, 1, 2]]

array([ 4, 23, 29, 10])

In [139]:
# if you want to get a rectangle by selecting a subset of the matrix's rows and columns:
arr[[1, 5, 7, 2]][:, [0, 3, 1, 2]]

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

In [140]:
# np.ix_ also does this
arr[np.ix_([1, 5, 7, 2], [0, 3, 1, 2])]

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

In [141]:
# transposing returns a view on the underlying data without copying anything. use the transpose method or the T attribute
arr = np.arange(15).reshape((3, 5))
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [142]:
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [143]:
# transpose is a method, so it has to be called; with no arguments it gives the same result as arr.T
arr.transpose()

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [144]:
# you'll use this a lot, especially in matrix computations
arr = np.random.randn(6, 3)

In [147]:
arr

array([[ 0.94777412,  0.17880728,  0.87223478],
       [ 0.20763139, -0.3859555 , -1.01011002],
       [ 0.02946586,  0.42810432,  0.03571498],
       [ 1.22894183, -0.50901613,  0.06787444],
       [ 0.67152513,  1.45725395, -0.51448064],
       [ 0.15622995,  0.39103904,  0.95810545]])

In [148]:
np.dot(arr.T, arr)

array([[ 2.92790664,  0.51607034,  0.50561521],
       [ 0.51607034,  2.89980502,  0.15148769],
       [ 0.50561521,  0.15148769,  2.96965462]])

In [151]:
# ndarray also has a method, swapaxes - which takes a pair of axis numbers
arr = np.arange(16).reshape((2, 2, 4))
arr

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [152]:
arr.swapaxes(1, 2)

array([[[ 0,  4],
        [ 1,  5],
        [ 2,  6],
        [ 3,  7]],

       [[ 8, 12],
        [ 9, 13],
        [10, 14],
        [11, 15]]])

In [154]:
arr.swapaxes(2, 1)

array([[[ 0,  4],
        [ 1,  5],
        [ 2,  6],
        [ 3,  7]],

       [[ 8, 12],
        [ 9, 13],
        [10, 14],
        [11, 15]]])

In [156]:
arr.swapaxes(0,1)

array([[[ 0,  1,  2,  3],
        [ 8,  9, 10, 11]],

       [[ 4,  5,  6,  7],
        [12, 13, 14, 15]]])

In [157]:
arr.swapaxes(0,2)

array([[[ 0,  8],
        [ 4, 12]],

       [[ 1,  9],
        [ 5, 13]],

       [[ 2, 10],
        [ 6, 14]],

       [[ 3, 11],
        [ 7, 15]]])

In [159]:
# Universal functions - fast element-wise operations on data in ndarrays.
# They are fast vectorized wrappers for simple functions that take one or more scalar values, and produce one or more scalar results
arr = np.arange(10)
# unary ufunc
np.sqrt(arr)

array([ 0.        ,  1.        ,  1.41421356,  1.73205081,  2.        ,
        2.23606798,  2.44948974,  2.64575131,  2.82842712,  3.        ])

In [160]:
# unary ufunc
np.exp(arr)

array([  1.00000000e+00,   2.71828183e+00,   7.38905610e+00,
         2.00855369e+01,   5.45981500e+01,   1.48413159e+02,
         4.03428793e+02,   1.09663316e+03,   2.98095799e+03,
         8.10308393e+03])

In [161]:
# binary ufuncs take 2 arrays and return a single array
x = np.random.randn(8)
y = np.random.randn(8)
x

array([ 0.09334351, -0.10800127,  1.2919862 , -1.63591931, -0.83708597,
       -0.41459199, -1.29340281, -0.91305323])

In [162]:
y

array([ 1.44185051,  1.79051415, -1.92000598, -0.04972881, -1.02455298,
       -0.3181628 , -0.48314601, -1.58581145])

In [163]:
np.maximum(x, y) # element-wise maximum

array([ 1.44185051,  1.79051415,  1.2919862 , -0.04972881, -0.83708597,
       -0.3181628 , -0.48314601, -0.91305323])

In [164]:
# for fun, compare these values to each of the arrays
z = np.maximum(x, y)
z == x

array([False, False,  True, False,  True, False, False,  True], dtype=bool)

In [165]:
z == y

array([ True,  True, False,  True, False,  True,  True, False], dtype=bool)

In [167]:
# some ufuncs return multiple arrays. modf - returns the fractional and integral parts of a floating point array
arr = np.random.randn(7) * 5
arr

array([ -3.83311238,  -2.45209149,  -1.22515766,   3.98651628,
       -10.66491957,   5.61549983,   6.18630187])

In [168]:
np.modf(arr)

(array([-0.83311238, -0.45209149, -0.22515766,  0.98651628, -0.66491957,
         0.61549983,  0.18630187]),
 array([ -3.,  -2.,  -1.,   3., -10.,   5.,   6.]))

In [None]:
# Unary ufuncs list
## abs, fabs               - absolute value
## sqrt                    - square root
## square                  - square
## exp                     - e^x of each element
## log, log10, log2, log1p - natural log, other logs
## sign                    - sign of each element (1, 0, or -1)
## ceil                    - smallest integer greater than or equal to each element
## floor                   - largest integer less than or equal to each element
## rint                    - round elements to the nearest integer, preserving the dtype
## modf                    - return the fractional and the integer parts of array as separate arrays
## isnan                   - boolean array indicating whether each value is Not a Number
## isfinite, isinf         - boolean array indicating whether each element is finite or infinite
## cos, cosh, sin, sinh    
## tan, tanh,              
## arccos, arccosh,        - regular and inverse trigonometric functions
## arcsin, arcsinh,        
## arctan, arctanh
## logical_not             - compute the truth value of not x element-wise. Equivalent to ~arr for boolean arrays

In [None]:
# Binary ufuncs list
## add                  - add corresponding elements
## subtract             - subtract elements in the second array from the first array
## multiply             - multiply
## divide, floor_divide - divide or floor divide (truncate the remainder)
## power                - raise elements in the first array to powers indicated in the second array
## maximum, fmax        - element-wise maximum. fmax ignores NaN
## minimum, fmin        - element-wise minimum. fmin ignores NaN
## mod                  - element-wise modulus (remainder of division)
## copysign             - copy sign of values in second argument to values in first argument
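## greater, greater_equal,
## less, less_equal,    - element-wise comparisons, returning a boolean array
## equal, not_equal       (equivalent to the infix operators >, >=, <, <=, ==, !=)
## logical_and,
## logical_or,          - element-wise truth value of the logical operation
## logical_xor            (for boolean arrays, equivalent to the infix operators &, |, ^)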