In [1]:
import numpy as np

# Processing speed difference between a NumPy array and a Python list.
# Both containers hold the same one million integers (0 .. 999999).
my_arr = np.arange(1000000)
my_list = list(range(1000000))

# Double every element ten times with each container. '%time' is an IPython
# line magic that reports how long the one-line loop after it took to run.
%time for _ in range(10): my_arr2 = my_arr * 2
%time for _ in range(10): my_list2 = [x * 2 for x in my_list]

Wall time: 44.8 ms
Wall time: 1.92 s


In [2]:
# The ndarray: NumPy's N-dimensional array object.
# Generate random data arranged as 2 rows by 3 columns.
data = np.random.randn(2, 3)
print(data, '\n')

# Arithmetic is written as if the array were a single scalar; NumPy applies
# it to every element.
times_ten = data * 10     # each element multiplied by 10
doubled = data + data     # each element added to itself
print(times_ten, '\n')
print(doubled, '\n')

# Every array knows its shape (size along each axis) and its element type.
print(data.shape, data.dtype, sep='\n')

[[ 0.02047053  0.6479785   0.56160457]
 [ 3.47937034  0.01303058 -0.48226316]] 

[[ 0.20470531  6.47978497  5.61604571]
 [34.79370341  0.13030584 -4.82263157]] 

[[ 0.04094106  1.29595699  1.12320914]
 [ 6.95874068  0.02606117 -0.96452631]] 

(2, 3)
float64


In [3]:
# np.array() turns a sequence object into an ndarray.
data1 = [6, 7.5, 8, 0, 1]    # plain Python list mixing ints and floats
arr1 = np.array(data1)       # list -> one-dimensional ndarray
print(type(arr1))
print(arr1, '\n')

# A nested list whose inner lists all have the same length becomes a
# multidimensional array (one inner list per row).
data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]
arr2 = np.array(data2)
print(type(arr2), arr2, sep='\n')

# Number of dimensions, shape and element type of each array.
# arr1 is one-dimensional: its shape is the 1-tuple (5,), i.e. 5 elements.
print("\nNumber of dimensions for arr1 = ", arr1.ndim)
print("arr1 shape = ", arr1.shape)
print("arr1 type = ", arr1.dtype)

# arr2 is two-dimensional: 2 rows by 4 columns.
print("\nNumber of dimensions for arr2 = ", arr2.ndim)
print("arr2 shape = ", arr2.shape)
print("arr2 type = ", arr2.dtype)

<class 'numpy.ndarray'>
[6.  7.5 8.  0.  1. ] 

<class 'numpy.ndarray'>
[[1 2 3 4]
 [5 6 7 8]]

Number of dimensions for arr1 =  1
arr1 shape =  (5,)
arr1 type =  float64

Number of dimensions for arr2 =  2
arr2 shape =  (2, 4)
arr2 type =  int32


ndarray creation and initialization functions
![NPArrayCreateSM.jpg](attachment:NPArrayCreateSM.jpg)

In [4]:
# Functions that create an ndarray and set its initial contents.
arr3 = np.zeros(shape=15)       # one-dimensional, every element 0.0
print(arr3)

arr3 = np.ones(shape=15)        # one-dimensional, every element 1.0
print(arr3)

arr3 = np.arange(0, 15)         # one-dimensional, the integers 0 through 14
print(arr3, '\n')

# Passing a tuple as the shape gives a multidimensional array:
# here 3 rows by 6 columns of zeros.
arr4 = np.zeros(shape=(3, 6))
print("shape =", arr4.shape)
print("dimenstions = ", arr4.ndim)
print(arr4, '\n')

# np.empty() allocates the array without initializing it, so the values
# printed are arbitrary -- never assume they will be zeros.
arr5 = np.empty(shape=(2, 3, 2))
print("shape = ", arr5.shape)
print("dimenstions = ", arr5.ndim)
print(arr5, '\n')

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14] 

shape = (3, 6)
dimenstions =  2
[[0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]] 

shape =  (2, 3, 2)
dimenstions =  3
[[[1.53325644e-311 3.16202013e-322]
  [0.00000000e+000 0.00000000e+000]
  [0.00000000e+000 7.72323725e-043]]

 [[3.14390875e+179 3.72534543e-057]
  [1.10656501e-046 1.36971464e-071]
  [7.11631999e-038 5.99022668e-038]]] 



NumPy Data Types
![NPDataTypesSM.jpg](attachment:NPDataTypesSM.jpg)

In [5]:
# ndarray data types
# The element type can be chosen explicitly when the array is created.
arr1 = np.array([1, 2, 3], dtype=np.float64)
arr2 = np.array([1, 2, 3], dtype=np.int32)
print(arr1.dtype)
print(arr2.dtype, '\n')

# Cast to another type with the 'astype()' method.
arr = np.array([1, 2, 3, 4, 5])
print(arr.dtype, arr)
float_arr = arr.astype(np.float64)
print(float_arr.dtype, float_arr, '\n')

# Casting floating point to int truncates: the fractional part is dropped.
arr = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])
print(arr)
print(arr.astype(np.int32), '\n')

# Strings can be converted to numbers.
# np.bytes_ is NumPy's fixed-width byte-string type. The older alias
# np.string_ was removed in NumPy 2.0, so np.bytes_ is used here to keep the
# cell working on both NumPy 1.x and 2.x.
numeric_strings = np.array(['1.25', '-9.6', '42'], dtype=np.bytes_)
# Note: NumPy aliases Python data types to its own data types.
# Here Python 'float' is used instead of 'np.float64'.
print(numeric_strings.astype(float), '\n')

# Use another array's dtype as the target of a cast.
int_array = np.arange(10)
calibers = np.array([.22, .270, .357, .380, .44, .50], dtype=np.float64)
print(calibers.dtype)
int_arr = int_array.astype(calibers.dtype)
print(int_arr.dtype, int_arr, '\n')

# A dtype can also be given by its shorthand code ('u4' = uint32).
# np.empty() does not initialize the array, so the printed values are
# arbitrary.
empty_uint32 = np.empty(8, dtype='u4')
print(empty_uint32.dtype, empty_uint32)

float64
int32 

int32 [1 2 3 4 5]
float64 [1. 2. 3. 4. 5.] 

[ 3.7 -1.2 -2.6  0.5 12.9 10.1]
[ 3 -1 -2  0 12 10] 

[ 1.25 -9.6  42.  ] 

float64
float64 [0. 1. 2. 3. 4. 5. 6. 7. 8. 9.] 

uint32 [1 2 3 4 5 6 7 8]


In [6]:
# Vectorization - math on whole arrays without writing loops

# Arithmetic between two equal-shaped arrays works element by element.
arr = np.array([[1.0, 2.0, 3.0],
                [4.0, 5.0, 6.0]])
print(arr, '\n')
squared = arr * arr       # element-wise product
zeroed = arr - arr        # element-wise difference
print(squared, '\n')
print(zeroed, '\n')

# An operation with a scalar is applied to every element of the array.
print(1 / arr)
print(arr ** 0.5, '\n')

# Comparing two arrays yields a boolean array of the same shape.
arr2 = np.array([[0.0, 4.0, 1.0],
                 [7.0, 2.0, 12.0]])
print(arr2, arr2 > arr, sep='\n')

[[1. 2. 3.]
 [4. 5. 6.]] 

[[ 1.  4.  9.]
 [16. 25. 36.]] 

[[0. 0. 0.]
 [0. 0. 0.]] 

[[1.         0.5        0.33333333]
 [0.25       0.2        0.16666667]]
[[1.         1.41421356 1.73205081]
 [2.         2.23606798 2.44948974]] 

[[ 0.  4.  1.]
 [ 7.  2. 12.]]
[[False  True False]
 [ True False  True]]


In [7]:
# Indexing and slicing
# Slicing a one-dimensional array uses the same syntax as a Python list
arr = np.arange(10)
print(arr)
print(arr[5])
print(arr[5:8], '\n')

# Assigning a scalar to a slice writes that value into every sliced element
arr[5:8] = 12
print(arr, '\n')

# Note: unlike Python lists, an array slice is not a copy; changes made
# through the slice are made to the source array
arr_slice = arr[5:8]  # get a slice of 'arr'
print(arr_slice)
arr_slice[1] = 12345  # change made to 'arr_slice'
print(arr, '\n')            # is reflected in 'arr'

# Assign a value to every element of an array using a 'bare' [:] slice
# (the array here is 'arr_slice', so elements 5-7 of 'arr' change as well)
arr_slice[:] = 64
print(arr, '\n')

# Note: to get an independent copy of a slice, use the '.copy()' method
arr_slice_copy = arr[5:8].copy()
print(arr_slice_copy)

[0 1 2 3 4 5 6 7 8 9]
5
[5 6 7] 

[ 0  1  2  3  4 12 12 12  8  9] 

[12 12 12]
[    0     1     2     3     4    12 12345    12     8     9] 

[ 0  1  2  3  4 64 64 64  8  9] 

[64 64 64]


axis 0 = row, axis 1 = column
![NPIndexingSM.jpg](attachment:NPIndexingSM.jpg)

In [8]:
# Indexing on multi-dimensional arrays
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) # two dimensional
print(arr2d, '\n')
print(arr2d[2], '\n')     # a single index selects a whole row (here the 3rd)

# A comma-separated list of indices selects an individual element
print(arr2d[0][2])  # both forms produce the same result
print(arr2d[0, 2], '\n')

# Create a (2, 2, 3) three-dimensional array
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
print(arr3d, '\n')
# Specifying only one index results in a lower-dimensional ndarray
print(arr3d[0], '\n') # a (2 row, 3 column) array

# You may assign both scalar values and arrays
old_values = arr3d[0].copy()  # keep a copy so the values can be restored
arr3d[0] = 42 # assigns '42' to all elements of 'arr3d[0]'
print(arr3d, '\n')

arr3d[0] = old_values # assigning an array restores 'arr3d[0]'
print(arr3d, '\n')

# Providing two indices results in the one-dimensional array at (1, 0)
print(arr3d[1, 0])
# This is the same as indexing in two steps
x = arr3d[1]
print(x[0])

[[1 2 3]
 [4 5 6]
 [7 8 9]] 

[7 8 9] 

3
3 

[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]] 

[[1 2 3]
 [4 5 6]] 

[[[42 42 42]
  [42 42 42]]

 [[ 7  8  9]
  [10 11 12]]] 

[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]] 

[7 8 9]
[7 8 9]


![NP2dSliceSM.jpg](attachment:NP2dSliceSM.jpg)
Two-dimensional array slicing

In [9]:
# Two-dimensional indexing with slicing
# Note: 'arr' and 'arr2d' are not created in this cell; they are the arrays
# left over from the earlier 1-D slicing and multi-dimensional indexing cells
print(arr)         # Slicing a 1-dimensional array
print(arr[1:6], '\n')

# [row, column]
print(arr2d, '\n')        # Slicing a 2-dimensional array is a bit different
print(arr2d[:2], '\n')     # first 2 rows of 'arr2d'
print(arr2d[:2, 1:], '\n') # last 2 columns of the first 2 rows of 'arr2d'
print(arr2d[1, :2], '\n') # first 2 columns of the 2nd row (index 1)
print(arr2d[:2, 2], '\n') # 3rd column of the first 2 rows of 'arr2d'
print(arr2d[:, :1], '\n') # 1st column only of all 3 rows (result stays 2-D)

# Assigning to a slice modifies the original array in place:
# assign 0 to all elements of last 2 columns of the first 2 rows of 'arr2d'
arr2d[:2, 1:] = 0 
print(arr2d) 

[ 0  1  2  3  4 64 64 64  8  9]
[ 1  2  3  4 64] 

[[1 2 3]
 [4 5 6]
 [7 8 9]] 

[[1 2 3]
 [4 5 6]] 

[[2 3]
 [5 6]] 

[4 5] 

[3 6] 

[[1]
 [4]
 [7]] 

[[1 0 0]
 [4 0 0]
 [7 8 9]]


In [10]:
# Boolean Indexing
# 'names' has 7 entries and 'data' has 7 rows, so entry i of 'names' labels
# row i of 'data'
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(7, 4)
print(names)
print(data, '\n')

# Let's use 'names' to select the corresponding rows in 'data'
print(names == "Bob") # first let's create a boolean array for 'Bob'
print(data[names == "Bob"], '\n') # use the array to index 'data'
# Can even mix boolean indexes with slices or integers on the column axis
print(data[names == "Bob", 2:], '\n') # columns 2 onward of both 'Bob' rows
print(data[names == "Bob", 3], '\n') # column 3 (the last) of both 'Bob' rows

# To select every row that is not a 'Bob' row, either compare with '!=' or
# invert the '==' result with the '~' operator
print(names != 'Bob') # Create not 'Bob' Boolean array
print(data[~(names == 'Bob')], '\n') # negation (invert) operator '~'

cond = names == 'Bob'
print(cond)        # 'Bob' Boolean array
print(~cond, '\n') # Not 'Bob' Boolean array
print(data[~cond], '\n')

# Create a Boolean mask by combining conditions using '&' (and) and '|' (or)
mask = (names == 'Bob') | (names == 'Will')
print(mask)
print(data[mask], '\n')

# Modifying values using boolean arrays (these assignments change 'data'
# in place)

# set all negative values to 0
data[data < 0] = 0
print(data, '\n')

# set every row whose name is not 'Joe' to a value (7)
data[names != 'Joe'] = 7
print(data, '\n')

['Bob' 'Joe' 'Will' 'Bob' 'Will' 'Joe' 'Joe']
[[ 1.28530423  0.44243403  0.98958026  0.51558177]
 [ 0.19550118  0.54422588 -0.80547633  1.04672241]
 [ 0.77305883 -0.54655072  0.3834909   0.16232096]
 [-0.92783466 -0.30622293  0.31395637  0.36179079]
 [-0.65634784 -0.76358336 -0.47001278  0.72900517]
 [-1.11615433 -1.12969465 -0.31026998  0.50448995]
 [-1.65711789 -0.13383943  0.81946929  0.67308599]] 

[ True False False  True False False False]
[[ 1.28530423  0.44243403  0.98958026  0.51558177]
 [-0.92783466 -0.30622293  0.31395637  0.36179079]] 

[[0.98958026 0.51558177]
 [0.31395637 0.36179079]] 

[0.51558177 0.36179079] 

[False  True  True False  True  True  True]
[[ 0.19550118  0.54422588 -0.80547633  1.04672241]
 [ 0.77305883 -0.54655072  0.3834909   0.16232096]
 [-0.65634784 -0.76358336 -0.47001278  0.72900517]
 [-1.11615433 -1.12969465 -0.31026998  0.50448995]
 [-1.65711789 -0.13383943  0.81946929  0.67308599]] 

[ True False False  True False False False]
[False  True  True F

In [11]:
# Fancy indexing - indexing with lists (or arrays) of integers
arr = np.empty((8, 4))

for i in range(8):       # fill row i with the value i
    arr[i, :] = i
print(arr, '\n')

# A list of row numbers selects those rows, in the order listed
print(arr[[4, 3, 0, 6]], '\n')
# Negative row numbers count back from the end of 'arr'
print(arr[[-3, -5, -7]], '\n')

# An 8-row, 4-column matrix holding the values 0 - 31
arr = np.arange(32).reshape(8, 4)
print(arr, '\n')

# With two index lists the entries are paired up, picking the elements at
# (1, 0), (5, 3), (7, 1) and (2, 2): the one-dimensional array [4, 23, 29, 10]
print(arr[[1, 5, 7, 2], [0, 3, 1, 2]], '\n')  # [rows, columns]

# To get the rectangular block made of those rows and columns instead,
# index in two steps: select the rows, then reorder the columns of the result
print(arr[[1, 5, 7, 2]][:, [0, 3, 1, 2]])

[[0. 0. 0. 0.]
 [1. 1. 1. 1.]
 [2. 2. 2. 2.]
 [3. 3. 3. 3.]
 [4. 4. 4. 4.]
 [5. 5. 5. 5.]
 [6. 6. 6. 6.]
 [7. 7. 7. 7.]] 

[[4. 4. 4. 4.]
 [3. 3. 3. 3.]
 [0. 0. 0. 0.]
 [6. 6. 6. 6.]] 

[[5. 5. 5. 5.]
 [3. 3. 3. 3.]
 [1. 1. 1. 1.]] 

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]
 [24 25 26 27]
 [28 29 30 31]] 

[ 4 23 29 10] 

[[ 4  7  5  6]
 [20 23 21 22]
 [28 31 29 30]
 [ 8 11  9 10]]


In [12]:
# Build the integers 0 - 31 as a flat array, then reshape them into
# 8 rows by 4 columns
arr = np.arange(32)
print(arr, '\n')
arr = np.reshape(arr, (8, 4))
print(arr, '\n')

# The same array produced by one chained expression
arr = np.arange(32).reshape(8, 4)
print(arr, '\n')

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31] 

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]
 [24 25 26 27]
 [28 29 30 31]] 

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]
 [24 25 26 27]
 [28 29 30 31]] 



In [13]:
# Transposing arrays and swapping axes
arr = np.arange(15).reshape(3, 5)
print(arr, '\n')
print(arr.T, '\n')              # '.T' turns the (3, 5) array into a (5, 3) one
print(arr.T.dot(arr), '\n')     # the transpose is handy for a dot product

# A (2, 2, 4) three-dimensional array holding the values 0 - 15
arr = np.arange(16).reshape(2, 2, 4)
print(arr, '\n')
# transpose() takes the new order of the axes:
# from (1st, 2nd, 3rd) to (2nd, 1st, 3rd)
print(arr.transpose(1, 0, 2), '\n')

# swapaxes() exchanges one pair of axes of a multi-dimensional array
print(arr, '\n')
print(np.swapaxes(arr, 1, 2))

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]] 

[[ 0  5 10]
 [ 1  6 11]
 [ 2  7 12]
 [ 3  8 13]
 [ 4  9 14]] 

[[125 140 155 170 185]
 [140 158 176 194 212]
 [155 176 197 218 239]
 [170 194 218 242 266]
 [185 212 239 266 293]] 

[[[ 0  1  2  3]
  [ 4  5  6  7]]

 [[ 8  9 10 11]
  [12 13 14 15]]] 

[[[ 0  1  2  3]
  [ 8  9 10 11]]

 [[ 4  5  6  7]
  [12 13 14 15]]] 

[[[ 0  1  2  3]
  [ 4  5  6  7]]

 [[ 8  9 10 11]
  [12 13 14 15]]] 

[[[ 0  4]
  [ 1  5]
  [ 2  6]
  [ 3  7]]

 [[ 8 12]
  [ 9 13]
  [10 14]
  [11 15]]]
