In [1]:
"""
NumPy-based algorithms are generally 10 to 100 times faster (or more)  
than their pure Python counterparts and use significantly less memory.
"""
import numpy as np
my_arr = np.arange(1000000)
my_list = list(range(1000000))
%time for _ in range(10): my_arr2 = my_arr * 2
%time for _ in range(10): my_list2 = [x * 2 for x in my_list]

Wall time: 18.9 ms
Wall time: 718 ms


In [2]:
import numpy as np
data = np.random.randn(2, 3)
print(data)
print(data + data)
print(data*10)
data?

[[-1.63057121  0.49934743 -0.09423495]
 [-0.42280835  0.53602723 -1.5414461 ]]
[[-3.26114242  0.99869486 -0.18846991]
 [-0.84561669  1.07205445 -3.08289221]]
[[-16.30571212   4.9934743   -0.94234954]
 [ -4.22808347   5.36027227 -15.41446104]]


In [3]:
"""
Creating ndarrays
The easiest way to create an array is to use the array function.      
This accepts any sequence-like object (including other arrays) and      
produces a new NumPy array containing the passed data
"""
lst1 = [6, 7, 8, 9, 10]
arr1 = np.array(lst1, dtype=np.int32)
print(arr1)
lst2 = [[1,2,3,4,5],[6,7,8,9,10]]
arr2 = np.array(lst2, dtype=np.float64)
print(arr2)

[ 6  7  8  9 10]
[[ 1.  2.  3.  4.  5.]
 [ 6.  7.  8.  9. 10.]]


In [4]:
#explicitly convert or cast an array from one dtype to another using ndarray’s astype method
#Calling astype always creates a new array (a copy of the data), even if the new dtype is the same as the old dtype.
#Any arithmetic operations between equal-size arrays applies the operation element-wise
#Comparisons between arrays of the same size yield boolean arrays
print(arr1.dtype)
float_arr = arr1.astype(np.float64)
print(float_arr.dtype)
numeric_strings = np.array(['1.25', '-9.6', '55'], dtype=np.string_)
numeric_strings = numeric_strings.astype(float)
print(numeric_strings.dtype)
print(numeric_strings)
arr3 = arr2*arr2
print(arr3)
print(arr3 > arr2)

int32
float64
float64
[ 1.25 -9.6  55.  ]
[[  1.   4.   9.  16.  25.]
 [ 36.  49.  64.  81. 100.]]
[[False  True  True  True  True]
 [ True  True  True  True  True]]


In [5]:
"""
Basic Indexing and Slicing
As you can see, if you assign a scalar value to a slice, as in arr[5:8] = 12, 
the value is propagated (or broadcasted henceforth) to the entire selection. 
An important first distinction from Python’s built-in lists is that array slices are views 
on the original array. This means that the data is not copied, and any modifications to the view 
will be reflected in the source array.
If you want a copy of a slice of an ndarray instead of a view, you will need to 
explicitly copy the array—for example, arr[5:8].copy().
"""
arr1_slice = arr1[1:3]
arr1_slice_cp = arr1[1:3].copy()
arr1_slice_cp[:] = -100
print("original before modification:", arr1)
print("sliced's copy: ", arr1_slice_cp)
print("original after sliced copy modified:", arr1)
print("sliced's original: ", arr1_slice)
arr1_slice[:] = 100
print("sliced's orignial modified, and original is:", arr1)

original before modification: [ 6  7  8  9 10]
sliced's copy:  [-100 -100]
original after sliced copy modified: [ 6  7  8  9 10]
sliced's original:  [7 8]
sliced's orignial modified, and original is: [  6 100 100   9  10]


In [6]:
"""
Multidimensional array, 2*2*3 example
"""
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
print("3d array:", arr3d)
print("array's first row:", arr3d[0])
old_values = arr3d[0].copy()
arr3d[0] = 99
print("3d array again after modified:", arr3d)
arr3d[0] = old_values
print("3d array again afer modified again:", arr3d)
print("array's second row first column:", arr3d[1,0])

3d array: [[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]]
array's first row: [[1 2 3]
 [4 5 6]]
3d array again after modified: [[[99 99 99]
  [99 99 99]]

 [[ 7  8  9]
  [10 11 12]]]
3d array again afer modified again: [[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]]
array's second row first column: [7 8 9]


In [7]:
"""
slicing is along axis
"""
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(arr2d[:2])
print(arr2d[:2, 1:])
print(arr2d[1, :2])
print(arr2d[:, :1])

[[1 2 3]
 [4 5 6]]
[[2 3]
 [5 6]]
[4 5]
[[1]
 [4]
 [7]]


In [8]:
"""
Boolean Indexing
Like arithmetic operations, comparisons (such as ==) with arrays are also vectorized. 
Thus, comparing names with the string 'Bob' yields a boolean array
And then the boolean array can be passed when indexing the array
You can even mix and match boolean arrays with slices or integers 
Boolean selection will not fail if the boolean array is not the correct length
To select everything but 'Bob', you can either use != or negate the condition using ~
"""
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(7, 4)
print(names)
print(data)
print(names == 'Bob')
print(data[names == 'Bob'])#for 2d array's corresponding rows
print(data[names == 'Bob', 2:])#for 2d array's corresponding rows and starting from the third column
#The ~ operator can be useful when you want to invert a general condition
cond = names == 'Bob'
data[~cond]
#Selecting two of the three names to combine multiple boolean conditions, 
#use boolean arithmetic operators like & (and) and | (or)
mask = (names == 'Bob') | (names == 'Will')
print("mask:", mask)
data[data < 0] = 0
print("after changing negative values to zeros:", data)
data[names != 'Joe'] = 7
print("after updating all rows which are not 'Joe', the array:", data)

['Bob' 'Joe' 'Will' 'Bob' 'Will' 'Joe' 'Joe']
[[ 1.9047148  -0.25410388  0.32612545 -1.18280088]
 [ 1.08685278  0.40213338  1.26318534  0.50640363]
 [ 0.21936546  0.80949871  1.41862094 -0.93966809]
 [-0.3278349   0.81250673 -0.0633565  -0.38470565]
 [-0.82506961  0.80234987  0.57779251 -0.37057198]
 [-0.4037065   1.72320309 -0.78959071  0.46366894]
 [ 1.4168506   1.08792382 -0.34413235  1.56537211]]
[ True False False  True False False False]
[[ 1.9047148  -0.25410388  0.32612545 -1.18280088]
 [-0.3278349   0.81250673 -0.0633565  -0.38470565]]
[[ 0.32612545 -1.18280088]
 [-0.0633565  -0.38470565]]
mask: [ True False  True  True  True False False]
after changing negative values to zeros: [[1.9047148  0.         0.32612545 0.        ]
 [1.08685278 0.40213338 1.26318534 0.50640363]
 [0.21936546 0.80949871 1.41862094 0.        ]
 [0.         0.81250673 0.         0.        ]
 [0.         0.80234987 0.57779251 0.        ]
 [0.         1.72320309 0.         0.46366894]
 [1.4168506  1.087923

### Fancy Indexing
fancy indexing, unlike slicing, always copies the data into a new array

In [9]:
arr = np.empty((8, 4))
for i in range(8):
    arr[i] = i
"""
To select out a subset of the rows in a particular order, 
you can simply pass a list or ndarray of integers specifying the desired order
"""
arr[[4, 3, 0, 6]]

array([[4., 4., 4., 4.],
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.]])

In [10]:
"""
Passing multiple index arrays does something slightly different; 
it selects a one-dimensional array of elements corresponding to each tuple of indices
"""
arr = np.arange(32).reshape((8, 4))
arr[[1, 5, 7, 2], [0, 3, 1, 2]]

array([ 4, 23, 29, 10])

first create an array of selected rows from arr, assuming called arrTemp
then create another array using all of the rows from arrTemp and re-arrange its columns

In [11]:
"""
rectangular region formed by selecting a subset of the matrix’s rows and columns
"""
arr[[1, 5, 7, 2]][:, [0, 3, 1, 2]]

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

### Transposing Array

In [12]:
arr = np.arange(15).reshape((3, 5))
arr.T
np.dot(arr.T, arr)

array([[125, 140, 155, 170, 185],
       [140, 158, 176, 194, 212],
       [155, 176, 197, 218, 239],
       [170, 194, 218, 242, 266],
       [185, 212, 239, 266, 293]])

### Switching array axes

In [16]:
arr = np.arange(16).reshape((2, 2, 4))
print(arr)
print(arr.transpose((1, 0, 2)))#rearranged as second axis first, the first axis second, and the last axis unchanged
"""
ndarray has the method swapaxes, 
which takes a pair of axis numbers and switches the indicated axes to rearrange the data
swapaxes similarly returns a view on the data without making a copy
"""
arrxs=arr.swapaxes(1, 2) #swap the second and third axis
print(arrxs)
print(arr)

[[[ 0  1  2  3]
  [ 4  5  6  7]]

 [[ 8  9 10 11]
  [12 13 14 15]]]
[[[ 0  1  2  3]
  [ 8  9 10 11]]

 [[ 4  5  6  7]
  [12 13 14 15]]]
[[[ 0  4]
  [ 1  5]
  [ 2  6]
  [ 3  7]]

 [[ 8 12]
  [ 9 13]
  [10 14]
  [11 15]]]
[[[ 0  1  2  3]
  [ 4  5  6  7]]

 [[ 8  9 10 11]
  [12 13 14 15]]]
