### Source: Python Data Science Handbook, Jake VanderPlas, O'Reilly
### Chapter 2: Introduction to NumPy

Complete list of examples: https://github.com/jakevdp/PythonDataScienceHandbook/blob/master/notebooks/02.02-The-Basics-Of-NumPy-Arrays.ipynb

### Fixed-Type Arrays in Python

In [1]:
from sys import getsizeof
l = list(range(10))
print(l)
getsizeof(l)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


208

In [2]:
import array
l = list(range(10))
a = array.array('i', l) # 'i' is the type code for integer
getsizeof(a)

104

In [70]:
l2 = [1, 2, 'three']
l2

[1, 2, 'three']

The Python array object provides more efficient storage of array-based data.

On top of this, NumPy adds efficient operations on that data.

### Creating NumPy arrays from Python lists

In [3]:
import numpy as np

a_np = np.array(l)
print(a_np)
getsizeof(a_np)

[0 1 2 3 4 5 6 7 8 9]


176

In [4]:
a = np.array([1, 3, 4, 5, 9, 7])
print(a)
a.nbytes

[1 3 4 5 9 7]


48

In [73]:
# Explicitly set data type
b = np.array([1, 3, 4, 5, 9, 7], dtype='int32')
print(b)
b.nbytes

[1 3 4 5 9 7]


24

### NumPy arrays can be multidimensional (list of lists)

In [74]:
[[i, j] for i in [4,5,6] for j in [7,8,9]]

[[4, 7], [4, 8], [4, 9], [5, 7], [5, 8], [5, 9], [6, 7], [6, 8], [6, 9]]

In [5]:
# 3x3 array
np.array([range(i, i+3) for i in [2, 4, 6]])

array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

### Creating arrays from scratch

In [6]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [7]:
# 3x5 array of floating point numbers
np.ones((3, 5), dtype=float)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [8]:
# 3x5 array filled with 3.14, an approximate value of pi
np.full((3, 5), 3.14)

array([[3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14]])

In [9]:
# array of numbers, increments of 2
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [10]:
# Identity matrix
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

### NumPy Array Attributes

In [81]:
import numpy as np
np.random.seed(0)  # seed for reproducibility

x1 = np.random.randint(10, size=6) # one-dimensional array
x2 = np.random.randint(10, size=(3,4)) # two-dimensional array
x3 = np.random.randint(10, size=(3,4,5)) # three-dimensional array

print(x3)


[[[8 1 5 9 8]
  [9 4 3 0 3]
  [5 0 2 3 8]
  [1 3 3 3 7]]

 [[0 1 9 9 0]
  [4 7 3 2 7]
  [2 0 0 4 5]
  [5 6 8 4 1]]

 [[4 9 8 1 1]
  [7 9 9 3 6]
  [7 2 0 3 5]
  [9 4 4 6 4]]]


In [82]:
print(x3.ndim, x3.shape, x3.size)

3 (3, 4, 5) 60


### Accessing individual elements

In [83]:
x1

array([5, 0, 3, 3, 7, 9])

In [84]:
x1[0]

5

In [85]:
x1[-2] # second element from end

7

In [86]:
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [87]:
x2[0]

array([3, 5, 2, 4])

In [88]:
x2[0,0]

3

In [89]:
# modify a value
x2[0,0] = 12
x2[0,1] = 20
x2

array([[12, 20,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

In [90]:
# operate on all elements, array arithmetic
x2 * 10

array([[120, 200,  20,  40],
       [ 70,  60,  80,  80],
       [ 10,  60,  70,  70]])

### Computations on NumPy Arrays: Universal Functions (UFuncs)

In [91]:
np.random.seed(0)

def compute_reciprocals(values):
    """Return the reciprocal of every entry in ``values`` as a float array.

    The work is done one element at a time in a Python-level loop, which is
    what the neighbouring cells time against the vectorized ``1 / values``.

    Parameters
    ----------
    values : sized, ordered collection of numbers (e.g. a 1-D NumPy array)

    Returns
    -------
    numpy.ndarray
        ``len(values)`` floats, where entry ``k`` equals ``1.0 / values[k]``.
    """
    # np.empty does not initialize the buffer; every slot is overwritten below
    reciprocals = np.empty(len(values))
    for position, entry in enumerate(values):
        reciprocals[position] = 1.0 / entry
    return reciprocals

values = np.random.randint(1, 10, size=5)
print(values)

compute_reciprocals(values)

[6 1 4 4 8]


array([0.16666667, 1.        , 0.25      , 0.25      , 0.125     ])

In [92]:
big_array = np.random.randint(1, 100, size=1000000)
%timeit compute_reciprocals(big_array) # 2 seconds, really slow!

2.25 s ± 55.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Introducing UFuncs

UFuncs: quickly execute repeated operations on elements in NumPy arrays

In [93]:
# these two statements produce identical results
print(compute_reciprocals(values))
print(1/values)

[0.16666667 1.         0.25       0.25       0.125     ]
[0.16666667 1.         0.25       0.25       0.125     ]


In [94]:
%timeit 1/big_array # about 1.6 ms, much faster!

1.6 ms ± 97.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


### Array slicing

In [137]:
# Ten random integers in [10, 100). The recorded outputs of this cell and of
# the slicing cells that follow show such values, not the 0..9 of np.arange(10).
x = np.random.randint(10, 100, size=10)
x

array([93, 88, 82, 93, 67, 74, 56, 91, 76, 75])

In [138]:
# first five elements
x[:5]

array([93, 88, 82, 93, 67])

In [139]:
# elements from index 5 onward
x[5:]

array([74, 56, 91, 76, 75])

In [140]:
# middle of array
x[4:7]

array([67, 74, 56])

In [141]:
# every other element
x[::2]

array([93, 82, 67, 56, 76])

In [142]:
# reverse elements
x[::-1]

array([75, 76, 91, 56, 74, 67, 93, 82, 88, 93])

In [124]:
x2 = np.random.randint(10, size=(3,4))
x2

array([[7, 6, 8, 0],
       [0, 3, 3, 4],
       [8, 6, 5, 5]])

In [125]:
# first two rows of the first column
x2[0:2, 0]

array([7, 0])

In [126]:
# second column
x2[:, 1]

array([6, 3, 6])

In [127]:
# modifying slice changes the original array, the slice is just a view!
x2[:, 1] = -5

In [128]:
x2

array([[ 7, -5,  8,  0],
       [ 0, -5,  3,  4],
       [ 8, -5,  5,  5]])

In [129]:
# copy of an array
first_column = x2[:, 0].copy()
first_column

array([7, 0, 8])

In [130]:
first_column = first_column * 2
first_column

array([14,  0, 16])

In [135]:
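# no .copy() here, yet x2 is still unchanged: `first_column_orig * 2` builds a
# new array and rebinds the name instead of writing into the slice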
first_column_orig = x2[:, 0]
first_column_orig = first_column_orig * 2
first_column_orig

x2

array([[ 7, -5,  8,  0],
       [ 0, -5,  3,  4],
       [ 8, -5,  5,  5]])

### Reshaping arrays

In [110]:
array1 = np.arange(1,10)
array1

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [111]:
grid = array1.reshape((3,3))
grid

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

### Concatenating arrays

In [112]:
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
np.concatenate([x, y])

array([1, 2, 3, 3, 2, 1])

### Splitting arrays

In [143]:
x = np.random.randint(10, 100, size=10)
x

array([97, 85, 64, 55, 10, 51, 11, 57, 88, 29])

In [144]:
x1, x2 = np.split(x, [3])
print(x1)
print(x2)

[97 85 64]
[55 10 51 11 57 88 29]


In [145]:
x1, x2, x3 = np.split(x, [3, 5])
print(x1)
print(x2)
print(x3)

[97 85 64]
[55 10]
[51 11 57 88 29]


In [146]:
grid = np.arange(16).reshape((4, 4))
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [147]:
upper, lower = np.vsplit(grid, [2])
print(upper)
print(lower)

[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]


In [118]:
type(grid)

numpy.ndarray

In [14]:
np.eye(5) + 20

array([[21., 20., 20., 20., 20.],
       [20., 21., 20., 20., 20.],
       [20., 20., 21., 20., 20.],
       [20., 20., 20., 21., 20.],
       [20., 20., 20., 20., 21.]])