### NumPy Introduction

To enable dynamic typing, Python represents numeric data using complex data types that contain:

- value
- reference count to assist with memory management
- type
- size

This becomes inefficient when handling large amounts of data.

Python's standard library provides the `array` module. Unlike lists, its arrays improve efficiency by only allowing a single, fixed element type. This helps, but it doesn't compare to NumPy.

NumPy offers both efficient storage and operations for fixed-type multi-dimensional arrays.

In [1]:
import numpy as np

In [20]:
# numpy will infer the array's dtype from its contents
a = np.array([1,2,3,4,5])
a.dtype

dtype('int64')

In [21]:
# and will upcast ints to floats if necessary
b = np.array([1.5,2,3,4,5])
b.dtype

dtype('float64')

In [15]:
# explicitly define array type
np.array([1, 2, 3, 4, 5], dtype='float32')

array([1., 2., 3., 4., 5.], dtype=float32)

In [23]:
# nested lists produce multi-dimensional arrays
np.array([range(i, i + 5) for i in [2, 4, 6]])

array([[ 2,  3,  4,  5,  6],
       [ 4,  5,  6,  7,  8],
       [ 6,  7,  8,  9, 10]])

In [28]:
# it is more efficient to create arrays from scratch
# using numpy's built-in routines
print(np.zeros(10, dtype=int))
print(np.zeros(10, dtype=float))
print(np.ones((3, 5), dtype=float))

[0 0 0 0 0 0 0 0 0 0]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]


In [31]:
# we can pre-populate multi-dimensional arrays with 0s, 1s, or any other value
np.full((3, 5), 3.14)

array([[3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14]])

In [36]:
# create a linear sequence from start (inclusive) to stop (exclusive) with a given step
np.arange(0, 22, 3)

array([ 0,  3,  6,  9, 12, 15, 18, 21])

In [37]:
# create random arrays
np.random.random((4, 2))

array([[0.16463821, 0.81232671],
       [0.13306596, 0.00894756],
       [0.7491335 , 0.66290855],
       [0.38047926, 0.07294105]])

In [39]:
# create an array of samples drawn from a normal distribution (mean 1.0, std dev 1.0)
np.random.normal(1.0, 1.0, (3, 3))

array([[ 1.94515689, -0.95590339,  2.88919565],
       [ 0.50624771, -0.11748481,  1.87243799],
       [ 0.03407081,  0.31499742,  1.03161336]])

In [40]:
# creating an uninitialized array leaves it filled with
# whatever values already happen to be stored at that memory location
np.empty(3)

array([0., 0., 0.])

In [41]:
# identity matrix
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

#### NumPy Data Types

- bool_
- int_
- intc
- intp
- int8
- int16
- int32
- int64
- uint8
- uint16
- uint32
- uint64
- float_ (alias of float64; removed in NumPy 2.0, use float64 instead)
- float16
- float32
- float64
- complex_ (alias of complex128; removed in NumPy 2.0, use complex128 instead)
- complex64
- complex128

#### Array Attributes

In [42]:
x1 = np.random.randint(10, size=6)
x2 = np.random.randint(10, size=(3, 4))
x3 = np.random.randint(10, size=(3, 4, 5))

In [51]:
print('x3 dimensions: ', x3.ndim)
print('x3 shape: ', x3.shape)
print('x3 size: ', x3.size)
print('x3 dtype: ', x3.dtype)

x3 dimensions:  3
x3 shape:  (3, 4, 5)
x3 size:  60
x3 dtype:  int64


In [52]:
# item size = size in bytes of each element
print('itemsize = number of bytes per element: ', x3.itemsize, ' bytes')
print('nbytes = total size of array: ', x3.nbytes, ' bytes')

itemsize = number of bytes per element:  8  bytes
nbytes = total size of array:  480  bytes


#### Array Indexing

In [55]:
# can index like regular Python lists
x1[0]

1

In [67]:
# negative indices index from the end of the array
print('x1:', x1)
print('x1[-1]: ', x1[-1])
print('x1[-6]: ', x1[-6])

x1: [1 1 0 1 1 2]
x1[-1]:  2
x1[-6]:  1


In [68]:
# index multidimensional arrays using a comma-separated tuple of indices
x2

array([[0, 6, 1, 4],
       [3, 4, 0, 9],
       [6, 2, 2, 8]])

In [70]:
x2[0, 0]

0

In [71]:
x2[0][0]

0

In [72]:
x2[1, -1]

9

In [73]:
x2[1, 0] = 99

In [74]:
x2

array([[ 0,  6,  1,  4],
       [99,  4,  0,  9],
       [ 6,  2,  2,  8]])

#### Array Slicing

x[start:stop:step]

In [75]:
x1

array([1, 1, 0, 1, 1, 2])

In [77]:
x1[0:4]

array([1, 1, 0, 1])

In [78]:
x1[0:4:2]

array([1, 0])

In [82]:
# start defaults to 0 and stop defaults to the length of the array (stop is exclusive)
x1[:4]

array([1, 1, 0, 1])

In [80]:
x1[::2]

array([1, 0, 1])

In [84]:
x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [86]:
# first 5 elements
x[:5]

array([0, 1, 2, 3, 4])

In [89]:
# last 5 elements
x[5:]

array([5, 6, 7, 8, 9])

In [92]:
# every third element, starting at index 1
x[1::3]

array([1, 4, 7])

In [93]:
# every other element
x[::2]

array([0, 2, 4, 6, 8])

In [94]:
# all elements in reverse
# we use a reverse step!
x[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

#### Multi-Dimensional Arrays

In [95]:
x2

array([[ 0,  6,  1,  4],
       [99,  4,  0,  9],
       [ 6,  2,  2,  8]])

In [96]:
# first two rows, first three columns
x2[:2, :3]

array([[ 0,  6,  1],
       [99,  4,  0]])

In [98]:
# all rows, but only every other column
x2[:3, ::2] # or just x2[:, ::2]

array([[ 0,  1],
       [99,  0],
       [ 6,  2]])

In [101]:
# we can reverse subarrays using reverse steps!
# first we reverse the rows
x2[::-1, :]

array([[ 6,  2,  2,  8],
       [99,  4,  0,  9],
       [ 0,  6,  1,  4]])

In [102]:
# then we can reverse the columns
x2[:, ::-1]

array([[ 4,  1,  6,  0],
       [ 9,  0,  4, 99],
       [ 8,  2,  2,  6]])

In [105]:
# we can even reverse everything!
x2[::-1, ::-1]

array([[ 8,  2,  2,  6],
       [ 9,  0,  4, 99],
       [ 4,  1,  6,  0]])

In [106]:
# read first column of x2
x2[:, 0]

array([ 0, 99,  6])

In [109]:
# read last row of x2
x2[-1,:]

array([6, 2, 2, 8])

#### Slices Return Views (Not Copies of Array Data)

This is unlike Python's built-in lists. Slices of a list return a copy.

This is actually desirable: we can slice a small piece of a large dataset and operate on it without copying, knowing that the underlying data is modified in place.

In [110]:
x2

array([[ 0,  6,  1,  4],
       [99,  4,  0,  9],
       [ 6,  2,  2,  8]])

In [111]:
x2_sub = x2[:2, :2]
x2_sub

array([[ 0,  6],
       [99,  4]])

In [112]:
# modify the sub array
# and see how the original array is affected
x2_sub[0, 0] = -1
x2_sub[1, 0] = -1

In [113]:
x2

array([[-1,  6,  1,  4],
       [-1,  4,  0,  9],
       [ 6,  2,  2,  8]])

#### Creating Copies of Arrays
 
We must explicitly create copies because slicing does not.

This is easily achieved with the NumPy array's copy method.

In [115]:
x2_sub_copy = x2[:2, :2].copy()
x2_sub_copy

array([[-1,  6],
       [-1,  4]])

In [116]:
x2_sub_copy[0, 0] = 10
x2_sub_copy[1, 0] = 100

In [117]:
x2

array([[-1,  6,  1,  4],
       [-1,  4,  0,  9],
       [ 6,  2,  2,  8]])

#### Reshaping Arrays

In order to work, the size of the initial array must match the size of the reshaped array!

Reshape uses a no-copy view of the initial array when possible. This isn't always possible with non-contiguous memory buffers though.

In [118]:
one_d = np.arange(1, 10)
one_d

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [120]:
grid = one_d.reshape((3, 3))
grid

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [126]:
x = np.array([1, 2, 3])
x

array([1, 2, 3])

In [127]:
# reshape returns a new array (here a 1x3 row vector); x itself is not modified
x.reshape((1, 3))
x

array([1, 2, 3])

In [124]:
# we can create a column vector using reshape
x.reshape((3, 1))

array([[1],
       [2],
       [3]])

In [128]:
# column vector using newaxis
x[:, np.newaxis]

array([[1],
       [2],
       [3]])

In [130]:
x[:, np.newaxis, np.newaxis]

array([[[1]],

       [[2]],

       [[3]]])

#### Array Concatenation and Splitting

In [132]:
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
np.concatenate([x, y])

array([1, 2, 3, 3, 2, 1])

In [133]:
z = [99, 99, 99]
np.concatenate([x, y, z])

array([ 1,  2,  3,  3,  2,  1, 99, 99, 99])

In [134]:
np.concatenate((x, y, z))

array([ 1,  2,  3,  3,  2,  1, 99, 99, 99])

In [136]:
grid = np.array([[1, 2, 3], [4, 5, 6]])
grid

array([[1, 2, 3],
       [4, 5, 6]])

In [137]:
np.concatenate([grid, grid])

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [138]:
# concat along the second axis
np.concatenate([grid, grid], axis=1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [139]:
# remember, 1st axis is 0 index
np.concatenate([grid, grid], axis=0)

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [142]:
x = np.array([1, 2, 3])
grid = np.array([[9,8,7],[6,5,4]])
grid

array([[9, 8, 7],
       [6, 5, 4]])

In [145]:
# vertical stacking
np.vstack([x, grid])

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

In [146]:
np.vstack([grid, x])

array([[9, 8, 7],
       [6, 5, 4],
       [1, 2, 3]])

In [147]:
# horizontal stacking
y = np.array([[99],[99]])
np.hstack([grid, y])

array([[ 9,  8,  7, 99],
       [ 6,  5,  4, 99]])

In [148]:
np.hstack([y, grid])

array([[99,  9,  8,  7],
       [99,  6,  5,  4]])

#### Splitting Arrays

The opposite of concatenation

In [150]:
# split takes a list of indices to split on
x = [1, 2, 3, 99, 99, 3, 2, 1]
x1, x2, x3 = np.split(x, [3, 5])
print(x1, x2, x3)

[1 2 3] [99 99] [3 2 1]


In [154]:
x1, x2, x3, x4 = np.split(x, np.arange(2, len(x), 2))
print(x1, x2, x3, x4)

[1 2] [ 3 99] [99  3] [2 1]


In [155]:
grid = np.arange(16).reshape((4, 4))
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [158]:
upper, lower = np.vsplit(grid, [2])

In [159]:
upper

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [160]:
lower

array([[ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [162]:
left, right = np.hsplit(grid, [2])

In [163]:
left

array([[ 0,  1],
       [ 4,  5],
       [ 8,  9],
       [12, 13]])

In [164]:
right

array([[ 2,  3],
       [ 6,  7],
       [10, 11],
       [14, 15]])