# NumPy

NumPy is a Python C extension library for array-oriented computing:
* Efficient
* In-memory
* Contiguous
* Homogeneous

In [1]:
import numpy as np

## Multidimensional array

![ndarray.png](img/ndarray.png)
Source: https://www.oreilly.com/library/view/elegant-scipy/9781491922927/ch01.html

### Array creation

In [2]:
# Creating numpy arrays from Python lists:

vector = np.array([1, 2, 3, 4, 5, 6])
matrix = np.array([
    [6, 7, 8, 9, 10, 11],
    [11, 12, 13, 14, 15, 16],
])
print(type(vector))
print(type(matrix))

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [3]:
# Converting numpy arrays back to Python lists:
print(vector.tolist())
print(matrix.tolist())

[1, 2, 3, 4, 5, 6]
[[6, 7, 8, 9, 10, 11], [11, 12, 13, 14, 15, 16]]


In [4]:
# Using built-in numpy functions to
# initialize array with some constant:

zero_matrix = np.zeros((3, 5))
ones_matrix = np.ones((3, 5))
number_matrix = np.full((3, 5), 3.14)

print(zero_matrix)
print(ones_matrix)
print(number_matrix)


[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]
[[3.14 3.14 3.14 3.14 3.14]
 [3.14 3.14 3.14 3.14 3.14]
 [3.14 3.14 3.14 3.14 3.14]]


In [5]:
# Generating sequences

# Similar to built-in Python range() function:
sequence_ints = np.arange(0, 20, 2)

# Evenly spaced numbers over some interval:
sequence_floats = np.linspace(0, 1, 5)

print(sequence_ints)
print(sequence_floats)

[ 0  2  4  6  8 10 12 14 16 18]
[0.   0.25 0.5  0.75 1.  ]


In [6]:
# Random values

# Floating-point values between 0 and 1:
random_floats = np.random.random((3, 5))

# Integer values between 0 and 100:
random_integers = np.random.randint(0, 100, (3, 5))

print(random_floats)
print(random_integers)

[[0.2343816  0.38047985 0.40893578 0.25066185 0.98279342]
 [0.62776566 0.90690507 0.04295425 0.98488676 0.81865499]
 [0.37550303 0.86091903 0.56294572 0.03390517 0.42132878]]
[[71 62 35 60 47]
 [ 8 56 62 32 28]
 [64 95 26 50  6]]


In [7]:
# Other array creation routines

# Identity matrices:
identity_matrix = np.eye(5)

# Allocate array without initializing its values:
uninitialized_matrix = np.empty((3, 5))

print(identity_matrix)
print(uninitialized_matrix)

[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]
[[0.2343816  0.38047985 0.40893578 0.25066185 0.98279342]
 [0.62776566 0.90690507 0.04295425 0.98488676 0.81865499]
 [0.37550303 0.86091903 0.56294572 0.03390517 0.42132878]]


### Shapes

In [8]:
vector = np.array([1, 2, 3, 4, 5, 6])
matrix = np.array([
    [6, 7, 8, 9, 10, 11],
    [11, 12, 13, 14, 15, 16],
])

# Getting array shape:
print(vector.shape)
print(matrix.shape)

# Number of dimensions:
print(vector.ndim)
print(matrix.ndim)

(6,)
(2, 6)
1
2


In [9]:
# Arrays can be reshaped:
print(matrix)
print(matrix.reshape(3, 4))

[[ 6  7  8  9 10 11]
 [11 12 13 14 15 16]]
[[ 6  7  8  9]
 [10 11 11 12]
 [13 14 15 16]]


In [10]:
# Also possible to reshape into a different number of dimensions:

# 2D to 1D
print(matrix.reshape(12))

# 1D to 2D
print(vector.reshape((2, 3)))

[ 6  7  8  9 10 11 11 12 13 14 15 16]
[[1 2 3]
 [4 5 6]]


In [11]:
# Converting multi-dimensional array into 1D:

# This returns a new 1D array:
print(matrix.flatten())

# This returns a view into the original array:
print(matrix.ravel())

[ 6  7  8  9 10 11 11 12 13 14 15 16]
[ 6  7  8  9 10 11 11 12 13 14 15 16]


### Data types

![np_datatypes.png](img/np_datatypes.png)
Source: https://medium.com/@vsvaibhav2016/basics-of-numpy-python-for-data-analysis-45b0c43f591b

In [12]:
vector = np.array([1, 2, 3, 4, 5, 6])
matrix = np.array([
    [6, 7, 8, 9, 10, 11],
    [11, 12, 13, 14, 15, 16],
])

# Inspecting array data type:
print(matrix.dtype)

int64


In [13]:
# Data type is implicitly determined
# during array creation:

vector_int = np.array([1, 2, 3])
vector_float = np.array([1.0, 2.0, 3.0])

print(vector_int.dtype)
print(vector_float.dtype)

int64
float64


In [14]:
# It can also be specified explicitly:
vector_small_int = np.array([1, 2, 3], dtype=np.uint8)
vector_small_float = np.array([1.0, 2.0, 3.0], dtype=np.float32)

print(vector_small_int.dtype)
print(vector_small_float.dtype)

uint8
float32


In [15]:
# Data type conversions
# Be careful with casting into a narrower data type!

print(vector_int.astype(np.float32))
print(vector_float.astype(np.int32))

[1. 2. 3.]
[1 2 3]


### NumPy arrays vs. Python lists

In [16]:
# Once the numpy array is created, you cannot
# increase/decrease its size - a new array
# has to be created instead
some_vector = np.array([1, 2, 3, 4, 5])

try:
    some_vector.append(6)
except:
    print('Regular append() wont work...')

# This returns a new array:
new_vector = np.append(some_vector, 7)
print(new_vector)

Regular append() wont work...
[1 2 3 4 5 7]


In [17]:
# Some operations not possible with Python lists
# can be done with numpy arrays:

some_vector = np.array([1, 2, 3, 4, 5])

# Wouldn't work with regular lists:
some_vector += 2

print(some_vector)

[3 4 5 6 7]


In [18]:
# Adding two arrays together does not concatenate them,
# but adds their elements together.
# Same goes for other arithmetic operations.

vec1 = np.array([1, 2, 3])
vec2 = np.array([4, 5, 6])
vec1 + vec2

array([5, 7, 9])

## Indexing

In [19]:
# Accessing elements of numpy arrays:
vector = np.array([1, 2, 3, 4, 5, 6])
matrix = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
])

print(vector[3])

# Both options work:
print(matrix[2][2])
print(matrix[2, 2])

4
11
11


### Slicing

In [20]:
# Regular list slicing works in any number of dimensions:
print(vector[0::2])    # Every second element
print(matrix[:, 0::2]) # Every second column and all rows

[1 3 5]
[[ 1  3]
 [ 5  7]
 [ 9 11]]


### Integer indexing

In [21]:
# You can use integer lists (or numpy arrays)
# to access certain elements of numpy array

# Either a regular Python list or a numpy array:
indices_py = [1, 3, 5]
indices_np = np.array([1, 3, 5])

print(vector)
print(vector[indices_py])
print(vector[indices_np])

[1 2 3 4 5 6]
[2 4 6]
[2 4 6]


In [22]:
# Integer indexing: 2D example

# The tuple below must contain the same number of lists
# as there are dimensions in the numpy array.

# Each list represents indices for each array axis:
indices = (
    [0, 2],   # First axis (rows)
    [0, 3],   # Second axis (columns)
)

# Integer indices can be used both for access and assignment:
print(matrix[indices])
matrix[indices] = 9999
print(matrix)

[ 1 12]
[[9999    2    3    4]
 [   5    6    7    8]
 [   9   10   11 9999]]


### Boolean indexing

In [23]:
# Numpy array elements can also be accessed
# by using a boolean array of the same dimension:

# Once again, both Python lists and numpy arrays work fine:
mask_py = [False, True, False, True, False, True]
mask_np = np.array(mask_py)

print(vector[mask_py])
print(vector[mask_np])

[2 4 6]
[2 4 6]


In [24]:
# Boolean indexing is most often used
# together with comparison operators:

matrix = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
])

# Using numpy array with some comparison operator
# will return boolean array of the same dimension.
mask = matrix > 5
print(mask)
print(matrix[mask])

[[False False False False]
 [False  True  True  True]
 [ True  True  True  True]]
[ 6  7  8  9 10 11 12]


In [25]:
# In some cases you might want to use comparison operator,
# but get integer indices, instead of boolean mask.

# np.where() can be used in such case:
indices = np.where(matrix > 5)

print(indices)
print(matrix[indices])

(array([1, 1, 1, 2, 2, 2, 2]), array([1, 2, 3, 0, 1, 2, 3]))
[ 6  7  8  9 10 11 12]


## Array manipulation

### Sorting

In [26]:
# Simplest way of sorting arrays:

unsorted_vector = np.array([100, 400, 200, 500, 300, 600])

# This happens in-place:
print(unsorted_vector.sort())
print(unsorted_vector)

None
[100 200 300 400 500 600]


In [27]:
# Returning the element indices in particular order,
# without actually sorting the array:

unsorted_vector = np.array([100, 400, 200, 500, 300, 600])
order = np.argsort(unsorted_vector)

print(order)
print(unsorted_vector[order])

# Useful when you have more than one array
# and want to sort them all in specific order

[0 2 4 1 3 5]
[100 200 300 400 500 600]


### Joining

In [28]:
# Concatenating arrays:

a1 = np.random.randint(0, 100, (2, 10))
a2 = np.random.randint(0, 100, (3, 10))

# This joins multiple arrays along existing axis:
concatenated = np.concatenate((a1, a2))
concatenated.shape

(5, 10)

In [29]:
# Stacking arrays:

a1 = np.random.randint(0, 100, (10,))
a2 = np.random.randint(0, 100, (10,))

# This joins multiple arrays along a new axis:
stacked = np.stack((a1, a2))
stacked.shape


(2, 10)

## Operations

In [30]:
# Numpy module itself contains a large number
# of various mathematical functions

np.sin(np.pi / 2)

1.0

### Basic statistics

In [31]:
# Dealing with 1D arrays is simple:
print('Min:            ', vector.min())
print('Max:            ', vector.max())
print('Sum:            ', vector.sum())
print('Std. deviation: ', vector.std())
print('Variance:       ', vector.var())
print('Mean:           ', vector.mean())
print('Median:         ', np.median(vector))

Min:             1
Max:             6
Sum:             21
Std. deviation:  1.707825127659933
Variance:        2.9166666666666665
Mean:            3.5
Median:          3.5


In [32]:
# When working with two (or more) dimensional arrays,
# axis can be specified along which the operation is applied:

# This just returns the sum of all elements:
print(matrix.sum())

# This computes sum of each column:
print(matrix.sum(axis=0))

# This computes sum of each row:
print(matrix.sum(axis=1))

78
[15 18 21 24]
[10 26 42]


### Linear algebra

In [33]:
# Matrix transpose:

print(matrix)
print(matrix.T)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
[[ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]
 [ 4  8 12]]


In [34]:
# Matrix determinant:

square_matrix = np.random.randint(0, 100, (5, 5))
np.linalg.det(square_matrix)

-1807788.999999882

In [35]:
# Matrix inverse
np.linalg.inv(square_matrix)

array([[  7.69957501, -10.08189396,   3.63536176,  -2.89099392,
          3.58035866],
       [  5.26902808,  -6.91578829,   2.49661548,  -1.96950806,
          2.45280174],
       [ -5.27930749,   6.91899608,  -2.50178588,   1.99002096,
         -2.44915087],
       [  7.179844  ,  -9.41069339,   3.4045096 ,  -2.70904348,
          3.34633964],
       [-15.85649487,  20.79630919,  -7.49355096,   5.95767095,
         -7.39633718]])

In [36]:
# Matrix multiplication

# Note the dimensions!
m1 = np.random.randint(0, 100, (2, 5))
m2 = np.random.randint(0, 100, (5, 3))

np.dot(m1, m2)

array([[15425, 23552, 19653],
       [ 7423,  6027,  6935]])

In [37]:
# Dot product vs. element-size multiplication
# Don't confuse them!
sm1 = np.random.randint(0, 100, (4, 4))
sm2 = np.random.randint(0, 100, (4, 4))

print(np.dot(sm1, sm2))
print(sm1 * sm2)

[[ 6489  9252 10081  3410]
 [ 7121 15608 16044  9395]
 [ 5819 10008 10201  5355]
 [ 9212 14558 15128  7140]]
[[ 374  276 3484 5133]
 [ 581 1332 2044  765]
 [ 294 3195 2508  658]
 [5238 3300 4158  936]]
