In [1]:
import numpy as np

NumPy stands for Numerical Python. It is the workhorse behind numerical calculations in Python, offering superior performance compared to Python's built-in functions. Pandas, among other libraries, relies on NumPy.

Let's first check which version we have

In [2]:
np.__version__

'1.26.4'

## NumPy arrays

In [3]:
np.array([1,2,3,4])

array([1, 2, 3, 4])

In [4]:
np.array([1,2,3,4],dtype='float32')

array([1., 2., 3., 4.], dtype=float32)

In [5]:
np.zeros(10,dtype='int')

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [6]:
np.ones((3,3),dtype='float')

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [7]:
np.arange(0,10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [8]:
np.arange(0,10,2)

array([0, 2, 4, 6, 8])

In [9]:
np.arange(10,0,-1)

array([10,  9,  8,  7,  6,  5,  4,  3,  2,  1])

Another useful function is `linspace` for creating equally spaced values

In [10]:
np.linspace(0,1,5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

It's often the case that we want to create an array with random values

In [11]:
np.random.random((2,2))

array([[0.79992743, 0.53221498],
       [0.16944939, 0.96259018]])

In [12]:
np.random.normal(0,1,(2,2))

array([[-0.08268171,  0.53982825],
       [-1.79964642, -0.33926828]])

Below we see how we can create an identity matrix

In [13]:
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

### Basic operations with numpy arrays

Each array has a dimension, a shape and a size

In [14]:
x = np.random.normal(0,1,(3,3))
x

array([[ 0.08808941,  0.98234717,  0.19073967],
       [-2.41962397, -0.22305437, -0.66223197],
       [ 0.19883099,  0.10005855, -0.28208093]])

In [15]:
print("x dimension: ", x.ndim)
print("x shape: ", x.shape)
print("x size: ", x.size)

x dimension:  2
x shape:  (3, 3)
x size:  9


It's also useful to see the type of the elements

In [16]:
print("x type: ", x.dtype)

x type:  float64


Below we see how we can access array elements

In [21]:
x1 = np.arange(0,11)
x1

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [18]:
x1[0]

0

In [22]:
x1[9]

9

In [23]:
x1[10]

10

In [24]:
x1[-1]

10

In [25]:
x1[:4]

array([0, 1, 2, 3])

In [26]:
x1[-5:]

array([ 6,  7,  8,  9, 10])

In [27]:
x1[0:6:2]

array([0, 2, 4])

In the case of multidimensional arrays

In [28]:
x

array([[ 0.08808941,  0.98234717,  0.19073967],
       [-2.41962397, -0.22305437, -0.66223197],
       [ 0.19883099,  0.10005855, -0.28208093]])

In [29]:
x[0,0]

0.0880894134756335

Below we select an entire row

In [30]:
x[0,:]

array([0.08808941, 0.98234717, 0.19073967])

In [31]:
x[0]

array([0.08808941, 0.98234717, 0.19073967])

Below we select an entire column

In [32]:
x[:,2]

array([ 0.19073967, -0.66223197, -0.28208093])

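A subtle difference: indexing a row with an integer drops that dimension and returns a 1-D array, whereas selecting the same row with a slice keeps it and returns a 2-D array with a single row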

In [33]:
print(x[1,:])
x[1,:].shape

[-2.41962397 -0.22305437 -0.66223197]


(3,)

In [35]:
print(x[1:2,:])
x[1:2,:].shape

[[-2.41962397 -0.22305437 -0.66223197]]


(1, 3)

Reversing rows and columns

In [36]:
x

array([[ 0.08808941,  0.98234717,  0.19073967],
       [-2.41962397, -0.22305437, -0.66223197],
       [ 0.19883099,  0.10005855, -0.28208093]])

In [37]:
x[::-1,]

array([[ 0.19883099,  0.10005855, -0.28208093],
       [-2.41962397, -0.22305437, -0.66223197],
       [ 0.08808941,  0.98234717,  0.19073967]])

In [38]:
x[:,::-1]

array([[ 0.19073967,  0.98234717,  0.08808941],
       [-0.66223197, -0.22305437, -2.41962397],
       [-0.28208093,  0.10005855,  0.19883099]])

In [39]:
x[::-1,::-1]

array([[-0.28208093,  0.10005855,  0.19883099],
       [-0.66223197, -0.22305437, -2.41962397],
       [ 0.19073967,  0.98234717,  0.08808941]])

### Copy views

In [40]:
x2 = x1
x2

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [41]:
x1

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [42]:
x2[0] = 3
x2

array([ 3,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [43]:
x1

array([ 3,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [44]:
x1 = np.arange(0,11)
x2 = x1.copy()
x2

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [45]:
x2[0] = 3
x2

array([ 3,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [46]:
x1

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

### Reshaping numpy arrays

In [47]:
x1 = np.arange(0,9)
x1

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [48]:
x1.reshape((3,3))

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

Create a column vector

In [50]:
x1[:,np.newaxis]

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8]])

or, equivalently,

In [51]:
x1.reshape((9,1))

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8]])

### Joining arrays

In [52]:
print(x1)
print(x2)

[0 1 2 3 4 5 6 7 8]
[ 3  1  2  3  4  5  6  7  8  9 10]


In [53]:
np.concatenate([x1,x2])

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  3,  1,  2,  3,  4,  5,  6,  7,
        8,  9, 10])

In [54]:
x = np.random.normal(0,1,(2,2))
x

array([[1.63236066, 1.1328367 ],
       [0.12182427, 1.56122284]])

In [55]:
y = np.random.normal(0,1,(2,2))
y

array([[-0.47629142, -0.72075656],
       [ 0.17077487, -1.71609713]])

In [56]:
np.vstack([x,y])

array([[ 1.63236066,  1.1328367 ],
       [ 0.12182427,  1.56122284],
       [-0.47629142, -0.72075656],
       [ 0.17077487, -1.71609713]])

In [57]:
np.hstack([x,y])

array([[ 1.63236066,  1.1328367 , -0.47629142, -0.72075656],
       [ 0.12182427,  1.56122284,  0.17077487, -1.71609713]])

### Elementwise operations

In [58]:
x = np.arange(1,100)
x

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
       52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68,
       69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85,
       86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

In [59]:
def reciprocal(x):
    """Return the reciprocal of every element of ``x`` as a float array.

    This is intentionally written as an explicit Python loop: it serves as the
    slow baseline that is timed against the vectorised expression ``1/x``.

    Parameters
    ----------
    x : sequence or 1-D array of numbers
        Values to invert.

    Returns
    -------
    numpy.ndarray
        Array of ``len(x)`` values where element ``i`` equals ``1 / x[i]``.
    """
    n_items = len(x)
    # Pre-allocate the result buffer; every slot is overwritten in the loop.
    result = np.empty(n_items)
    for idx, value in enumerate(x):
        result[idx] = 1 / value
    return result

In [60]:
%timeit reciprocal(x)

10.3 µs ± 199 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [61]:
%timeit 1/x

790 ns ± 2.83 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


Standard numeric operations can be performed

In [62]:
x + 5

array([  6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,
        19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
        32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
        45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,
        58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,
        71,  72,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,
        84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
        97,  98,  99, 100, 101, 102, 103, 104])

In [63]:
x**2

array([   1,    4,    9,   16,   25,   36,   49,   64,   81,  100,  121,
        144,  169,  196,  225,  256,  289,  324,  361,  400,  441,  484,
        529,  576,  625,  676,  729,  784,  841,  900,  961, 1024, 1089,
       1156, 1225, 1296, 1369, 1444, 1521, 1600, 1681, 1764, 1849, 1936,
       2025, 2116, 2209, 2304, 2401, 2500, 2601, 2704, 2809, 2916, 3025,
       3136, 3249, 3364, 3481, 3600, 3721, 3844, 3969, 4096, 4225, 4356,
       4489, 4624, 4761, 4900, 5041, 5184, 5329, 5476, 5625, 5776, 5929,
       6084, 6241, 6400, 6561, 6724, 6889, 7056, 7225, 7396, 7569, 7744,
       7921, 8100, 8281, 8464, 8649, 8836, 9025, 9216, 9409, 9604, 9801])

For large calculations it can be useful to specify beforehand the array where the results will be stored. So instead of creating a temporary array we can write the computations directly where we need them.

In [65]:
x = np.random.normal(0,1,1000)
y = np.empty(1000)

In [66]:
np.multiply(x,2,out=y)
y

array([-1.13796463e+00, -1.33122503e+00,  2.23546603e+00,  1.11027547e+00,
       -3.14685536e+00,  8.18740922e-01, -2.91358823e+00, -2.04184669e-01,
       -1.06466090e+00,  9.69925297e-01, -4.39457345e+00, -2.84019471e-01,
       -7.63276028e-01, -1.43226547e+00, -4.24364374e+00, -1.16619725e+00,
        4.79890147e-01,  1.23771957e+00, -6.26828519e-01, -2.72499631e+00,
       -2.23342032e-01,  5.29738037e-01, -2.22408431e+00,  1.81830150e+00,
       -4.07492883e+00, -1.84472359e+00, -1.67878380e-01,  7.32551735e-01,
        1.39908101e+00,  2.05925510e+00,  6.89450803e-01, -1.42359445e+00,
        3.07837405e+00,  2.06252356e+00,  5.03887017e+00,  1.64556151e+00,
       -1.53861078e+00, -9.65803671e-02,  1.35437282e+00,  3.38511792e-01,
       -1.19098013e+00,  8.29659546e-01,  2.35321380e+00, -2.88273913e+00,
        1.03792666e+00,  2.00632959e+00, -4.45058330e-01,  2.15716004e-02,
       -2.83926889e-01,  2.10412203e+00,  2.40267882e+00, -3.43466726e+00,
       -1.20452084e+00, -

We can easily perform all sorts of aggregations

In [67]:
x = np.arange(1,6)
x

array([1, 2, 3, 4, 5])

In [68]:
np.add.reduce(x)

15

In [69]:
np.multiply.reduce(x)

120

In [70]:
np.min(y), np.max(y)

(-6.464528460199527, 8.453735414607127)

In the case of multidimensional arrays we can perform aggregations per axis

In [71]:
x = np.random.randint(1,20,size=9).reshape(3,3)
x

array([[15,  1, 18],
       [15,  7, 14],
       [12,  6, 12]])

In [72]:
x.sum()

100

We can obtain the sum per column or per row

In [73]:
x.sum(axis=0)

array([42, 14, 44])

In [74]:
x.sum(axis=1)

array([34, 36, 30])

Below we see what happens when we have missing values

In [75]:
z = np.random.random((2,2))
z

array([[0.13214626, 0.87732991],
       [0.76589653, 0.3179049 ]])

In [76]:
z[0,0] = np.nan
z

array([[       nan, 0.87732991],
       [0.76589653, 0.3179049 ]])

In [77]:
z.sum()

nan

In [78]:
z.sum(axis=0)

array([       nan, 1.19523481])

In [79]:
z.sum(axis=1)

array([       nan, 1.08380143])

NumPy offers special functions to deal with missing values

In [80]:
np.nansum(z)

1.9611313416191793

In [81]:
np.nansum(z,axis=0)

array([0.76589653, 1.19523481])

In [82]:
np.nansum(z,axis=1)

array([0.87732991, 1.08380143])

## Broadcasting

A set of rules for applying binary universal functions on arrays of different sizes

* If the arrays do not have the same rank, then a 1 will be prepended to the shape of the lower-rank array until their ranks match.
* Arrays with a 1 along a particular dimension act as if they had the size of the array with the largest shape along that dimension. The value of the array element is repeated along that dimension.
* After rules 1 & 2, the sizes of all arrays must match in every dimension; otherwise an error is raised.

In [83]:
z = np.random.randint(1,20,size=9).reshape(3,3)
z

array([[ 9,  1, 16],
       [ 1, 19, 17],
       [ 5, 12,  2]])

In [84]:
a = np.arange(1,4).reshape(1,3)
a

array([[1, 2, 3]])

In [85]:
a.shape

(1, 3)

In [86]:
z.shape

(3, 3)

In [87]:
z + a

array([[10,  3, 19],
       [ 2, 21, 20],
       [ 6, 14,  5]])

In [88]:
b = np.arange(4,7).reshape(3,1)
b

array([[4],
       [5],
       [6]])

In [89]:
b.shape

(3, 1)

In [90]:
z.shape

(3, 3)

In [91]:
z + b

array([[13,  5, 20],
       [ 6, 24, 22],
       [11, 18,  8]])

In [92]:
a.shape

(1, 3)

In [93]:
b.shape

(3, 1)

In [94]:
a + b

array([[5, 6, 7],
       [6, 7, 8],
       [7, 8, 9]])

In [95]:
c = np.arange(8,10)
c

array([8, 9])

In [96]:
c.shape

(2,)

In [97]:
z.shape

(3, 3)

In [98]:
# Shapes (3, 3) and (2,) cannot be broadcast together, so this addition raises.
# The error is the point of the demonstration: catch it and display the message
# so the notebook still survives Restart Kernel -> Run All.
try:
    z + c
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: operands could not be broadcast together with shapes (3,3) (2,) 

### Conditionals

In [99]:
z

array([[ 9,  1, 16],
       [ 1, 19, 17],
       [ 5, 12,  2]])

In [100]:
z > 4

array([[ True, False,  True],
       [False,  True,  True],
       [ True,  True, False]])

In [101]:
z[z>4]

array([ 9, 16, 19, 17,  5, 12])

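### Linear algebra

**Note:** the cell defining `y` below was re-run later (`In [113]`) and now shows a 4x3 array, whereas the `np.diag`, `np.trace` and `y.T` outputs (`In [105]`–`In [108]`) were produced with an earlier 4x4 `y`. Re-run the cells in order to obtain mutually consistent outputs.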

In [113]:
y = np.random.rand(4,3)
y

array([[0.82155158, 0.6854406 , 0.48392458],
       [0.6713123 , 0.99703274, 0.38350124],
       [0.14309541, 0.19077345, 0.94034602],
       [0.558754  , 0.26442689, 0.15550532]])

In [105]:
np.diag(y)

array([0.8003259 , 0.67327747, 0.77245745, 0.54681098])

In [106]:
np.trace(y)

2.792871808944828

In [107]:
np.diag(y).sum()

2.792871808944828

Transpose of a matrix

In [108]:
y.T

array([[0.8003259 , 0.4248953 , 0.91344162, 0.96688284],
       [0.74258721, 0.67327747, 0.28347037, 0.10273119],
       [0.22179714, 0.24117614, 0.77245745, 0.25255189],
       [0.33505874, 0.20617316, 0.53521592, 0.54681098]])

Identity matrix

In [109]:
I = np.eye(3,3)
I

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [110]:
x = np.random.rand(3,3)
x

array([[0.15680388, 0.37574891, 0.70632196],
       [0.40180189, 0.7302616 , 0.28441173],
       [0.71115204, 0.65794471, 0.9869288 ]])

In [111]:
x.dot(I)

array([[0.15680388, 0.37574891, 0.70632196],
       [0.40180189, 0.7302616 , 0.28441173],
       [0.71115204, 0.65794471, 0.9869288 ]])

Matrix dot product

In [114]:
# x is (3, 3) and y is (4, 3): the inner dimensions (3 and 4) do not match, so
# this product raises. Catch the error and display the message so the notebook
# still survives Restart Kernel -> Run All.
try:
    x.dot(y)
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: shapes (3,3) and (4,3) not aligned: 3 (dim 1) != 4 (dim 0)

In [115]:
y.dot(x)

array([[0.74837775, 1.12764368, 1.25282637],
       [0.7786017 , 1.2326622 , 1.13621885],
       [0.76782004, 0.81177816, 1.08338421],
       [0.30444995, 0.50536591, 0.62333901]])

Inverse and pseudoinverse

In [116]:
np.linalg.inv(x)

array([[-3.1494714 , -0.55414001,  2.41369461],
       [ 1.14678222,  2.05137767, -1.41188841],
       [ 1.50490462, -0.96827176,  0.2152543 ]])

In [117]:
x.dot(np.linalg.inv(x))

array([[ 1.00000000e+00, -1.02516881e-16,  2.19219367e-17],
       [-2.93326048e-17,  1.00000000e+00,  1.82733210e-17],
       [-1.19372174e-16, -2.52528033e-17,  1.00000000e+00]])

In [118]:
np.linalg.pinv(x)

array([[-3.1494714 , -0.55414001,  2.41369461],
       [ 1.14678222,  2.05137767, -1.41188841],
       [ 1.50490462, -0.96827176,  0.2152543 ]])

QR decomposition

In [119]:
q, r = np.linalg.qr(x)

In [120]:
q.dot(r)

array([[0.15680388, 0.37574891, 0.70632196],
       [0.40180189, 0.7302616 , 0.28441173],
       [0.71115204, 0.65794471, 0.9869288 ]])

In [121]:
x

array([[0.15680388, 0.37574891, 0.70632196],
       [0.40180189, 0.7302616 , 0.28441173],
       [0.71115204, 0.65794471, 0.9869288 ]])

## Saving and loading NumPy arrays

Binary format

In [122]:
np.save('my_array', x)

In [123]:
with open('my_array.npy', 'rb') as a:
    array = a.read()
array

b"\x93NUMPY\x01\x00v\x00{'descr': '<f8', 'fortran_order': False, 'shape': (3, 3), }                                                          \n\xf4\xad\xe4M&\x12\xc4?\x08\xd9\x87#E\x0c\xd8?M\x9d\x0c\x830\x9a\xe6?\xb4\xbfHA\x1f\xb7\xd9?\x87h\xde\x94M^\xe7?|\xe6\xa3@\xcd3\xd2?\x15B\xd9\xeb\xc1\xc1\xe6?a\xe4\x89\x11\xe2\r\xe5?\x8e\xe8\xa3\xb5\xeb\x94\xef?"

In [124]:
a_load = np.load('my_array.npy')
a_load

array([[0.15680388, 0.37574891, 0.70632196],
       [0.40180189, 0.7302616 , 0.28441173],
       [0.71115204, 0.65794471, 0.9869288 ]])

Text format

In [125]:
np.savetxt('my_array.csv',x, delimiter=',')

In [126]:
a_text = np.loadtxt('my_array.csv', delimiter=',')
a_text

array([[0.15680388, 0.37574891, 0.70632196],
       [0.40180189, 0.7302616 , 0.28441173],
       [0.71115204, 0.65794471, 0.9869288 ]])

We can also save multiple arrays in zipped format

In [127]:
np.savez('my_arrays', my_a=x, my_b=y)

In [128]:
my_arrays = np.load('my_arrays.npz')
my_arrays

NpzFile 'my_arrays.npz' with keys: my_a, my_b

In [129]:
my_arrays.files

['my_a', 'my_b']

In [130]:
my_arrays['my_a']

array([[0.15680388, 0.37574891, 0.70632196],
       [0.40180189, 0.7302616 , 0.28441173],
       [0.71115204, 0.65794471, 0.9869288 ]])

In [131]:
my_arrays['my_b']

array([[0.82155158, 0.6854406 , 0.48392458],
       [0.6713123 , 0.99703274, 0.38350124],
       [0.14309541, 0.19077345, 0.94034602],
       [0.558754  , 0.26442689, 0.15550532]])

## Exercises

### Exercise 1

Create a 5x5 numpy array of booleans where all values are True apart from the first element of the last row.

### Exercise 2

Create a numpy array with values ranging from 1 to 20. Remove the even numbers.

### Exercise 3

Create a 5x5 numpy array and populate it with random numbers from a normal distribution (mean=0, std=1). Filter out all the negative values. Reshape the resulting array into a two-dimensional array where the number of rows is equal to the number of positive elements and there is 1 column.

### Exercise 4

Create a 10x10 identity matrix of integer type. Save the matrix in csv format using a file name of your choice. Load back the matrix.

### Exercise 5

Create a 5x5 numpy array populated by random integers ranging from 1 to 100. Calculate the sum of rows, columns and of the diagonal elements.

### Exercise 6

Create a 4x3 numpy array and populate it with random integers ranging from 1 to 20. Compute its transpose. Multiply the two arrays.

### Exercise 7

Create a 2x4 and a 4x4 numpy arrays. Populate them with random integers ranging from 80 to 100. Join them either vertically or horizontally.

### Exercise 8

Create a 4x4 numpy array and populate with numbers originating from standard normal distribution (mean=0, std=1). Change all negative numbers to missing. Calculate the sum of all the numbers.

### Exercise 9

As Exercise 8 but without affecting the original array.

### Exercise 10


Create a 4x4 numpy array and populate it with random integers ranging from 25 to 50. Calculate the inverse and the pseudo inverse of the array. Multiply the original array with its inverse.

### Exercise 11

Create two numpy arrays and populate them with 30 random numbers ranging from 1 to 7. Find any elements that match.

### Exercise 12

Create a 5x4 numpy array and populate it with random integers ranging from 1 to 10. Swap the first and third column and the second and fourth row.