In [1]:
import numpy as np


## Creation of numpy arrary

In [2]:
# 1 dimension
a1d = np.array([1,2,3])
print(a1d)
# 2 dimensions
a2d = np.array([[1,2,3],[4,5,6]])
print(a2d)
# 3 dimensions
a3d = np.array([[[1,2],[3,4],[5,6]]])
print(a3d)

[1 2 3]
[[1 2 3]
 [4 5 6]]
[[[1 2]
  [3 4]
  [5 6]]]


In [3]:
# some helper functions to create common types of arrays:
print(np.zeros((3,4)))
#To specify that elements are ints instead of floats, use the parameter dtype:
np.zeros((3,4), dtype=int)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0]])

In [4]:
# Similarly ones initializes all elements to one, full initializes all elements to a specified value, and empty leaves the elements uninitialized:
np.ones((3,4), dtype=int)
np.full((3,4), fill_value=2)
np.empty((2,4),dtype=int)

array([[    0,     1,     0,    32],
       [  988,     0,   768, 32765]])

In [5]:
# The eye function creates the identity matrix, that is, a matrix with elements on the diagonal are set to one, and non-diagonal elements are set to zero:
np.eye(4, dtype=int)

array([[1, 0, 0, 0],
       [0, 1, 0, 0],
       [0, 0, 1, 0],
       [0, 0, 0, 1]])

In [6]:
#The arange function works like the range function, but produces an array instead of a list.
np.arange(0,10,3)

array([0, 3, 6, 9])

In [7]:
# With linspace one does not have to compute the length of the step, but instead one specifies the wanted number of elements. By default, the endpoint is included in the result, unlike with arange.
np.linspace(1,5,10)

array([1.        , 1.44444444, 1.88888889, 2.33333333, 2.77777778,
       3.22222222, 3.66666667, 4.11111111, 4.55555556, 5.        ])

## Arrays with random elements

In [8]:
from numpy import random as rd 
rd.random(3)

array([0.66827072, 0.70223089, 0.45519834])

In [9]:
## Elements are uniformly distributed from half-open interval [0.0,1.0)
rd.random((3,4)) 

array([[0.46012181, 0.12078523, 0.78017467, 0.3950009 ],
       [0.42315393, 0.53046753, 0.9708274 , 0.09665513],
       [0.75218745, 0.7916162 , 0.40374613, 0.75113595]])

In [10]:
# # Elements are normally distributed with mean 0 and standard deviation 1
rd.normal(0, 1, (3,4))

array([[-1.27655777,  1.23155361,  0.91117913,  0.33537136],
       [ 0.24570941, -0.78201422, -0.39667706, -0.97914449],
       [-0.41223487,  1.09669066, -0.56161799,  0.43702671]])

In [11]:
 # Elements are uniformly distributed integers from the half-open interval [-2,10)
 rd.randint(-2,10,(3,4))

array([[ 3,  6, -2,  9],
       [ 7,  3,  8,  4],
       [ 6, -2,  5,  6]])

In [12]:
# Sometimes it is useful to be able to recreate exactly the same data in every run of our program. For example, if there is a bug in our program, which manifests itself only with certain input, then to debug our program it needs to behave deterministically. We can create random numbers deterministically, if we always start from the same starting point. This starting point is usually an integer, and we call it a seed
rd.seed(10)
print(rd.randint(0, 100, 10))
print(rd.normal(0, 1, 10))

[ 9 15 64 28 89 93 29  8 73  0]
[-0.68212385 -0.46121833  0.96566614  0.56066394  0.74993163 -0.31455963
  0.36624858  0.60808856 -1.08684028  0.12055902]


In [13]:
new_generator = rd.RandomState(seed=123)  # RandomState is a class, so we give the seed to its constructor
new_generator.randint(0, 100, 10)

array([66, 92, 98, 17, 83, 57, 86, 97, 96, 47])

## Array types and attributes
An array has several attributes: 
* ndim tells the number of dimensions, 
* shape tells the size in each dimension, 
* size tells the number of elements, 
* dtype tells the element type. 

In [14]:
# Let’s create a helper function to explore these attributes:
def info(name, a):
    print(f"{name} has dim {a.ndim}, shape {a.shape}, size {a.size}, and dtype {a.dtype}:")
    print(a)

In [15]:
b=np.array([[1,2,3], [4,5,6]])
info("b", b)

b has dim 2, shape (2, 3), size 6, and dtype int32:
[[1 2 3]
 [4 5 6]]


In [16]:
c=np.array([b, b])          # Creates a 3-dimensional array
info("c", c)

c has dim 3, shape (2, 2, 3), size 12, and dtype int32:
[[[1 2 3]
  [4 5 6]]

 [[1 2 3]
  [4 5 6]]]


Note above how Python printed the three dimensional array. The general rules of printing an n-dimensional array as a nested list are:

* the last dimension is printed from left to right,
* the second-to-last is printed from top to bottom,
* the rest are also printed from top to bottom, with each slice separated from the next by an empty line.

In [17]:
d=np.array([[1,2,3,4]])                # a row vector
info("d", d)

d has dim 2, shape (1, 4), size 4, and dtype int32:
[[1 2 3 4]]


## Indexing, slicing and reshaping

### Indexing

In [18]:
# One dimensional array behaves like the list in Python:
a=np.array([1,4,2,7,9,5])
print(a[1])
print(a[-2])

4
9


In [19]:
# For multi-dimensional array the index is a comma separated tuple instead of a single integer:
b=np.array([[1,2,3], [4,5,6]])
print(b)
print(b[1,2])    # row index 1, column index 2
print(b[0,-1])   # row index 0, column index -1

[[1 2 3]
 [4 5 6]]
6
3


In [20]:
# As with lists, modification through indexing is possible
b[0,0] = 10
print(b)

[[10  2  3]
 [ 4  5  6]]


In [21]:
#Note that if you give only a single index to a multi-dimensional array, it indexes the first dimension of the array, that is the rows. For example:

print(b[0])    # First row
print(b[1])    # Second row

[10  2  3]
[4 5 6]


### Slicing
Slicing works similarly to lists, but now we can have slices in different dimensions:

In [22]:
print(a)
print(a[1:3])
print(a[::-1])    # Reverses the array

[1 4 2 7 9 5]
[4 2]
[5 9 7 2 4 1]


In [23]:
print(b)
print(b[:,0])
print(b[0,:])
print(b[:,1:])

[[10  2  3]
 [ 4  5  6]]
[10  4]
[10  2  3]
[[2 3]
 [5 6]]


In [24]:
#We can even assign to a slice:
b[:,1:] = 7
print(b)

[[10  7  7]
 [ 4  7  7]]


In [25]:
#A common idiom is to extract rows or columns from an array:
print(b[:,0])    # First column
print(b[1,:])    # Second row

[10  4]
[4 7 7]


## Reshaping
When an array is reshaped, its number of elements stays the same, but they are reinterpreted to have a different shape. An example of this is to interpret a one dimensional array as two dimension array:

In [26]:
a=np.arange(9)
anew=a.reshape(3,3)
info("anew", anew)
info("a", a)

anew has dim 2, shape (3, 3), size 9, and dtype int32:
[[0 1 2]
 [3 4 5]
 [6 7 8]]
a has dim 1, shape (9,), size 9, and dtype int32:
[0 1 2 3 4 5 6 7 8]


In [27]:
d=np.arange(4)             # 1d array
dr=d.reshape(1,4)          # row vector
dc=d.reshape(4,1)          # column vector
info("d", d)
info("dr", dr)
info("dc", dc)

d has dim 1, shape (4,), size 4, and dtype int32:
[0 1 2 3]
dr has dim 2, shape (1, 4), size 4, and dtype int32:
[[0 1 2 3]]
dc has dim 2, shape (4, 1), size 4, and dtype int32:
[[0]
 [1]
 [2]
 [3]]


Note the 1d array and the row and column vectors, which are 2d arrays, are fundamentally different objects, even though they look similar. They behave differently when we combine or otherwise operate arrays of different shapes, as we shall see in the next section and later in this material.

In [28]:
# An alternative syntax to create, for example, column or row vectors is through the np.newaxis keyword. 
# Sometimes this is easier or more natural than with the reshape method:
info("d", d)
info("drow", d[np.newaxis, :])
info("dcol", d[:, np.newaxis])

d has dim 1, shape (4,), size 4, and dtype int32:
[0 1 2 3]
drow has dim 2, shape (1, 4), size 4, and dtype int32:
[[0 1 2 3]]
dcol has dim 2, shape (4, 1), size 4, and dtype int32:
[[0]
 [1]
 [2]
 [3]]


## Array concatenation, splitting and stacking
The are two ways of combining several arrays into one bigger array: concatenate and stack. Concatenate takes n-dimensional arrays and returns an n-dimensional array, whereas stack takes n-dimensional arrays and returns n+1-dimensional array. Few examples of these:

In [29]:
a=np.arange(2)
b=np.arange(2,5)
print(f"a has shape {a.shape}: {a}")
print(f"b has shape {b.shape}: {b}")
np.concatenate((a,b))  # concatenating 1d arrays

a has shape (2,): [0 1]
b has shape (3,): [2 3 4]


array([0, 1, 2, 3, 4])

In [30]:
c=np.arange(1,5).reshape(2,2)
print(f"c has shape {c.shape}:", c, sep="\n")
np.concatenate((c,c))   # concatenating 2d arrays

c has shape (2, 2):
[[1 2]
 [3 4]]


array([[1, 2],
       [3, 4],
       [1, 2],
       [3, 4]])

In [31]:
#By default concatenate joins the arrays along axis 0. To join the arrays horizontally, add parameter axis=1:
np.concatenate((c,c), axis=1)

array([[1, 2, 1, 2],
       [3, 4, 3, 4]])

In [32]:
# If you want to catenate arrays with different dimensions, for example to add a new column to a 2d array, you must first reshape the arrays to have same number of dimensions:
print("New row:")
print(np.concatenate((c,a.reshape(1,2))))
print("New column:")
print(np.concatenate((c,a.reshape(2,1)), axis=1))

New row:
[[1 2]
 [3 4]
 [0 1]]
New column:
[[1 2 0]
 [3 4 1]]


In [33]:
#Use stack to create higher dimensional arrays from lower dimensional arrays:
np.stack((b,b))

array([[2, 3, 4],
       [2, 3, 4]])

In [34]:
np.stack((b,b), axis=1)

array([[2, 2],
       [3, 3],
       [4, 4]])

In [35]:
# Inverse operation of concatenate is split. 
# Its argument specifies either the number of equal parts the array is divided into, or it specifies explicitly the break points.

d=np.arange(12).reshape(6,2)
print("d:")
print(d)
d1,d2 = np.split(d, 2)
print("d1:")
print(d1)
print("d2:")
print(d2)
d3,d4 = np.split(d, 2, axis=1)
print("d3:")
print(d3)
print("d4:")
print(d4)

d:
[[ 0  1]
 [ 2  3]
 [ 4  5]
 [ 6  7]
 [ 8  9]
 [10 11]]
d1:
[[0 1]
 [2 3]
 [4 5]]
d2:
[[ 6  7]
 [ 8  9]
 [10 11]]
d3:
[[ 0]
 [ 2]
 [ 4]
 [ 6]
 [ 8]
 [10]]
d4:
[[ 1]
 [ 3]
 [ 5]
 [ 7]
 [ 9]
 [11]]


In [36]:
d=np.arange(12).reshape(2,6)
print("d:")
print(d)
parts=np.split(d, (2,3,5), axis=1)
for i, p in enumerate(parts):
    print("part %i:" % i)
    print(p)

d:
[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]]
part 0:
[[0 1]
 [6 7]]
part 1:
[[2]
 [8]]
part 2:
[[ 3  4]
 [ 9 10]]
part 3:
[[ 5]
 [11]]


In [37]:
    inital_array = np.eye(3, dtype=int)
    half_diamnond = np.concatenate((inital_array[::-1],inital_array[:,1:]), axis=1)
    full_diamond = np.concatenate((half_diamnond[:-1],half_diamnond[::-1]), axis=0)
    print(half_diamnond)
    print()
    print(full_diamond)

[[0 0 1 0 0]
 [0 1 0 1 0]
 [1 0 0 0 1]]

[[0 0 1 0 0]
 [0 1 0 1 0]
 [1 0 0 0 1]
 [0 1 0 1 0]
 [0 0 1 0 0]]


In [38]:
a = np.eye(3, dtype=int)
a = a[::-1]
a

array([[0, 0, 1],
       [0, 1, 0],
       [1, 0, 0]])

## Fancy indexing
Fancy indexing provides a concise syntax for accessing multiple elements:

In [39]:
# Using indexing we can get a single elements from an array. If we wanted multiple (not necessarily contiguous) elements, we would have to index several times:
np.random.seed(0)
a=np.random.randint(0, 20,20)
a2=np.array([a[2], a[5], a[7]])
print(a)
print(a2)

[12 15  0  3  3  7  9 19 18  4  6 12  1  6  7 14 17  5 13  8]
[ 0  7 19]


In [40]:
# fancy indexing
idx=[2,5,7]           # List of indices
print(a[idx])         # In fancy indexing in place of a single index, we can provide a list of indices
print(a[[2,5,7]])     # Or directly

[ 0  7 19]
[ 0  7 19]


In [41]:
#We can also assign to multiple elements through fancy indexing:
a[idx] = -1
print(a)

[12 15 -1  3  3 -1  9 -1 18  4  6 12  1  6  7 14 17  5 13  8]


In [42]:
#Fancy indexing works also for higher dimensional arrays:
b=np.arange(16).reshape(4,4)
print(b)
row=np.array([0,2])
col=np.array([1,3])
print(b[row, col])

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]
[ 1 11]


In [43]:
#Note that the result array was one dimensional! The shape of the result array is defined by the shape of the index arrays, not by the shape of the original array. The next example will demonstrate this.

row2=np.array([[0, 0], [2,2]])
col2=np.array([[1,3], [1,3]])
print(row2)
print(col2)
print(b[row2, col2])

[[0 0]
 [2 2]]
[[1 3]
 [1 3]]
[[ 1  3]
 [ 9 11]]


## Sorting arrays

In [44]:
a=np.array([2,1,4,3,5])
print(np.sort(a))          # Does not modify the argument
print(a)

[1 2 3 4 5]
[2 1 4 3 5]


In [45]:
a.sort()            # Modifies the argument
print(a)

[1 2 3 4 5]


In [46]:
b=np.random.randint(0,10, (4,4))
print(b)

[[9 4 3 0]
 [3 5 0 2]
 [3 8 1 3]
 [3 3 7 0]]


In [47]:
np.sort(b, axis=0)           # sort each column

array([[3, 3, 0, 0],
       [3, 4, 1, 0],
       [3, 5, 3, 2],
       [9, 8, 7, 3]])

In [48]:
np.sort(b, axis=1) 

array([[0, 3, 4, 9],
       [0, 2, 3, 5],
       [1, 3, 3, 8],
       [0, 3, 3, 7]])

In [49]:
# A related operation is the argsort function. Which doesn’t sort the elements, but returns the indices of the sorted elements. An example will demonstrate this:
a=np.array([23,12,47,35,59])
print("Array a:", a)
idx = np.argsort(a)
print("Indices:", idx)

Array a: [23 12 47 35 59]
Indices: [1 0 3 2 4]


In [50]:
print(a[idx])

[12 23 35 47 59]


In [54]:
b

array([[9, 4, 3, 0],
       [3, 5, 0, 2],
       [3, 8, 1, 3],
       [3, 3, 7, 0]])

In [53]:
b[:,2]

array([3, 0, 1, 7])