In [20]:
import numpy as np

## Numpy Basics

`ndarray.ndim` - the number of axes (dimensions) of the array.

`ndarray.shape` - the dimensions of the array. This is a tuple of integers indicating the size of the array in each dimension, e.g., a matrix with $n$ rows and $m$ columns, `shape` will be $(n, m)$.

`ndarray.size` - the total number of elements of the array. (the product of the elements of `shape`)
`ndarray.dtype` - an object dscribing the type of the elements in the array.

`ndarray.itemsize` - the size in bytes of eah element of the array. It is equivalent to `ndarray.dtype.itemsize`. e.g., an array of elements of type `float64` has `itemsize` 8 (64/8).

`ndarray.data` - the buffer containing the actual elements of the array. (normally using indexing facilities)

### Creation

In [None]:
narr = np.arange(15).reshape(3,5)

In [None]:
narr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [None]:
print("shape: ", narr.shape)
print("ndim: ", narr.ndim)
print("dtype.name: ", narr.dtype)
print("itemsize: ", narr.itemsize)
print("size: ", narr.size)
print("type(narr): ", type(narr))


shape:  (3, 5)
ndim:  2
dtype.name:  int64
itemsize:  8
size:  15
type(narr):  <class 'numpy.ndarray'>


In [None]:
b = np.array([2.1, 5.2, 9.8])
print(b.dtype)

float64


In [None]:
c = np.array([[1,2,3],[4,5,6]])
c

array([[1, 2, 3],
       [4, 5, 6]])

In [None]:
np.zeros((3,4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [None]:
np.ones((2,3,4), dtype=np.int16)

array([[[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]],

       [[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]]], dtype=int16)

In [None]:
print(np.arange(1, 10, 3))
print(np.arange(0, 2, 0.3))

[1 4 7]
[0.  0.3 0.6 0.9 1.2 1.5 1.8]


In [None]:
# because the finite floating point precision, it is usually better to use 
# the function `linspace` that receives as an argument the number of elements 
# that we want, instead of the step
print(np.linspace(0,2,5))
print(np.linspace(0,2,4))

[0.  0.5 1.  1.5 2. ]
[0.         0.66666667 1.33333333 2.        ]


### Basic Operations

In [None]:
a = np.array([3,4,5,6])
b = np.arange(4)
print(a)
print(b)

[3 4 5 6]
[0 1 2 3]


In [None]:
c = a - b 
print(c)

[3 3 3 3]


In [None]:
print(b**2)
print(a < 5)

[0 1 4 9]
[ True  True False False]


In [None]:
# the product operator * operates elementwise in numpy arrays. 
# the matrix product, use the @ operator (in python >= 3.5) or the `dot` method

A = np.array([[1,0],
              [0,1]])
B = np.array([[1,2],
              [3,4]])
# elementwise product
print(A*B) 
print("A @ B: ")
print(A @ B)
print()
print("A.dot(B): ")
print(A.dot(B))

[[1 0]
 [0 4]]
A @ B: 
[[1 2]
 [3 4]]

A.dot(B): 
[[1 2]
 [3 4]]


In [None]:
# create instance of default random number generator
rg = np.random.default_rng(1)
e = np.ones((2,3), dtype=int)
f = rg.random((2,3))
print(e)
print(f)

[[1 1 1]
 [1 1 1]]
[[0.51182162 0.9504637  0.14415961]
 [0.94864945 0.31183145 0.42332645]]


In [None]:
e *= 2
print(e)

[[2 2 2]
 [2 2 2]]


In [None]:
f += e
print(f)

[[2.51182162 2.9504637  2.14415961]
 [2.94864945 2.31183145 2.42332645]]


In [None]:
# e += f error, f is not automatically converted to integer type
# e += f

UFuncTypeError: ignored

In [None]:
h = np.ones((2,3))
i = np.arange(12).reshape(3,4)
print(h)
print(i)

[[1. 1. 1.]
 [1. 1. 1.]]
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [None]:
print(h.sum())
print(i.min())
print(i.max())

6.0
0
11


In [None]:
print(i.sum(axis=0))
print(i.sum(axis=1))
print(i.min(axis=0))
print(i.min(axis=1))

[12 15 18 21]
[ 6 22 38]
[0 1 2 3]
[0 4 8]


In [None]:
j = np.arange(3)
print(j)
print(np.exp(j))
print(np.sqrt(j))
print(np.add(j, j))


[0 1 2]
[1.         2.71828183 7.3890561 ]
[0.         1.         1.41421356]
[0 2 4]


See also:
`all, any, apply_along_axis, argmax, argmin, argsort, average, bincount, ceil, clip, conj, corrcoef, cov, cross, cumprod, cumsum, diff, dot, floor, inner, invert, lexsort, max, maximum, mean, median, min, minimum, nonzero, outer, prod, re, round, sort, std, sum, trace, transpose, var, vdot, vectorize, where`

### Indexing, Slicing and Iterating

In [None]:
k = np.arange(10)**3
print(k)
print(k[2])
print(k[1:4])
print(k[::-1])

[  0   1   8  27  64 125 216 343 512 729]
8
[ 1  8 27]
[729 512 343 216 125  64  27   8   1   0]


In [None]:
print(k)
# same as k[0:6:2] = 100
# from start to position 6, exclusive, set every 2nd element to 100
k[:6:2] = 100
print(k)

[  0   1   8  27  64 125 216 343 512 729]
[100   1 100  27 100 125 216 343 512 729]


In [2]:
def f(x, y):
  return 10*x+y

In [3]:
m = np.fromfunction(f, (5,4), dtype=int)
print(m)
print(m[2,3])
print(m[0:5, 1])
print(m[:,1])

[[ 0  1  2  3]
 [10 11 12 13]
 [20 21 22 23]
 [30 31 32 33]
 [40 41 42 43]]
23
[ 1 11 21 31 41]
[ 1 11 21 31 41]


In [4]:
# each column in the second and third row of m
print(m[1:3, :])

# the last row
print(m[-1])

[[10 11 12 13]
 [20 21 22 23]]
[40 41 42 43]


In [7]:
# m[i] is treated as an i followed by as many instances of : as needed to represent 
# the remaining axes. Also can use dots, e.g., m[i, ...]
# if x is an array with 5 axes, then 
# x[1,2,...]is equivalent to x[1,2,:,:,:]
# x[..., 3] to x[:,:,:,:,3]
# x[4,...,5,:] to x[4, :,:, 5, :]
print(m.shape)
print(m[1,...])

(5, 4)
[10 11 12 13]


In [8]:
# Iterating over multidimensional arrays is done with respect to the first axis
for row in m:
  print(row)

[0 1 2 3]
[10 11 12 13]
[20 21 22 23]
[30 31 32 33]
[40 41 42 43]


In [9]:
# if one wants to perform an operation on each element in 
# the array, one can use the flat attribute which is an iterator
# over all the elements of the array
n = np.arange(6).reshape(2,3)
print(n)
for element in n.flat:
  print(element)

[[0 1 2]
 [3 4 5]]
0
1
2
3
4
5


### Shape Manipulation

In [10]:
# ravel returns the array, flattened
print(n.ravel())
print(n.T)
print(n.shape, " -> ", n.T.shape)

[0 1 2 3 4 5]
[[0 3]
 [1 4]
 [2 5]]
(2, 3)  ->  (3, 2)


In [13]:
# if a dimension is given as -1 in a reshaping operation, the other dimensions
# are automatically calculated

print(n.reshape(3,-1))

[[0 1]
 [2 3]
 [4 5]]


In [21]:
# reshape returns a modified shape, whereas the ndarray.resize method modifies the array itself
j = np.arange(6).reshape(2,3)
print(j)

[[0 1 2]
 [3 4 5]]


In [22]:
print(j.reshape(3,2))
print(j)

[[0 1]
 [2 3]
 [4 5]]
[[0 1 2]
 [3 4 5]]


In [19]:
print(j)
j.resize(3,2)
print(j)

[[0 1]
 [2 3]
 [4 5]]
[[0 1]
 [2 3]
 [4 5]]


### Stacking

In [23]:
x = np.arange(6).reshape(2,3)
y = np.ones_like(x)
print(x)
print(y)

[[0 1 2]
 [3 4 5]]
[[1 1 1]
 [1 1 1]]


In [25]:
print(np.vstack((x,y)))
print(np.hstack((x,y)))

[[0 1 2]
 [3 4 5]
 [1 1 1]
 [1 1 1]]
[[0 1 2 1 1 1]
 [3 4 5 1 1 1]]


In general, for arrays with more than two dimensions, `hstack` stacks along their second axes, `vstack` stacks along their first axes, and `concatenate` allows for an optional arguments giving the number of the axis along which teh concatenation should happen.

In [26]:
print(np.column_stack((x,y)))
# row_stack is an alias for vstack
print(np.row_stack((x,y)))

[[0 1 2 1 1 1]
 [3 4 5 1 1 1]]
[[0 1 2]
 [3 4 5]
 [1 1 1]
 [1 1 1]]


### Splitting arrays


Using `hsplit`, you can split an array along its horizaontal axis, either by specifying the number of equally shaped arrays to return , or by  specifying the columns after which the division shoudl occur.
`vsplit` splits along the vertical axis, and `array_split` allows one to specify along which axis to split.

In [27]:
z = np.arange(12).reshape((2,6))
print(z)
print(np.hsplit(z,3))

[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]]
[array([[0, 1],
       [6, 7]]), array([[2, 3],
       [8, 9]]), array([[ 4,  5],
       [10, 11]])]


In [29]:
z.resize((4,3))
print(z)
print(np.vsplit(z,2))

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]
[array([[0, 1, 2],
       [3, 4, 5]]), array([[ 6,  7,  8],
       [ 9, 10, 11]])]


### Copies and Views

In [32]:
# simple assignments make no copy of objects or their data
a = np.array([[1,2,3],
              [4,5,6],
              [7,8,9]])
b = a # no new object is created
print(b is a)
# id() is a unique identifier of an object
print(id(a))
print(id(b))

True
140442410667536
140442410667536


140442410665904


The `view` method creates a new array object that looks at the same data.

In [33]:
c = a.view()
print(c is a) 
print(c.base is a ) # c is a view of the data owned by a

False
True


In [35]:
# c reshape, a's shape doesn't change
c = c.reshape(((1,-1)))
print(c)
print(a)
print(c.shape)
print(a.shape)

[[1 2 3 4 5 6 7 8 9]]
[[1 2 3]
 [4 5 6]
 [7 8 9]]
(1, 9)
(3, 3)


In [36]:
# change c, a's data changes
c[0,0]=10
print(c)
print(a)

[[10  2  3  4  5  6  7  8  9]]
[[10  2  3]
 [ 4  5  6]
 [ 7  8  9]]


In [37]:
# Slicing an array returns a view of it
s = a[:, 0:2]
print(a)
print(s)

[[10  2  3]
 [ 4  5  6]
 [ 7  8  9]]
[[10  2]
 [ 4  5]
 [ 7  8]]


In [38]:
# s[:]] is a view of s, the difference between s = 10 and s[:]=10
s[:]=99
print(s)
print(a)

[[99 99]
 [99 99]
 [99 99]]
[[99 99  3]
 [99 99  6]
 [99 99  9]]


In [39]:
s=88
print(s)
print(a)

88
[[99 99  3]
 [99 99  6]
 [99 99  9]]


In [40]:
# Deep copy, the ocpy method makes a complete copy the the array and its data
a = np.arange(12).reshape((3,4))
d = a.copy()
print(d is a)
print(d.base is a)
print(d)

False
False
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [41]:
d[0,0]=29
print(a)
print(d)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[29  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


Sometimes `copy` should be called after slicing if the original array is not required anymore. For example, suppose a is a huge intermediate result and the final result b only contains a small fraction of a, a deep copy should be made when constructing b with slicing.

In [42]:
a = np.arange(1000)
b = a[:9].copy()
del a # the memory of `a` can be released
print(b)

[0 1 2 3 4 5 6 7 8]
