# NumPy ndarrays

Many examples are from the [NumPy documentation](https://numpy.org/doc/stable/user/quickstart.html)

In [1]:
import numpy as np

## Create array from list or other sequence
### Create 1-dimensional arrays from a sequence

In [2]:
# Build a 1-D array from a flat Python list, then show the array,
# its element type and its shape (one per line).
a = np.array([1, 2, 3, 4])
print(a, a.dtype, a.shape, sep='\n')

[1 2 3 4]
int64
(4,)


### Create multi-dimensional arrays from nested sequences

In [3]:
# Each inner tuple becomes one row of a 2-D array. The single float (1.5)
# makes NumPy pick a floating-point dtype for every element.
b = np.array([
    (1.5, 2, 3),
    (4, 5, 6),
])
print(b, b.dtype, b.shape, sep='\n')

[[1.5 2.  3. ]
 [4.  5.  6. ]]
float64
(2, 3)


### Specify type of array at creation

In [4]:
# Pass `dtype` to choose the element type instead of letting NumPy infer it
# from the values; the integers below are stored as complex numbers.
c = np.array([[1, 2], [3, 4]], dtype=complex)
print(c, c.dtype, sep='\n')

[[1.+0.j 2.+0.j]
 [3.+0.j 4.+0.j]]
complex128


## Create array with specific size and placeholder content
### Initial values are 0

In [13]:
# zeros() takes the shape as a tuple; with no dtype given the elements
# are float64.
d = np.zeros(shape=(3, 4))
print(d, d.dtype, d.shape, sep='\n')

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
float64
(3, 4)


### Initial values are 1

In [14]:
# ones() works like zeros(); here a 3-D shape is combined with an explicit
# 16-bit integer element type.
e = np.ones(shape=(2, 3, 4), dtype=np.int16)
print(e, e.dtype, e.shape, sep='\n')

[[[1 1 1 1]
  [1 1 1 1]
  [1 1 1 1]]

 [[1 1 1 1]
  [1 1 1 1]
  [1 1 1 1]]]
int16
(2, 3, 4)


### Initial values are not set (values are existing memory content)

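Play around with the dtype to see the resulting initial content. The values are whatever happened to be in memory, so they are arbitrary and change between runs (the output shown below came from a run with an integer dtype).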

In [18]:
np.empty((2, 3), dtype='float64')

array([[0, 0, 1],
       [0, 2, 0]], dtype=int32)

## Create array with range of elements

### arange() is like built-in range(start, end, step)

In [19]:
np.arange(10, 30, 5)

array([10, 15, 20, 25])

### arange() accepts float arguments

In [22]:
np.arange(0, 2, 0.25)

array([0.  , 0.25, 0.5 , 0.75, 1.  , 1.25, 1.5 , 1.75])

### linspace() is like arange(), but you provide the number of elements instead of the step (and the end value is included)
Example returns a 9 element array from 0 to 2, equally spaced

In [21]:
# Nine evenly spaced samples from 0 to 2, both end points included.
i = np.linspace(0, 2, num=9)
print(i, len(i), sep='\n')

[0.   0.25 0.5  0.75 1.   1.25 1.5  1.75 2.  ]
9


## Create random array
For random numbers, use the `np.random` module.

In [29]:
np.random.random((3, 4))

array([[0.8199335 , 0.37041696, 0.15311094, 0.1695369 ],
       [0.21383126, 0.94433587, 0.73096961, 0.74004217],
       [0.33322285, 0.47829379, 0.35369917, 0.55640786]])

To create consistent and reproducible random numbers, create a random number generator (rng) with an initial seed value.

In [27]:
# The generator is re-created from seed 0 inside this cell, so the draw
# below yields the same values every time the cell is run.
rg = np.random.default_rng(seed=0)
rg.uniform(low=-10, high=10, size=(2, 3))

array([[ 2.73923375, -4.60426572, -9.18052952],
       [-9.66944729,  6.26540478,  8.25511155]])

In [30]:
rg.integers(1, 10, size=(2, 3, 4))

array([[[5, 6, 9, 7],
        [6, 5, 6, 9],
        [3, 8, 7, 1]],

       [[4, 8, 5, 1],
        [7, 7, 8, 2],
        [1, 8, 1, 5]]])

## Save and load data from file

### Save array to .npy file (platform independent)

In [14]:
np.save('numpy_file', i)

### Load array from .npy file

In [15]:
j = np.load('numpy_file.npy')
j

array([0.  , 0.25, 0.5 , 0.75, 1.  , 1.25, 1.5 , 1.75, 2.  ])

## Printing
NumPy displays arrays like nested lists, but with the following layout:
- the last axis is printed from left to right,
- the second-to-last is printed from top to bottom,
- the rest are printed from top to bottom, with each slice separated from the next by an empty line.

In [31]:
# A 2 x 3 x 4 x 5 nested list in which every entry spells out its own
# position, e.g. as_lists[0][1][2][3] == '0123'.
as_lists = [
    [
        [
            [f'{w}{x}{y}{z}' for z in range(5)]
            for y in range(4)
        ]
        for x in range(3)
    ]
    for w in range(2)
]
as_lists

[[[['0000', '0001', '0002', '0003', '0004'],
   ['0010', '0011', '0012', '0013', '0014'],
   ['0020', '0021', '0022', '0023', '0024'],
   ['0030', '0031', '0032', '0033', '0034']],
  [['0100', '0101', '0102', '0103', '0104'],
   ['0110', '0111', '0112', '0113', '0114'],
   ['0120', '0121', '0122', '0123', '0124'],
   ['0130', '0131', '0132', '0133', '0134']],
  [['0200', '0201', '0202', '0203', '0204'],
   ['0210', '0211', '0212', '0213', '0214'],
   ['0220', '0221', '0222', '0223', '0224'],
   ['0230', '0231', '0232', '0233', '0234']]],
 [[['1000', '1001', '1002', '1003', '1004'],
   ['1010', '1011', '1012', '1013', '1014'],
   ['1020', '1021', '1022', '1023', '1024'],
   ['1030', '1031', '1032', '1033', '1034']],
  [['1100', '1101', '1102', '1103', '1104'],
   ['1110', '1111', '1112', '1113', '1114'],
   ['1120', '1121', '1122', '1123', '1124'],
   ['1130', '1131', '1132', '1133', '1134']],
  [['1200', '1201', '1202', '1203', '1204'],
   ['1210', '1211', '1212', '1213', '1214'],
   [

In [32]:
ar = np.array(as_lists)
ar

array([[[['0000', '0001', '0002', '0003', '0004'],
         ['0010', '0011', '0012', '0013', '0014'],
         ['0020', '0021', '0022', '0023', '0024'],
         ['0030', '0031', '0032', '0033', '0034']],

        [['0100', '0101', '0102', '0103', '0104'],
         ['0110', '0111', '0112', '0113', '0114'],
         ['0120', '0121', '0122', '0123', '0124'],
         ['0130', '0131', '0132', '0133', '0134']],

        [['0200', '0201', '0202', '0203', '0204'],
         ['0210', '0211', '0212', '0213', '0214'],
         ['0220', '0221', '0222', '0223', '0224'],
         ['0230', '0231', '0232', '0233', '0234']]],


       [[['1000', '1001', '1002', '1003', '1004'],
         ['1010', '1011', '1012', '1013', '1014'],
         ['1020', '1021', '1022', '1023', '1024'],
         ['1030', '1031', '1032', '1033', '1034']],

        [['1100', '1101', '1102', '1103', '1104'],
         ['1110', '1111', '1112', '1113', '1114'],
         ['1120', '1121', '1122', '1123', '1124'],
         ['1130', '11

In [33]:
ar[0, 1, 2, 3]

'0123'

In [34]:
ar[0][1][2][3]

'0123'

In [35]:
ar_ints = ar.astype(np.int16)
ar_ints

array([[[[   0,    1,    2,    3,    4],
         [  10,   11,   12,   13,   14],
         [  20,   21,   22,   23,   24],
         [  30,   31,   32,   33,   34]],

        [[ 100,  101,  102,  103,  104],
         [ 110,  111,  112,  113,  114],
         [ 120,  121,  122,  123,  124],
         [ 130,  131,  132,  133,  134]],

        [[ 200,  201,  202,  203,  204],
         [ 210,  211,  212,  213,  214],
         [ 220,  221,  222,  223,  224],
         [ 230,  231,  232,  233,  234]]],


       [[[1000, 1001, 1002, 1003, 1004],
         [1010, 1011, 1012, 1013, 1014],
         [1020, 1021, 1022, 1023, 1024],
         [1030, 1031, 1032, 1033, 1034]],

        [[1100, 1101, 1102, 1103, 1104],
         [1110, 1111, 1112, 1113, 1114],
         [1120, 1121, 1122, 1123, 1124],
         [1130, 1131, 1132, 1133, 1134]],

        [[1200, 1201, 1202, 1203, 1204],
         [1210, 1211, 1212, 1213, 1214],
         [1220, 1221, 1222, 1223, 1224],
         [1230, 1231, 1232, 1233, 1234]]]], d

### Array axes

In [36]:
ar_ints.max()  # Max of entire array

1234

In [37]:
ar_ints.max(0)  # Max along axis 0 -> result has shape (3, 4, 5)

array([[[1000, 1001, 1002, 1003, 1004],
        [1010, 1011, 1012, 1013, 1014],
        [1020, 1021, 1022, 1023, 1024],
        [1030, 1031, 1032, 1033, 1034]],

       [[1100, 1101, 1102, 1103, 1104],
        [1110, 1111, 1112, 1113, 1114],
        [1120, 1121, 1122, 1123, 1124],
        [1130, 1131, 1132, 1133, 1134]],

       [[1200, 1201, 1202, 1203, 1204],
        [1210, 1211, 1212, 1213, 1214],
        [1220, 1221, 1222, 1223, 1224],
        [1230, 1231, 1232, 1233, 1234]]], dtype=int16)

In [23]:
ar_ints.max(1)  # Max along axis 1 -> result has shape (2, 4, 5)

array([[[ 200,  201,  202,  203,  204],
        [ 210,  211,  212,  213,  214],
        [ 220,  221,  222,  223,  224],
        [ 230,  231,  232,  233,  234]],

       [[1200, 1201, 1202, 1203, 1204],
        [1210, 1211, 1212, 1213, 1214],
        [1220, 1221, 1222, 1223, 1224],
        [1230, 1231, 1232, 1233, 1234]]], dtype=int16)

In [24]:
ar_ints.max(2)  # Max along axis 2 -> result has shape (2, 3, 5)

array([[[  30,   31,   32,   33,   34],
        [ 130,  131,  132,  133,  134],
        [ 230,  231,  232,  233,  234]],

       [[1030, 1031, 1032, 1033, 1034],
        [1130, 1131, 1132, 1133, 1134],
        [1230, 1231, 1232, 1233, 1234]]], dtype=int16)

In [25]:
ar_ints.max(3)  # Max along axis 3 -> result has shape (2, 3, 4)

array([[[   4,   14,   24,   34],
        [ 104,  114,  124,  134],
        [ 204,  214,  224,  234]],

       [[1004, 1014, 1024, 1034],
        [1104, 1114, 1124, 1134],
        [1204, 1214, 1224, 1234]]], dtype=int16)

## Methods

In [38]:
# Small 2 x 3 integer array used by the method examples that follow.
k = np.array([(1, 2, 3), (4, 5, 6)])
print(k, k.shape, sep='\n')

[[1 2 3]
 [4 5 6]]
(2, 3)


### You can perform calculations on the whole array

In [39]:
print('Sum:', k.sum())
print('Min:', k.min())
print('Max:', k.max())

Sum: 21
Min: 1
Max: 6


### If you pass an axis in, it calculates over that axis
For a 2d array, axis 0 is vertical

In [40]:
print('Sum:', k.sum(0))
print('Min:', k.min(0))
print('Max:', k.max(0))

Sum: [5 7 9]
Min: [1 2 3]
Max: [4 5 6]


For a 2d array, axis 1 is horizontal

In [29]:
print('Sum:', k.sum(1))
print('Min:', k.min(1))
print('Max:', k.max(1))

Sum: [ 6 15]
Min: [1 4]
Max: [3 6]


### Conversions

In [30]:
k.tolist()

[[1, 2, 3], [4, 5, 6]]

In [31]:
k.astype(str)

array([['1', '2', '3'],
       ['4', '5', '6']], dtype='<U21')

### Changing shape

In [41]:
k.ravel()

array([1, 2, 3, 4, 5, 6])

In [42]:
k.reshape((3, 2))

array([[1, 2],
       [3, 4],
       [5, 6]])

In [43]:
k.T  # Transposed (axes swapped)

array([[1, 4],
       [2, 5],
       [3, 6]])

## Basic operations

Basic operations return a new array, with operation performed on each element

### Single number operations

In [44]:
l = np.array([10, 20, 30, 40])
l

array([10, 20, 30, 40])

In [45]:
l - 1

array([ 9, 19, 29, 39])

In [46]:
l * 2.5  # (element multiplication)

array([ 25.,  50.,  75., 100.])

In [38]:
l ** 2

array([ 100,  400,  900, 1600])

In [39]:
10 * np.sin(l)

array([-5.44021111,  9.12945251, -9.88031624,  7.4511316 ])

In [47]:
l < 25

array([ True,  True, False, False])

In [66]:
l[l < 25]

array([10, 20])

### 1d array operations
Operations can be performed against an array of same size

In [49]:
m = np.arange(4)
m

array([0, 1, 2, 3])

In [50]:
l - m

array([10, 19, 28, 37])

In [51]:
l * m

array([  0,  20,  60, 120])

In [52]:
l @ m  # 200 (dot product), or

200

In [45]:
l.dot(m)  # 200 (dot product)

200

Operating on an array of a different size will throw an error.

In [53]:
# Subtracting a length-2 list from the length-4 array `l` cannot be broadcast,
# so NumPy raises ValueError. Catch exactly that, so any other problem surfaces
# as a real failure. The exception is bound to `err` rather than `e`: Python
# unbinds the `as` target when the handler exits, which would delete the
# array `e` created earlier in the notebook.
try:
    l - [1, 2]
except ValueError as err:
    print(repr(err))

ValueError('operands could not be broadcast together with shapes (4,) (2,) ')


### Broadcasting
For multidimensional arrays, basic operations follow [broadcasting](https://numpy.org/doc/stable/user/basics.broadcasting.html) rules:
- each dimension must either have the same size, or
- one must have size 1

### 2d array operations

In [47]:
n = np.array([(1, 2), (3, 4), (5, 6)])  # (3, 2)
n

array([[1, 2],
       [3, 4],
       [5, 6]])

In [48]:
o = np.array([[1], [2], [3]])  # (3, 1): 1st dim is same size, 2nd dim size is 1
o

array([[1],
       [2],
       [3]])

In [49]:
n * o

array([[ 1,  2],
       [ 6,  8],
       [15, 18]])

In [50]:
p = np.array([[1, 2]])  # (1, 2): 1st dim size is 1, 2nd dim is same size
p

array([[1, 2]])

In [51]:
n * p

array([[ 1,  4],
       [ 3,  8],
       [ 5, 12]])

### Upcasting
When arrays have different types, the result will use the more general/precise type

In [52]:
q = np.ones(3, dtype=np.int32)
q

array([1, 1, 1], dtype=int32)

In [53]:
r = np.linspace(0, 1, 3)
r

array([0. , 0.5, 1. ])

In [54]:
print(q.dtype)
print(r.dtype)

int32
float64


In [55]:
s = q + r
print(s.dtype)
s

float64


array([1. , 1.5, 2. ])

### Augmented assignment operations
These modify the existing array (e.g. +=)

In [56]:
r += q
r

array([1. , 1.5, 2. ])

#### Type errors
Because arrays have a specific type,
you may get type errors if you try to store a more precise type in a less precise array

In [57]:
# The in-place add has to write its float64 result back into the int32 array
# `q`, which the default 'same_kind' casting rule does not allow. NumPy reports
# this as a UFuncTypeError, a TypeError subclass, so only TypeError is caught.
# The exception is bound to `err` rather than `e`: Python unbinds the `as`
# target when the handler exits, which would delete the array `e` created
# earlier in the notebook.
try:
    q += r
except TypeError as err:
    print(repr(err))
    print('Cannot store a float64 type into an int32 type array')

UFuncTypeError(<ufunc 'add'>, 'same_kind', dtype('float64'), dtype('int32'), 2)
Cannot store a float64 type into an int32 type array


## Indexing and slicing
### 1d arrays
Works mostly like lists

In [54]:
t = np.arange(5) * 2
t

array([0, 2, 4, 6, 8])

In [59]:
t[2]

4

In [60]:
t[1:4]

array([2, 4, 6])

In [61]:
t[:]

array([0, 2, 4, 6, 8])

In [62]:
t[::-1]

array([8, 6, 4, 2, 0])

In [55]:
t[[3, 1, 0]]  # choose specific items (can't do this with lists)

array([6, 2, 0])

In [64]:
t[::2] = -1  # set every second number to -1 (can't do this with lists)
t

array([-1,  2, -1,  6, -1])

### Multidimensional arrays

In [56]:
def f(x, y):
    """Return ``10 * x + y``, e.g. ``f(2, 3) == 23``.

    Uses only ``*`` and ``+``, so it works elementwise on the coordinate
    arrays that ``np.fromfunction`` passes in as well as on plain numbers.
    """
    tens = 10 * x
    return tens + y

In [57]:
u = np.fromfunction(f, (5, 4), dtype=int)
u

array([[ 0,  1,  2,  3],
       [10, 11, 12, 13],
       [20, 21, 22, 23],
       [30, 31, 32, 33],
       [40, 41, 42, 43]])

#### Nd arrays can have 1 index per axis

In [58]:
u[2, 3]

23

In [59]:
u[:, 1]

array([ 1, 11, 21, 31, 41])

In [60]:
u[1:3, :]

array([[10, 11, 12, 13],
       [20, 21, 22, 23]])

If fewer axes provided, missing ones are considered complete slices `:`

In [61]:
u[-1]  # equivalent to u[-1, :]

array([40, 41, 42, 43])

Use `...` (Ellipsis) to stand in for as many complete slices `:` as are needed to cover the remaining axes

In [62]:
u[..., -1]  # equivalent to u[:, -1]

array([ 3, 13, 23, 33, 43])

## Iterating
Iterating over an array goes along axis 0 (the first axis), so each step yields one row

In [63]:
for row in u:
    print(row)

[0 1 2 3]
[10 11 12 13]
[20 21 22 23]
[30 31 32 33]
[40 41 42 43]


Can nest to access higher axes

In [64]:
for row in u:
    for item in row:
        print(item)

0
1
2
3
10
11
12
13
20
21
22
23
30
31
32
33
40
41
42
43


Can iterate over each element using ndarray.flat

In [65]:
for item in u.flat:
    print(item)

0
1
2
3
10
11
12
13
20
21
22
23
30
31
32
33
40
41
42
43
