# Numpy

> A tour of NumPy array creation, indexing, and text I/O with `genfromtxt`.

In [None]:
#| default_exp num

## Import Numpy

In [None]:
#|eval: false
!pip list | grep numpy

numpy                         1.23.5


In [None]:
import numpy as np 

## Creating Numpy Arrays

### Python sequences to NumPy Arrays

In [None]:
# A flat list becomes a 1-D array.
a1D = np.array([1, 2, 3, 4])

# A list of lists becomes a 2-D array (here 2 x 2).
a2D = np.array([[1, 2], [3, 4]])

# Each further level of nesting adds a dimension (here 2 x 2 x 2).
a3D = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

a1D, a2D, a3D

(array([1, 2, 3, 4]),
 array([[1, 2],
        [3, 4]]),
 array([[[1, 2],
         [3, 4]],
 
        [[5, 6],
         [7, 8]]]))

In [None]:
# 128 and 129 do not fit in int8 (range -128..127).  Passing dtype=np.int8
# straight to np.array() relied on silent wrap-around, which NumPy 1.24
# deprecated and NumPy 2 rejects with OverflowError.  Casting afterwards
# with astype() keeps the wrap-around demonstration working on every version.
a = np.array([127, 128, 129]).astype(np.int8)
a

array([ 127, -128, -127], dtype=int8)

### Intrinsic NumPy array creation functions

In [None]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
np.arange(2, 10, dtype=float)

array([2., 3., 4., 5., 6., 7., 8., 9.])

In [None]:
np.arange(2, 3, 0.1)

array([2. , 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9])

In [None]:
np.linspace(1., 4., 6)

array([1. , 1.6, 2.2, 2.8, 3.4, 4. ])

In [None]:
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [None]:
np.eye(3, 5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.]])

In [None]:
np.diag([1, 2, 3])

array([[1, 0, 0],
       [0, 2, 0],
       [0, 0, 3]])

In [None]:
np.diag([1, 2, 3], 1)

array([[0, 1, 0, 0],
       [0, 0, 2, 0],
       [0, 0, 0, 3],
       [0, 0, 0, 0]])

In [None]:
np.vander(np.linspace(0, 2, 5), 3)

array([[0.  , 0.  , 1.  ],
       [0.25, 0.5 , 1.  ],
       [1.  , 1.  , 1.  ],
       [2.25, 1.5 , 1.  ],
       [4.  , 2.  , 1.  ]])

In [None]:
np.zeros((2, 3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [None]:
np.ones((2, 3, 2))

array([[[1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.]]])

In [None]:
from numpy.random import default_rng
default_rng(42).random((2,3))

array([[0.77395605, 0.43887844, 0.85859792],
       [0.69736803, 0.09417735, 0.97562235]])

In [None]:
np.indices((3,3))

array([[[0, 0, 0],
        [1, 1, 1],
        [2, 2, 2]],

       [[0, 1, 2],
        [0, 1, 2],
        [0, 1, 2]]])

### Replicating, joining, or mutating existing arrays

In [None]:
a = np.array([1, 2, 3, 4, 5, 6])

# Basic slicing returns a view: b shares its data with a.
b = a[:2]

# The in-place add therefore also changes the first two elements of a.
b += 1

print('a =', a, '; b =', b)

a = [2 3 3 4 5 6] ; b = [2 3]


In [None]:
a = np.array([1, 2, 3, 4])

# .copy() gives b its own data, independent of a.
b = a[:2].copy()

# Only b changes this time; a keeps its original values.
b += 1

print('a = ', a, 'b = ', b)

a =  [1 2 3 4] b =  [2 3]


In [None]:
# Four 2 x 2 building blocks.
A = np.ones((2, 2))

B = np.eye(2, 2)

C = np.zeros((2, 2))

D = np.diag((-3, -4))

# Assemble them into one 4 x 4 array: [A, B] on top, [C, D] below.
np.block([[A, B], [C, D]])

array([[ 1.,  1.,  1.,  0.],
       [ 1.,  1.,  0.,  1.],
       [ 0.,  0., -3.,  0.],
       [ 0.,  0.,  0., -4.]])

## Indexing

### Basic indexing

In [None]:
x = np.arange(10)
x,x[2]

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), 2)

In [None]:
x[-2]

8

In [None]:
# Reshape with ndarray.reshape() rather than assigning to x.shape: the NumPy
# documentation discourages setting the shape attribute in place.
x = x.reshape(2, 5)  # now x is 2-dimensional

x, x[1, 3]

(array([[0, 1, 2, 3, 4],
        [5, 6, 7, 8, 9]]),
 8)

In [None]:
x[0], x[0][2]

(array([0, 1, 2, 3, 4]), 2)

#### Slicing and striding

In [None]:
x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

x[1:7:2]

array([1, 3, 5])

In [None]:
x[-2:10]

array([8, 9])

In [None]:
x[-3:3:-1]

array([7, 6, 5, 4])

In [None]:
x[5:]

array([5, 6, 7, 8, 9])

In [None]:
x = np.array([[[1],[2],[3]], [[4],[5],[6]]])

x.shape, x[1:2]

((2, 3, 1),
 array([[[4],
         [5],
         [6]]]))

#### Dimensional indexing tools

In [None]:
x[..., 0]

array([[1, 2, 3],
       [4, 5, 6]])

In [None]:
x[:, :, 0]

array([[1, 2, 3],
       [4, 5, 6]])

In [None]:
x[:, np.newaxis, :, :].shape

(2, 1, 3, 1)

In [None]:
x[:, None, :, :].shape

(2, 1, 3, 1)

In [None]:
x = np.arange(5)
x

array([0, 1, 2, 3, 4])

In [None]:
x[:, np.newaxis] + x[np.newaxis, :]

array([[0, 1, 2, 3, 4],
       [1, 2, 3, 4, 5],
       [2, 3, 4, 5, 6],
       [3, 4, 5, 6, 7],
       [4, 5, 6, 7, 8]])

### Advanced indexing

In [None]:
x = np.arange(10, 1, -1)

In [None]:
x[np.array([3, 3, 1, 8])]

array([7, 7, 9, 2])

In [None]:
x = np.array([[1, 2], [3, 4], [5, 6]])

In [None]:
y = np.arange(35).reshape(5, 7)

y

array([[ 0,  1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12, 13],
       [14, 15, 16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25, 26, 27],
       [28, 29, 30, 31, 32, 33, 34]])

In [None]:
y[np.array([0, 2, 4]), np.array([0, 1, 2])]

array([ 0, 15, 30])

In [None]:
y[np.array([0, 2, 4]), 1]

array([ 1, 15, 29])

In [None]:
y[np.array([0, 2, 4])]

array([[ 0,  1,  2,  3,  4,  5,  6],
       [14, 15, 16, 17, 18, 19, 20],
       [28, 29, 30, 31, 32, 33, 34]])

In [None]:
x = np.array([[1, 2], [3, 4], [5, 6]])

x[[0, 1, 2], [0, 1, 0]]

array([1, 4, 5])

In [None]:
x = np.array([
    [0, 1, 2],
    [3, 4, 5],
    [6, 7, 8],
    [9, 10, 11],
])

# Two index arrays of the same 2 x 2 shape: element [i, j] of the result is
# x[rows[i, j], columns[i, j]], which here picks the four corners of x.
rows = np.array([[0, 0], [3, 3]], dtype=np.intp)
columns = np.array([[0, 2], [0, 2]], dtype=np.intp)

x[rows, columns]

array([[ 0,  2],
       [ 9, 11]])

In [None]:
rows = np.array([0, 3], dtype=np.intp)

columns = np.array([0, 2], dtype=np.intp)

rows[:, np.newaxis]

array([[0],
       [3]])

In [None]:
x[rows[:, np.newaxis], columns]

array([[ 0,  2],
       [ 9, 11]])

In [None]:
x[np.ix_(rows, columns)]

array([[ 0,  2],
       [ 9, 11]])

In [None]:
x = np.array([[1., 2.], [np.nan, 3.], [np.nan, np.nan]])

x[~np.isnan(x)]

array([1., 2., 3.])

In [None]:
x = np.array([1.0, -1.0, -2.0, 3.0])

# Boolean mask selecting the negative entries; only those are shifted by 20.
negative = x < 0
x[negative] += 20

x

array([ 1., 19., 18.,  3.])

In [None]:
x = np.arange(35).reshape(5, 7)

b = x > 20
b

array([[False, False, False, False, False, False, False],
       [False, False, False, False, False, False, False],
       [False, False, False, False, False, False, False],
       [ True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True]])

In [None]:
b[:, 5]

array([False, False, False,  True,  True])

In [None]:
x[b[:, 5]]

array([[21, 22, 23, 24, 25, 26, 27],
       [28, 29, 30, 31, 32, 33, 34]])

In [None]:
x = np.array([
    [0, 1, 2],
    [3, 4, 5],
    [6, 7, 8],
    [9, 10, 11],
])

# Boolean mask over rows: True where the row's elements sum to an even number.
row_totals = x.sum(axis=-1)
rows = row_totals % 2 == 0

rows

array([False,  True, False,  True])

In [None]:
columns = [0, 2]

x[np.ix_(rows, columns)]

array([[ 3,  5],
       [ 9, 11]])

In [None]:
rows = rows.nonzero()[0]

x[rows[:, np.newaxis], columns]

array([[ 3,  5],
       [ 9, 11]])

In [None]:
x = np.arange(30).reshape(2, 3, 5)

x

array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14]],

       [[15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24],
        [25, 26, 27, 28, 29]]])

In [None]:
b = np.array([[True, True, False], [False, True, True]])

x[b]

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [20, 21, 22, 23, 24],
       [25, 26, 27, 28, 29]])

In [None]:
y = np.arange(35).reshape(5,7)

y[np.array([0, 2, 4]), 1:3]

array([[ 1,  2],
       [15, 16],
       [29, 30]])

In [None]:
y[:, 1:3][np.array([0, 2, 4]), :]

array([[ 1,  2],
       [15, 16],
       [29, 30]])

In [None]:
x = np.array([[ 0,  1,  2],

              [ 3,  4,  5],

              [ 6,  7,  8],

              [ 9, 10, 11]])

x[1:2, 1:3]

array([[4, 5]])

In [None]:
x[1:2, [1, 2]]

array([[4, 5]])

In [None]:
x = np.arange(35).reshape(5, 7)

b = x > 20

b

array([[False, False, False, False, False, False, False],
       [False, False, False, False, False, False, False],
       [False, False, False, False, False, False, False],
       [ True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True]])

In [None]:
x[b[:, 5], 1:3]

array([[22, 23],
       [29, 30]])

### Field Access

In [None]:
x = np.zeros((2, 2), dtype=[('a', np.int32), ('b', np.float64, (3, 3))])
x

array([[(0, [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]]),
        (0, [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]])],
       [(0, [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]]),
        (0, [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]])]],
      dtype=[('a', '<i4'), ('b', '<f8', (3, 3))])

In [None]:
x['a'].shape, x['b'].shape

((2, 2), (2, 2, 3, 3))

In [None]:
x['a'].dtype, x['b'].dtype

(dtype('int32'), dtype('float64'))

### Assigning values to indexed arrays

In [None]:
x = np.arange(10)

x[2:7] = 1

In [None]:
x[2:7] = np.arange(5)

In [None]:
# x holds integers, so the float 1.2 is truncated to 1 on assignment.
x[1] = 1.2

x[1]

1

In [None]:
x = np.arange(0, 50, 10)
x

array([ 0, 10, 20, 30, 40])

In [None]:
# Index 1 is listed three times, yet x[1] is incremented only once
# (10 -> 11, not 13).
x[np.array([1, 1, 3, 1])] += 1

x

array([ 0, 11, 20, 31, 40])

### Dealing with variable numbers of indices within programs

In [None]:
z = np.arange(81).reshape(3, 3, 3, 3)

indices = (1, 1, 1, 1)

z[indices]

40

In [None]:
indices = (1, 1, 1, slice(0, 2))  # same as [1, 1, 1, 0:2]

z[indices]

array([39, 40])

In [None]:
indices = (1, Ellipsis, 1)  # same as [1, ..., 1]

z[indices]

array([[28, 31, 34],
       [37, 40, 43],
       [46, 49, 52]])

In [None]:
z[[1, 1, 1, 1]]  # produces a large array

array([[[[27, 28, 29],
         [30, 31, 32],
         [33, 34, 35]],

        [[36, 37, 38],
         [39, 40, 41],
         [42, 43, 44]],

        [[45, 46, 47],
         [48, 49, 50],
         [51, 52, 53]]],


       [[[27, 28, 29],
         [30, 31, 32],
         [33, 34, 35]],

        [[36, 37, 38],
         [39, 40, 41],
         [42, 43, 44]],

        [[45, 46, 47],
         [48, 49, 50],
         [51, 52, 53]]],


       [[[27, 28, 29],
         [30, 31, 32],
         [33, 34, 35]],

        [[36, 37, 38],
         [39, 40, 41],
         [42, 43, 44]],

        [[45, 46, 47],
         [48, 49, 50],
         [51, 52, 53]]],


       [[[27, 28, 29],
         [30, 31, 32],
         [33, 34, 35]],

        [[36, 37, 38],
         [39, 40, 41],
         [42, 43, 44]],

        [[45, 46, 47],
         [48, 49, 50],
         [51, 52, 53]]]])

In [None]:
z[(1, 1, 1, 1)]  # returns a single value

40

## I/O with Numpy

### Splitting the lines into columns

In [None]:
import numpy as np

from io import StringIO

In [None]:
data = u"1, 2, 3\n4, 5, 6"

np.genfromtxt(StringIO(data), delimiter=",")

array([[1., 2., 3.],
       [4., 5., 6.]])

In [None]:
data = u"  1  2  3\n  4  5 67\n890123  4"

np.genfromtxt(StringIO(data), delimiter=3)

array([[  1.,   2.,   3.],
       [  4.,   5.,  67.],
       [890., 123.,   4.]])

In [None]:
data = u"123456789\n   4  7 9\n   4567 9"

np.genfromtxt(StringIO(data), delimiter=(4, 3, 2))

array([[1234.,  567.,   89.],
       [   4.,    7.,    9.],
       [   4.,  567.,    9.]])

In [None]:
data = u"1, abc , 2\n 3, xxx, 4"

# Without autostrip

np.genfromtxt(StringIO(data), delimiter=",", dtype="|U5")

array([['1', ' abc ', ' 2'],
       ['3', ' xxx', ' 4']], dtype='<U5')

In [None]:
np.genfromtxt(StringIO(data), delimiter=",", dtype="|U5", autostrip=True)

array([['1', 'abc', '2'],
       ['3', 'xxx', '4']], dtype='<U5')

In [None]:
data = u"""#

# Skip me !

# Skip me too !

1, 2

3, 4

5, 6 #This is the third line of the data

7, 8

# And here comes the last line

9, 0

"""

np.genfromtxt(StringIO(data), comments="#", delimiter=",")

array([[1., 2.],
       [3., 4.],
       [5., 6.],
       [7., 8.],
       [9., 0.]])

### Skipping lines and choosing columns

In [None]:
# One integer per line: the digits 0 through 9.
data = "\n".join(map(str, range(10)))

np.genfromtxt(StringIO(data))

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [None]:
np.genfromtxt(StringIO(data),
              skip_header=3, skip_footer=5)

array([3., 4.])

In [None]:
data = u"1 2 3\n4 5 6"

np.genfromtxt(StringIO(data), usecols=(0, -1))

array([[1., 3.],
       [4., 6.]])

In [None]:
data = u"1 2 3\n4 5 6"

np.genfromtxt(StringIO(data),
              names="a, b, c", usecols=("a", "c"))

array([(1., 3.), (4., 6.)], dtype=[('a', '<f8'), ('c', '<f8')])

In [None]:
np.genfromtxt(StringIO(data),
              names="a, b, c", usecols=("a, c"))

array([(1., 3.), (4., 6.)], dtype=[('a', '<f8'), ('c', '<f8')])

### Choosing the data type

The main way to control how the sequences of strings we have read from the file are converted to other types is to set the dtype argument. Acceptable values for this argument are:

- a single type, such as dtype=float. The output will be 2D with the given dtype, unless a name has been associated with each column with the use of the names argument (see below). Note that dtype=float is the default for genfromtxt.

- a sequence of types, such as dtype=(int, float, float).

- a comma-separated string, such as dtype="i4,f8,|U3".

- a dictionary with two keys 'names' and 'formats'.

- a sequence of tuples (name, type), such as dtype=[('A', int), ('B', float)].

- an existing numpy.dtype object.

- the special value None. In that case, the type of the columns will be determined from the data itself (see below).

In all the cases but the first one, the output will be a 1D array with a structured dtype. This dtype has as many fields as items in the sequence. The field names are defined with the names keyword.

When dtype=None, the type of each column is determined iteratively from its data. We start by checking whether a string can be converted to a boolean (that is, whether the string, once lower-cased, is `true` or `false`); then whether it can be converted to an integer, then to a float, then to a complex, and finally to a string.

The option dtype=None is provided for convenience. However, it is significantly slower than setting the dtype explicitly.


### Setting the names

In [None]:
data = StringIO("1 2 3\n 4 5 6")

# One integer field per column, named after the letters 'a', 'b' and 'c'.
field_types = [(letter, int) for letter in "abc"]
a = np.genfromtxt(data, dtype=field_types)
a

array([(1, 2, 3), (4, 5, 6)],
      dtype=[('a', '<i8'), ('b', '<i8'), ('c', '<i8')])

In [None]:
a['a']

array([1, 4])

In [None]:
data = StringIO("1 2 3\n 4 5 6")

np.genfromtxt(data, names="A, B, C")

array([(1., 2., 3.), (4., 5., 6.)],
      dtype=[('A', '<f8'), ('B', '<f8'), ('C', '<f8')])

In [None]:
data = StringIO("So it goes\n#a b c\n1 2 3\n 4 5 6")

np.genfromtxt(data, skip_header=1, names=True)

array([(1., 2., 3.), (4., 5., 6.)],
      dtype=[('a', '<f8'), ('b', '<f8'), ('c', '<f8')])

In [None]:
data = StringIO("1 2 3\n 4 5 6")

# The dtype carries its own field names ('a', 'b', 'c') ...
ndtype = [("a", int), ("b", float), ("c", int)]
# ... but the explicit names argument is what ends up in the result.
names = ["A", "B", "C"]

np.genfromtxt(data, dtype=ndtype, names=names)

array([(1, 2., 3), (4, 5., 6)],
      dtype=[('A', '<i8'), ('B', '<f8'), ('C', '<i8')])

In [None]:
data = StringIO("1 2 3\n 4 5 6")

np.genfromtxt(data, dtype=(int, float, int))

array([(1, 2., 3), (4, 5., 6)],
      dtype=[('f0', '<i8'), ('f1', '<f8'), ('f2', '<i8')])

In [None]:
data = StringIO("1 2 3\n 4 5 6")

np.genfromtxt(data, dtype=(int, float, int), names="a")

array([(1, 2., 3), (4, 5., 6)],
      dtype=[('a', '<i8'), ('f0', '<f8'), ('f1', '<i8')])

In [None]:
data = StringIO("1 2 3\n 4 5 6")

np.genfromtxt(data, dtype=(int, float, int), defaultfmt="var_%02i")

array([(1, 2., 3), (4, 5., 6)],
      dtype=[('var_00', '<i8'), ('var_01', '<f8'), ('var_02', '<i8')])

### Tweaking the conversion

In [None]:
def convertfunc(x):
    """Turn a percentage field such as ``"2.3%"`` into a fraction (``0.023``).

    ``x`` is the raw text of one field.  ``genfromtxt`` hands converters
    ``bytes`` under its legacy ``encoding="bytes"`` default and ``str``
    otherwise (the default from NumPy 2.0 on), so both are accepted here;
    a bytes-only ``x.strip(b"%")`` raises ``TypeError`` for ``str`` input.
    """
    if isinstance(x, bytes):
        # latin-1 maps every byte to a character, so decoding cannot fail and
        # malformed numbers still surface as float()'s ValueError.
        x = x.decode("latin-1")
    return float(x.strip("%")) / 100.


data = u"1, 2.3%, 45.\n6, 78.9%, 0"

names = ("i", "p", "n")

# General case: without a converter the "p" column cannot be parsed as a
# float and comes back as nan.
np.genfromtxt(StringIO(data), delimiter=",", names=names)

array([(1., nan, 45.), (6., nan,  0.)],
      dtype=[('i', '<f8'), ('p', '<f8'), ('n', '<f8')])

In [None]:
# Converted case ...
np.genfromtxt(StringIO(data), delimiter=",", names=names,
              converters={1: convertfunc})

array([(1., 0.023, 45.), (6., 0.789,  0.)],
      dtype=[('i', '<f8'), ('p', '<f8'), ('n', '<f8')])

In [None]:
# Using a name for the converter ...

np.genfromtxt(StringIO(data), delimiter=",", names=names,
              converters={"p": convertfunc})

array([(1., 0.023, 45.), (6., 0.789,  0.)],
      dtype=[('i', '<f8'), ('p', '<f8'), ('n', '<f8')])

In [None]:
data = "1, , 3\n 4, 5, 6"


def convert(x):
    """Parse one field as a float, substituting -999 when it is blank."""
    stripped = x.strip()
    if not stripped:
        return float(-999)
    return float(stripped)


np.genfromtxt(StringIO(data), delimiter=",", converters={1: convert})

array([[   1., -999.,    3.],
       [   4.,    5.,    6.]])

In [None]:
data = "N/A, 2, 3\n4, ,???"

# Per-column markers for missing data and the value substituted for each;
# columns are addressed either by index (0, 2) or by name ('b').
kwargs = {
    "delimiter": ",",
    "dtype": int,
    "names": "a,b,c",
    "missing_values": {0: "N/A", "b": " ", 2: "???"},
    "filling_values": {0: 0, "b": 0, 2: -999},
}

np.genfromtxt(StringIO(data), **kwargs)

array([(0, 2,    3), (4, 0, -999)],
      dtype=[('a', '<i8'), ('b', '<i8'), ('c', '<i8')])