# Библиотека `numpy`
`numpy` предоставляет возможность работы с многомерными типизированными массивами.

In [1]:
import numpy as np

## Основы `numpy`

In [2]:
# built-in list
arr_py = [1, 2, 3.0, "abc", [1, 2]]
arr_py

[1, 2, 3.0, 'abc', [1, 2]]

In [3]:
# numpy array
arr_np = np.array([1, 2, 3, 4, 5])
arr_np

array([1, 2, 3, 4, 5])

In [4]:
# numpy array has type
arr_np.dtype

dtype('int64')

In [5]:
# all elements is of this tipe
arr_np[0] = 0.5
arr_np

array([0, 2, 3, 4, 5])

In [6]:
# there should be an error
arr_np[0] = "abc"

ValueError: invalid literal for int() with base 10: 'abc'

## Производительность `numpy`

Реализуем алгоритм получения суммы квадратов чисел из массива и измерим скорость его выполнения.

Сначала просто на Питоне:

In [7]:
def sqsum_py(data):
    a = 0
    for x in data:
        a += x**2
    return a

In [8]:
sqsum_py([1, 2, 3])

14

Теперь с помощью `numpy`:

In [9]:
def sqsum_np(data):
    return np.sum(data**2)

In [10]:
sqsum_np(np.array([1, 2, 3]))

14

Посчитаем сумму большого числа элементов с помощью этих двух реализаций и измерим время:

In [11]:
n = 10000000

In [12]:
arr_py = list(range(n))

In [13]:
arr_np = np.arange(n)

In [14]:
sqsum_py(arr_py)

333333283333335000000

In [15]:
%timeit sqsum_py(arr_py)

2.79 s ± 173 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [16]:
sqsum_np(arr_np)

1291890006563070912

In [17]:
%timeit sqsum_np(arr_np)

27.3 ms ± 4.34 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## Многомерные массивы

### Двумерный массив (матрица):

In [18]:
m = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12]
])
m

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [19]:
m[1,1]

6

In [20]:
m = np.arange(12)
m

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [21]:
mm = m.reshape(4, 3).copy()
mm

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [22]:
mm[0,0] = -1
mm

array([[-1,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [23]:
np.repeat(np.arange(3).reshape(1, -1), 3, axis=0)

array([[0, 1, 2],
       [0, 1, 2],
       [0, 1, 2]])

### Трёхмерный массив:

In [24]:
m

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [25]:
mmm = m.reshape(2, 2, 3).copy()
mmm

array([[[ 0,  1,  2],
        [ 3,  4,  5]],

       [[ 6,  7,  8],
        [ 9, 10, 11]]])

In [26]:
mmm[1,1,1]

10

## Операции с массивами `numpy`

### Операция с массивом и скаляром

In [27]:
v = np.arange(10)

In [28]:
v

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [29]:
v + 10

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [30]:
v *= 2

In [31]:
v

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

### Операция с двумя массивами одинаковой размерности

In [32]:
v2 = np.array(list(reversed(range(10))), dtype=np.double)

In [33]:
v2

array([9., 8., 7., 6., 5., 4., 3., 2., 1., 0.])

In [34]:
(v.dtype, v2.dtype)

(dtype('int64'), dtype('float64'))

In [35]:
v + v2

array([ 9., 10., 11., 12., 13., 14., 15., 16., 17., 18.])

### Умножение матрицы и вектора

In [36]:
m = np.arange(12).reshape(4,3)
m

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [37]:
v = np.array([1, 2, 3])

In [38]:
v.reshape(-1, 1)

array([[1],
       [2],
       [3]])

In [39]:
np.dot(m, v)

array([ 8, 26, 44, 62])

In [40]:
m.transpose()

array([[ 0,  3,  6,  9],
       [ 1,  4,  7, 10],
       [ 2,  5,  8, 11]])

In [41]:
np.dot(m, m.transpose())

array([[  5,  14,  23,  32],
       [ 14,  50,  86, 122],
       [ 23,  86, 149, 212],
       [ 32, 122, 212, 302]])

### Broadcasting

In [42]:
v

array([1, 2, 3])

In [43]:
v + 1

array([2, 3, 4])

In [44]:
m + v

array([[ 1,  3,  5],
       [ 4,  6,  8],
       [ 7,  9, 11],
       [10, 12, 14]])

In [45]:
b = np.ones((2, 1, 2))
c = np.ones((1, 2, 2))

In [46]:
(b + c).shape

(2, 2, 2)

## Сечение массивов `numpy`

In [47]:
m

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [48]:
m[1,2]

5

In [49]:
[1, 2, 3, 4, 5][2:4]

[3, 4]

In [50]:
np.arange(10)[2:-2]

array([2, 3, 4, 5, 6, 7])

In [51]:
m[0:2, 0:2]

array([[0, 1],
       [3, 4]])

При взяии сечения данные переиспользуются, а не копируются

In [52]:
v = np.arange(10)
v

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [53]:
w = v[2:-2]
w

array([2, 3, 4, 5, 6, 7])

In [54]:
w *= -1

In [55]:
v

array([ 0,  1, -2, -3, -4, -5, -6, -7,  8,  9])