# Numpy и основы линейной алгебры

семинарист: Альперович Вадим 

```tg: @vadik_alp```

---

# Introduction

<img src="https://ipython.org/_static/IPy_header.png"></img>

### IPython
является оболочкой **Read-Evaluate-Print Loop** (REPL) для интерактивной разработки на Python. Он поддерживает интерактивную визуализацию с использованием графических инструментов (GUI) и предоставляет ядро для Jupyter.

<p><img alt="jupyter logo" width="90" src="https://upload.wikimedia.org/wikipedia/commons/thumb/3/38/Jupyter_logo.svg/1200px-Jupyter_logo.svg.png" hspace="50px" align="left" vspace="0px"></p>

### Jupyter Notebook 

клиент-серверное приложение, позволяющее редактировать и запускать ноутбуки IPython через веб-браузер. Jupyter Notebook может быть запущен на локальной рабочей машине без доступа в Интернет или установлен на удалённом сервере и доступен через Интернет.

**Useful links:**

- Anaconda: https://www.anaconda.com/products/individual
- Miniconda: https://docs.conda.io/en/latest/miniconda.html
- Markdown: https://www.markdownguide.org/basic-syntax

<p><img alt="Colaboratory logo" width="100" src="https://colab.research.google.com/img/colab_favicon_256px.png" hspace="10px" align="left" vspace="0px"></p>


### Google Colaboratory


Colaboratory позволяет писать и выполнять код Python в браузере. При этом:
- не требуется никакой настройки;
- вы получаете бесплатный доступ к графическим процессорам;
- предоставлять доступ к документам другим людям очень просто.


# Introduction to Jupyter

In [1]:
print(1)

1


In [3]:
x = 1
x

1

In [4]:
print("x:", x)

x: 1


Одно из самых полезных **преимуществ** — встроенная поддержка $\LaTeX$, которая позволяет легко набирать и оформлять формулы прямо в текстовых (Markdown) ячейках:

<br>

$$ \sum_{i=1}^Nx_i^n + y^n = \frac{z^n}{10!}$$

In [5]:
%%time
print(0)

0
Wall time: 0 ns


In [6]:
%pwd

'D:\\MAINOR_ML\\19.01.22'

In [8]:
# !pip freeze

# Numpy

<p><img alt="NumPy logo" width="300" src="https://upload.wikimedia.org/wikipedia/commons/thumb/3/31/NumPy_logo_2020.svg/1280px-NumPy_logo_2020.svg.png" hspace="10px" vspace="0px"></p>



**NumPy** — библиотека с открытым исходным кодом для языка программирования Python. Возможности: поддержка многомерных массивов; поддержка высокоуровневых математических функций, предназначенных для работы с многомерными массивами.

Useful links:
- [Why is NumPy fast?](https://numpy.org/doc/stable/user/whatisnumpy.html)
- [NumPy documentation](https://numpy.org/doc/stable/user/absolute_beginners.html)
- [100 задач NumPy](https://pythonworld.ru/numpy/100-exercises.html)

In [9]:
!pip install numpy



In [10]:
import numpy as np

### Creating matrix / vector

In [12]:
lists = [[26, 1, 0], [5, 13, 5], [18, 3, 1], [13, 10, 0]]
data = np.array(lists, dtype='int')
data

array([[26,  1,  0],
       [ 5, 13,  5],
       [18,  3,  1],
       [13, 10,  0]])

In [13]:
data.shape

(4, 3)

In [14]:
data.dtype

dtype('int32')

In [18]:
matrix = np.eye(5) # np.ones, np.empty
matrix

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [19]:
vec = np.arange(5)
print(vec)
print(vec.shape)

[0 1 2 3 4]
(5,)


In [23]:
np.random.randint(100, size=(5, 10))

array([[ 3, 88, 36, 93, 66, 22, 41, 71,  3, 17],
       [34, 97, 86, 28, 53, 38, 74, 81,  5,  2],
       [63,  4, 64,  6, 63, 42, 90, 94,  6, 89],
       [11, 74, 59, 29, 20, 56, 66, 64, 33, 93],
       [ 3, 24,  9, 62, 63, 90, 84,  2, 42, 98]])

In [26]:
np.set_printoptions(precision=4)
np.random.rand(5, 10)

array([[0.8664, 0.0271, 0.7495, 0.5739, 0.1222, 0.4674, 0.7461, 0.6411,
        0.511 , 0.3472],
       [0.0706, 0.466 , 0.4472, 0.4997, 0.0694, 0.331 , 0.7525, 0.5356,
        0.2618, 0.393 ],
       [0.4928, 0.0244, 0.8627, 0.2051, 0.0832, 0.7783, 0.8106, 0.171 ,
        0.4244, 0.037 ],
       [0.307 , 0.5896, 0.0428, 0.8242, 0.8109, 0.1616, 0.4597, 0.3571,
        0.8995, 0.6637],
       [0.2387, 0.2928, 0.1601, 0.7158, 0.9184, 0.3832, 0.7816, 0.7846,
        0.9542, 0.9915]])

In [27]:
?np.array

In [29]:
# help(np.array)

In [31]:
np.zeros((3, 2, 7)).shape

(3, 2, 7)

### Operations

* element-wise
* aggregations
* matrix

In [33]:
print(type(data))

<class 'numpy.ndarray'>


In [32]:
print(data)

[[26  1  0]
 [ 5 13  5]
 [18  3  1]
 [13 10  0]]


In [34]:
# element-wise
data + data

array([[52,  2,  0],
       [10, 26, 10],
       [36,  6,  2],
       [26, 20,  0]])

In [35]:
data * data

array([[676,   1,   0],
       [ 25, 169,  25],
       [324,   9,   1],
       [169, 100,   0]])

In [36]:
data ** 2

array([[676,   1,   0],
       [ 25, 169,  25],
       [324,   9,   1],
       [169, 100,   0]])

In [39]:
a = np.arange(12).reshape(3, 4)
b = np.zeros((3, 4))
a * b

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [41]:
np.arange(12).reshape(3, 4)

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [42]:
a + b

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.]])

In [43]:
np.exp(data)

array([[1.9573e+11, 2.7183e+00, 1.0000e+00],
       [1.4841e+02, 4.4241e+05, 1.4841e+02],
       [6.5660e+07, 2.0086e+01, 2.7183e+00],
       [4.4241e+05, 2.2026e+04, 1.0000e+00]])

In [44]:
### aggregate
np.sum(data)

95

In [45]:
print(data)

[[26  1  0]
 [ 5 13  5]
 [18  3  1]
 [13 10  0]]


In [47]:
np.sum(data, axis=1) 

array([27, 23, 22, 23])

In [46]:
np.sum(data, axis=0)

array([62, 27,  6])

In [48]:
np.prod(a, axis=1)

array([   0,  840, 7920])

In [None]:
a

In [50]:
np.mean(data, axis=1)

array([9.    , 7.6667, 7.3333, 7.6667])

In [56]:
### matrix operations
x = np.random.rand(3, 4)
y = np.random.rand(4, 10)
print(x)
print(y)

[[0.9249 0.4478 0.93   0.6468]
 [0.3635 0.6538 0.3275 0.7236]
 [0.2777 0.6158 0.8362 0.3738]]
[[0.3708 0.0946 0.1739 0.9376 0.2293 0.5919 0.6185 0.6069 0.5941 0.915 ]
 [0.2757 0.3508 0.8199 0.7455 0.1773 0.0668 0.4512 0.5119 0.6097 0.2617]
 [0.3504 0.7135 0.1968 0.2094 0.8118 0.5924 0.5814 0.6589 0.1485 0.9031]
 [0.7274 0.1179 0.0151 0.7948 0.3671 0.082  0.9541 0.5005 0.9777 0.5264]]


In [57]:
np.dot(x, y).shape

(3, 10)

In [58]:
x @ y

array([[1.2627, 0.9843, 0.7208, 1.9099, 1.2839, 1.1813, 1.9318, 1.727 ,
        1.593 , 2.1437],
       [0.9561, 0.5827, 0.6747, 1.472 , 0.7308, 0.5122, 1.4006, 1.1333,
        1.3707, 1.1803],
       [0.8376, 0.8829, 0.7234, 1.1917, 0.989 , 0.7316, 1.2924, 1.2218,
        1.0301, 1.3672]])

In [59]:
v = np.ones(3)
np.dot(data, v) # (4, 3) x (3,) -> (4,)

array([27., 23., 22., 23.])

In [60]:
v.shape

(3,)

In [None]:
v = np.ones((3, 1))
np.dot(data, v) # (4, 3) x (3, 1) -> (4, 1)

In [None]:
data @ v

In [None]:
data.dot(v)

In [None]:
np.sum(data, axis=1)
data.sum(axis=1)

In [61]:
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [62]:
np.linalg.inv(np.eye(5)) # inverse matrix

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [None]:
# Solve the linear system coeffs @ x = rhs. np.linalg.solve needs both the
# coefficient matrix and the right-hand side; calling it with no arguments
# raises TypeError.
coeffs = np.array([[3.0, 1.0], [1.0, 2.0]])
rhs = np.array([9.0, 8.0])
solution = np.linalg.solve(coeffs, rhs)
solution  # array([2., 3.])

#### Скалярное произведение

Скалярное произведение в пространстве $\mathbb{R}^{n}$ для двух векторов $x = (x_{1}, \dots, x_{n})$ и $y = (y_{1}, \dots, y_{n})$ определяется как:

$$
\langle x, y \rangle = \sum_{i=1}^n x_{i} y_{i}.
$$

Скалярное произведение двух векторов можно вычислять с помощью функции __`numpy.dot(a, b, ...)`__ или _метода_ __`vec1.dot(vec2)`__, где __`vec1`__ и __`vec2`__ — исходные векторы.

In [63]:
a = np.array([1, 2, 3])
b = np.array([6, 5, 4])
np.dot(a, b)

28

# Header

Пишем **текст**
- Item 1
- item 2

$\sum_i x_i = N$

#### Угол между векторами


Длина (норма) вектора $x$ выражается через скалярное произведение:

$$
\left| x \right| = \sqrt{\langle x, x \rangle} = \sqrt{\sum_{i=1}^n x_{i}^2}
$$
____
Теперь, когда мы умеем вычислять скалярное произведение двух ненулевых векторов и их длины, мы можем найти угол между ними:

$$
\langle x, y \rangle = \left| x \right| \left| y \right| \cos(\alpha)
\implies \cos(\alpha) = \frac{\langle x, y \rangle}{\left| x \right| \left| y \right|},
$$

где $\alpha \in [0, \pi]$ — угол между векторами $x$ и $y$.

In [64]:
from numpy.linalg import norm

# Two orthogonal unit vectors (along the z and y axes).
a = np.array([0, 0, 1])
b = np.array([0, 1, 0])

# cos(alpha) = <a, b> / (|a| * |b|); both vectors must be non-zero.
# Floating-point rounding can push the ratio slightly outside [-1, 1]
# (e.g. for parallel vectors), which would make arccos return nan,
# so the value is clipped to the valid domain first.
cos_angle = np.clip(np.dot(a, b) / (norm(a) * norm(b)), -1.0, 1.0)

print('Косинус угла между a и b:', cos_angle)
print('Сам угол:', np.rad2deg(np.arccos(cos_angle)))

Косинус угла между a и b: 0.0
Сам угол: 90.0


### Indexing

In [65]:
data

array([[26,  1,  0],
       [ 5, 13,  5],
       [18,  3,  1],
       [13, 10,  0]])

In [66]:
data[1]

array([ 5, 13,  5])

In [67]:
data[:, 2]

array([0, 5, 1, 0])

In [68]:
data[:2, :2]

array([[26,  1],
       [ 5, 13]])

In [71]:
data[:, ::2]  # all rows, every second column (columns 0 and 2)

array([[26,  0],
       [ 5,  5],
       [18,  1],
       [13,  0]])

In [70]:
data[::2, -1]

array([0, 1])

In [None]:
data[-2:, -2:]

In [None]:
data[[1, 2]]

In [73]:
data[:, [False, True, False]] # логическая индексация

array([[ 1],
       [13],
       [ 3],
       [10]])

In [79]:
# перемешать строки в случайном порядке
np.random.permutation(data)

array([[ 5, 13,  5],
       [13, 10,  0],
       [18,  3,  1],
       [26,  1,  0]])

In [80]:
idxs = np.random.permutation(np.arange(data.shape[0]))

idxs

array([3, 2, 0, 1])

In [82]:
data[[0, 1 , 2, 3]]

array([[26,  1,  0],
       [ 5, 13,  5],
       [18,  3,  1],
       [13, 10,  0]])

In [81]:
data[idxs]

array([[13, 10,  0],
       [18,  3,  1],
       [26,  1,  0],
       [ 5, 13,  5]])

In [None]:
data[::-1]

In [None]:
data

### Merging 

In [83]:
new_data = np.array([[14, 4, 1], [1, 11, 9]])

In [84]:
new_data

array([[14,  4,  1],
       [ 1, 11,  9]])

In [85]:
data.shape

(4, 3)

In [86]:
long_data = np.vstack((data, new_data))

In [88]:
long_data.shape

(6, 3)

In [91]:
# np.hstack((data, new_data))

In [93]:
np.concatenate((data, new_data), axis=0)

array([[26,  1,  0],
       [ 5, 13,  5],
       [18,  3,  1],
       [13, 10,  0],
       [14,  4,  1],
       [ 1, 11,  9]])

`np.hstack((data, new_data))` — ошибка: у массивов разное число строк (4 и 2), поэтому склеить их по горизонтали нельзя.

In [None]:
np.hstack((long_data, long_data.sum(axis=1).reshape(6, 1)))

In [None]:
long_data.shape

In [None]:
long_data.sum(axis=1).shape

In [None]:
##### newaxis

In [95]:
sums = long_data.sum(axis=1)

In [96]:
sums

array([27, 23, 22, 23, 19, 21])

In [None]:
sums.shape

In [None]:
sums[:, np.newaxis]

In [None]:
sums[:, np.newaxis].shape

In [None]:
sums[np.newaxis, :].shape

##### Broadcasting

In [97]:
sums

array([27, 23, 22, 23, 19, 21])

In [98]:
long_data

array([[26,  1,  0],
       [ 5, 13,  5],
       [18,  3,  1],
       [13, 10,  0],
       [14,  4,  1],
       [ 1, 11,  9]])

In [99]:
long_data / sums

ValueError: operands could not be broadcast together with shapes (6,3) (6,) 

In [101]:
sums.reshape(-1, 1).shape

(6, 1)

In [102]:
long_data / sums.reshape(-1, 1)

array([[0.963 , 0.037 , 0.    ],
       [0.2174, 0.5652, 0.2174],
       [0.8182, 0.1364, 0.0455],
       [0.5652, 0.4348, 0.    ],
       [0.7368, 0.2105, 0.0526],
       [0.0476, 0.5238, 0.4286]])

In [105]:
sums

array([27, 23, 22, 23, 19, 21])

In [104]:
sums[np.newaxis, :]

array([[27, 23, 22, 23, 19, 21]])

In [106]:
long_data / sums[:, np.newaxis]

array([[0.963 , 0.037 , 0.    ],
       [0.2174, 0.5652, 0.2174],
       [0.8182, 0.1364, 0.0455],
       [0.5652, 0.4348, 0.    ],
       [0.7368, 0.2105, 0.0526],
       [0.0476, 0.5238, 0.4286]])

In [None]:
long_data.shape, sums.shape

In [None]:
# Задача: создать таблицу умножения до 9

In [107]:
numbers = np.arange(10)
numbers

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [108]:
numbers[:, np.newaxis].shape

(10, 1)

In [109]:
numbers[np.newaxis, :].shape

(1, 10)

In [110]:
numbers[:, np.newaxis] * numbers[np.newaxis, :]

array([[ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18],
       [ 0,  3,  6,  9, 12, 15, 18, 21, 24, 27],
       [ 0,  4,  8, 12, 16, 20, 24, 28, 32, 36],
       [ 0,  5, 10, 15, 20, 25, 30, 35, 40, 45],
       [ 0,  6, 12, 18, 24, 30, 36, 42, 48, 54],
       [ 0,  7, 14, 21, 28, 35, 42, 49, 56, 63],
       [ 0,  8, 16, 24, 32, 40, 48, 56, 64, 72],
       [ 0,  9, 18, 27, 36, 45, 54, 63, 72, 81]])

### Boolean arrays

In [114]:
lists = [[26, 1, 0], [5, 13, 5], [18, 3, 1], [13, 10, 0]]
data = np.array(lists, dtype='float32')
data

array([[26.,  1.,  0.],
       [ 5., 13.,  5.],
       [18.,  3.,  1.],
       [13., 10.,  0.]], dtype=float32)

In [116]:
data

array([[26.,  1.,  0.],
       [ 5., 13.,  5.],
       [18.,  3.,  1.],
       [13., 10.,  0.]], dtype=float32)

In [115]:
data.dtype

dtype('float32')

In [119]:
bin_array = np.array([1, 0, 1, 0])

In [120]:
bin_array.dtype

dtype('int32')

In [121]:
bin_array = bin_array.astype('bool')

In [122]:
bin_array.dtype

dtype('bool')

In [117]:
bin_array = np.array([True, False, True, False])

In [118]:
bin_array.dtype

dtype('bool')

In [123]:
np.all(bin_array)

False

In [124]:
np.any(bin_array)

True

In [125]:
np.sum(bin_array)

2

In [127]:
yes = long_data[:, 0]
no = long_data[:, 1]
# 

In [129]:
yes, no

(array([26,  5, 18, 13, 14,  1]), array([ 1, 13,  3, 10,  4, 11]))

In [130]:
np.sum(yes<no)

2

### Why use Numpy?

In [131]:
n = 300
A = np.random.rand(n, n)
B = np.random.rand(n, n)

In [133]:
A.shape

(300, 300)

In [134]:
B.shape

(300, 300)

In [135]:
%%time
# Naive matrix product C = A @ B written with explicit Python loops.
# It performs n**3 scalar multiply-adds and is timed here for comparison
# with NumPy's built-in matrix multiplication.
C = np.zeros((n, n))  # accumulator for the product, starts at zero
for i in range(n):  # row index of A and C
    for j in range(n):  # column index of B and C
        for k in range(n):  # index being summed over
            C[i, j] += A[i, k] * B[k, j]

Wall time: 28.7 s


In [136]:
print(C[0])

[66.3412 68.944  69.905  70.2104 69.2767 68.0386 65.6205 66.8442 70.0366
 64.6212 69.2029 64.9699 68.4894 72.1372 68.8462 69.5497 69.6575 64.9852
 69.9648 67.4364 70.3422 69.8802 62.9162 71.1745 67.401  68.6138 69.2007
 68.4348 76.2174 70.9284 69.4103 70.7958 65.4891 65.8254 69.7553 68.0724
 73.8186 71.2248 70.7508 70.2815 68.2298 64.3224 70.1984 69.4778 66.6314
 68.2304 69.6882 65.6864 65.4612 66.7263 65.6608 65.7371 70.9203 69.7585
 62.677  68.977  61.6707 68.8208 72.6272 67.4805 67.8539 70.5492 62.7796
 63.9263 73.1553 68.8957 71.6304 69.9627 68.0771 67.2481 69.2543 74.6355
 69.1959 72.5037 68.8896 69.0788 68.066  70.278  73.7043 69.4842 73.1834
 67.7113 70.9166 67.2092 70.49   70.727  67.1372 71.1903 73.5414 72.366
 70.8973 65.4819 71.8157 68.373  75.175  67.3983 68.4912 65.2212 70.485
 67.8779 69.1476 69.0954 68.5494 69.6573 67.2125 65.37   71.1254 68.5577
 69.3282 69.5666 66.4737 67.9228 68.686  70.7336 68.4017 68.54   65.3241
 67.6738 66.7545 73.298  70.2129 65.9816 69.8152 72.5

In [137]:
%%time
C = A @ B

Wall time: 2 ms


In [138]:
print(C[0])

[66.3412 68.944  69.905  70.2104 69.2767 68.0386 65.6205 66.8442 70.0366
 64.6212 69.2029 64.9699 68.4894 72.1372 68.8462 69.5497 69.6575 64.9852
 69.9648 67.4364 70.3422 69.8802 62.9162 71.1745 67.401  68.6138 69.2007
 68.4348 76.2174 70.9284 69.4103 70.7958 65.4891 65.8254 69.7553 68.0724
 73.8186 71.2248 70.7508 70.2815 68.2298 64.3224 70.1984 69.4778 66.6314
 68.2304 69.6882 65.6864 65.4612 66.7263 65.6608 65.7371 70.9203 69.7585
 62.677  68.977  61.6707 68.8208 72.6272 67.4805 67.8539 70.5492 62.7796
 63.9263 73.1553 68.8957 71.6304 69.9627 68.0771 67.2481 69.2543 74.6355
 69.1959 72.5037 68.8896 69.0788 68.066  70.278  73.7043 69.4842 73.1834
 67.7113 70.9166 67.2092 70.49   70.727  67.1372 71.1903 73.5414 72.366
 70.8973 65.4819 71.8157 68.373  75.175  67.3983 68.4912 65.2212 70.485
 67.8779 69.1476 69.0954 68.5494 69.6573 67.2125 65.37   71.1254 68.5577
 69.3282 69.5666 66.4737 67.9228 68.686  70.7336 68.4017 68.54   65.3241
 67.6738 66.7545 73.298  70.2129 65.9816 69.8152 72.5

**Do use NumPy, not loops**