# Markdown
## Краткое руководство

Абзацы создаются при помощи пустой строки. Если вокруг текста сверху и снизу
есть пустые строки, то текст превращается в абзац.

Чтобы сделать перенос строки вместо абзаца,
нужно поставить два пробела в конце предыдущей строки.

Заголовки отмечаются диезом "#" в начале строки, от одного до шести. Например:
    
# Заголовок первого уровня #
## Заголовок h2
### Заголовок h3
#### Заголовок h4
##### Заголовок h5
###### Заголовок h6

В декоративных целях заголовки можно "закрывать" с обратной стороны.

### Списки

Для разметки неупорядоченных списков можно использовать или "*", или "-", или
"+":
    
- элемент 1
- элемент 2
- элемент ...

Вложенные пункты создаются четырьмя пробелами перед маркером пункта:

* элемент 1
* элемент 2
    * вложенный элемент 2.1
    * вложенный элемент 2.2
* элемент ...

Упорядоченный список:
    
1. элемент 1
2. элемент 2
    1. вложенный
    2. вложенный
3. элемент 3
4. Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse id sem
consectetuer libero luctus adipiscing/

# NumPy

In [7]:
import numpy as np
vec = np.array([1,2,3])
vec.ndim # количество осей

1

In [8]:
mat = np.array([[1,2,3],[4,5,6]])
mat.ndim

2

In [9]:
vec.shape

(3,)

In [10]:
mat.dtype.name

'int32'

In [11]:
mat.itemsize

4

# Создание массивов

In [12]:
A = np.array([1,2,3])
A

array([1, 2, 3])

In [13]:
A = np.array([1,2,3], dtype = float)
A

array([1., 2., 3.])

In [14]:
B = np.array([(1,2,3), (4,5,6)])
B

array([[1, 2, 3],
       [4, 5, 6]])

In [15]:
np.zeros((3,))

array([0., 0., 0.])

In [16]:
np.ones((3,4))

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [17]:
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [18]:
np.empty((2,5))

array([[8.90070286e-308, 2.33644639e-307, 1.27947349e-307,
        1.11259465e-307, 9.79098366e-307],
       [1.27945991e-307, 1.11261027e-306, 1.29061821e-306,
        8.90103559e-307, 1.24611470e-306]])

In [19]:
np.arange(2,20,3)

array([ 2,  5,  8, 11, 14, 17])

In [20]:
np.arange(2.5,8.7,0.9)

array([2.5, 3.4, 4.3, 5.2, 6.1, 7. , 7.9])

In [21]:
np.linspace(2,18,14)

array([ 2.        ,  3.23076923,  4.46153846,  5.69230769,  6.92307692,
        8.15384615,  9.38461538, 10.61538462, 11.84615385, 13.07692308,
       14.30769231, 15.53846154, 16.76923077, 18.        ])

In [22]:
np.arange(9).reshape(3,3)

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [23]:
np.arange(8).reshape(2,-1)

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [24]:
C = np.arange(6).reshape(2,-1)
C

array([[0, 1, 2],
       [3, 4, 5]])

In [25]:
C.T

array([[0, 3],
       [1, 4],
       [2, 5]])

In [26]:
A = np.arange(6).reshape(2,-1)
np.hstack((A,A**2))

array([[ 0,  1,  2,  0,  1,  4],
       [ 3,  4,  5,  9, 16, 25]])

In [27]:
np.vstack((A,A**2))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 0,  1,  4],
       [ 9, 16, 25]])

In [28]:
np.concatenate((A,A**2), axis = 1)

array([[ 0,  1,  2,  0,  1,  4],
       [ 3,  4,  5,  9, 16, 25]])

In [29]:
a = np.arange(3)
np.tile(a, (2,2))

array([[0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2]])

In [30]:
np.tile(a, (4,1))

array([[0, 1, 2],
       [0, 1, 2],
       [0, 1, 2],
       [0, 1, 2]])

# Базовые операции

In [31]:
A = np.arange(9).reshape(3,3)
B = np.arange(1,10).reshape(3,3)

In [32]:
print (A)
print (B)

[[0 1 2]
 [3 4 5]
 [6 7 8]]
[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [33]:
A+B

array([[ 1,  3,  5],
       [ 7,  9, 11],
       [13, 15, 17]])

In [34]:
A*1.0/B

array([[0.        , 0.5       , 0.66666667],
       [0.75      , 0.8       , 0.83333333],
       [0.85714286, 0.875     , 0.88888889]])

In [35]:
A+1

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [36]:
3*A

array([[ 0,  3,  6],
       [ 9, 12, 15],
       [18, 21, 24]])

In [37]:
A**2

array([[ 0,  1,  4],
       [ 9, 16, 25],
       [36, 49, 64]], dtype=int32)

In [38]:
A*B

array([[ 0,  2,  6],
       [12, 20, 30],
       [42, 56, 72]])

In [39]:
A.dot(B)

array([[ 18,  21,  24],
       [ 54,  66,  78],
       [ 90, 111, 132]])

In [40]:
np.tile(np.arange(0,40,10),(3,1)).T + np.array([0,1,2])

array([[ 0,  1,  2],
       [10, 11, 12],
       [20, 21, 22],
       [30, 31, 32]])

In [41]:
np.exp(A)

array([[1.00000000e+00, 2.71828183e+00, 7.38905610e+00],
       [2.00855369e+01, 5.45981500e+01, 1.48413159e+02],
       [4.03428793e+02, 1.09663316e+03, 2.98095799e+03]])

In [42]:
A

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [43]:
A.min()

0

In [44]:
A.max(axis=0)

array([6, 7, 8])

In [45]:
A.sum(axis=1)

array([ 3, 12, 21])

# Индесация

In [46]:
a = np.arange(10)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [47]:
a[2:5]

array([2, 3, 4])

In [48]:
a[3:8:2]

array([3, 5, 7])

In [49]:
A = np.arange(81).reshape(9,-1)
A

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8],
       [ 9, 10, 11, 12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23, 24, 25, 26],
       [27, 28, 29, 30, 31, 32, 33, 34, 35],
       [36, 37, 38, 39, 40, 41, 42, 43, 44],
       [45, 46, 47, 48, 49, 50, 51, 52, 53],
       [54, 55, 56, 57, 58, 59, 60, 61, 62],
       [63, 64, 65, 66, 67, 68, 69, 70, 71],
       [72, 73, 74, 75, 76, 77, 78, 79, 80]])

In [50]:
A[2:4]

array([[18, 19, 20, 21, 22, 23, 24, 25, 26],
       [27, 28, 29, 30, 31, 32, 33, 34, 35]])

In [51]:
A[:,2:4]

array([[ 2,  3],
       [11, 12],
       [20, 21],
       [29, 30],
       [38, 39],
       [47, 48],
       [56, 57],
       [65, 66],
       [74, 75]])

In [52]:
A[2:4,2:4]

array([[20, 21],
       [29, 30]])

In [53]:
A[-1]

array([72, 73, 74, 75, 76, 77, 78, 79, 80])

In [54]:
A = np.arange(81).reshape(9,-1)
A

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8],
       [ 9, 10, 11, 12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23, 24, 25, 26],
       [27, 28, 29, 30, 31, 32, 33, 34, 35],
       [36, 37, 38, 39, 40, 41, 42, 43, 44],
       [45, 46, 47, 48, 49, 50, 51, 52, 53],
       [54, 55, 56, 57, 58, 59, 60, 61, 62],
       [63, 64, 65, 66, 67, 68, 69, 70, 71],
       [72, 73, 74, 75, 76, 77, 78, 79, 80]])

In [55]:
A[[2,4,5],[0,1,3]]

array([18, 37, 48])

In [56]:
A = np.arange(11)
A

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [57]:
A[A%5!=3]

array([ 0,  1,  2,  4,  5,  6,  7,  9, 10])

In [58]:
A[np.logical_and(A!=7,A%5!=3)]

array([ 0,  1,  2,  4,  5,  6,  9, 10])

# Зачем?

In [59]:
import time

A_quick_arr = np.random.normal(size = (1000000,))
B_quick_arr = np.random.normal(size = (1000000,))

A_slow_list, B_slow_list = list(A_quick_arr), list(B_quick_arr)

In [60]:
start = time.time()
ans = 0
for i in range(len(A_slow_list)):
    ans += A_slow_list[i] * B_slow_list[i]
print(time.time() - start)

0.31082606315612793


In [61]:
start = time.time()
ans = sum([A_slow_list[i] * B_slow_list[i] for i in range(1000000)])
print(time.time() - start)

0.27776432037353516


In [62]:
start = time.time()
ans = np.sum(A_quick_arr * B_quick_arr)
print(time.time() - start)

0.005983829498291016


In [63]:
start = time.time()
ans = A_quick_arr.dot(B_quick_arr)
print(time.time() - start)

0.003956317901611328


# Pandas

## Series

In [64]:
import pandas as pd
my_series = pd.Series([5,6,7,8,9,10])
my_series

0     5
1     6
2     7
3     8
4     9
5    10
dtype: int64

In [65]:
my_series.index

RangeIndex(start=0, stop=6, step=1)

In [66]:
my_series.values

array([ 5,  6,  7,  8,  9, 10], dtype=int64)

In [67]:
my_series[4]

9

In [68]:
my_series2 = pd.Series([5,6,7,8,9,10], index=['a','b','c','d','e','f'])
my_series2['f']

10

In [69]:
my_series2[['a','b','f']]

a     5
b     6
f    10
dtype: int64

In [70]:
my_series2[['a','b','f']] = 0
my_series2

a    0
b    0
c    7
d    8
e    9
f    0
dtype: int64

In [71]:
my_series2[my_series2 > 0]

c    7
d    8
e    9
dtype: int64

In [72]:
my_series2[my_series2 > 0] * 2

c    14
d    16
e    18
dtype: int64

In [73]:
my_series3 = pd.Series({'a':5,'b':6,'c':7,'d':8})
my_series3

a    5
b    6
c    7
d    8
dtype: int64

In [74]:
my_series3.name = 'numbers'
my_series3.index.name = 'letters'
my_series3

letters
a    5
b    6
c    7
d    8
Name: numbers, dtype: int64

In [75]:
my_series3.index = ['A','B','C','D']
my_series3

A    5
B    6
C    7
D    8
Name: numbers, dtype: int64

## DataFrame

In [76]:
df = pd.DataFrame({
    'country': ['Kazakhstan','Russia','Belarus','Ukraine'],
    'population': [17.04,143.5,9.5,45.5],
    'square': [2724902,17125191,207600,603628]
})
df

Unnamed: 0,country,population,square
0,Kazakhstan,17.04,2724902
1,Russia,143.5,17125191
2,Belarus,9.5,207600
3,Ukraine,45.5,603628


In [77]:
df['country']

0    Kazakhstan
1        Russia
2       Belarus
3       Ukraine
Name: country, dtype: object

In [78]:
df.columns

Index(['country', 'population', 'square'], dtype='object')

In [79]:
df.index

RangeIndex(start=0, stop=4, step=1)

In [80]:
df = pd.DataFrame({
    'country': ['Kazakhstan','Russia','Belarus','Ukraine'],
    'population': [17.04,143.5,9.5,45.5],
    'square': [2724902,17125191,207600,603628]
}, index=['KZ','RU','BY','UA'])
df

Unnamed: 0,country,population,square
KZ,Kazakhstan,17.04,2724902
RU,Russia,143.5,17125191
BY,Belarus,9.5,207600
UA,Ukraine,45.5,603628


In [81]:
df.index=['KZ','RU','BY','UA']
df.index.name='Country Code'
df

Unnamed: 0_level_0,country,population,square
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KZ,Kazakhstan,17.04,2724902
RU,Russia,143.5,17125191
BY,Belarus,9.5,207600
UA,Ukraine,45.5,603628


In [82]:
df['country']

Country Code
KZ    Kazakhstan
RU        Russia
BY       Belarus
UA       Ukraine
Name: country, dtype: object

In [83]:
df.loc['KZ']

country       Kazakhstan
population         17.04
square           2724902
Name: KZ, dtype: object

In [84]:
df.iloc[0]

country       Kazakhstan
population         17.04
square           2724902
Name: KZ, dtype: object

In [85]:
df.loc[['KZ','RU'], 'population']

Country Code
KZ     17.04
RU    143.50
Name: population, dtype: float64

In [86]:
df.loc['KZ':'BY', :]

Unnamed: 0_level_0,country,population,square
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KZ,Kazakhstan,17.04,2724902
RU,Russia,143.5,17125191
BY,Belarus,9.5,207600


In [87]:
df[df.population > 10] [['country','square']]

Unnamed: 0_level_0,country,square
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1
KZ,Kazakhstan,2724902
RU,Russia,17125191
UA,Ukraine,603628


In [88]:
df.reset_index()

Unnamed: 0,Country Code,country,population,square
0,KZ,Kazakhstan,17.04,2724902
1,RU,Russia,143.5,17125191
2,BY,Belarus,9.5,207600
3,UA,Ukraine,45.5,603628


In [89]:
df['density'] = df['population'] / df['square'] * 1000000
df

Unnamed: 0_level_0,country,population,square,density
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
KZ,Kazakhstan,17.04,2724902,6.253436
RU,Russia,143.5,17125191,8.379469
BY,Belarus,9.5,207600,45.761079
UA,Ukraine,45.5,603628,75.37755


In [90]:
df.drop(['density'], axis='columns')

Unnamed: 0_level_0,country,population,square
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KZ,Kazakhstan,17.04,2724902
RU,Russia,143.5,17125191
BY,Belarus,9.5,207600
UA,Ukraine,45.5,603628


In [91]:
df = df.rename(columns={'Coutry Code': 'country_code'})
df

Unnamed: 0_level_0,country,population,square,density
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
KZ,Kazakhstan,17.04,2724902,6.253436
RU,Russia,143.5,17125191,8.379469
BY,Belarus,9.5,207600,45.761079
UA,Ukraine,45.5,603628,75.37755


## Группировка и агрегирование в pandas