### Библиотеки

In [None]:
import pandas as pd
import numpy as np

### Создание

#### из списка

создаем Series значений из списка целых чисел

In [None]:
# Series built from a plain list of integers; the labels are given explicitly
# and are intentionally non-contiguous (1, 2, 3, 5, 7).
s = pd.Series([10, 11, 12, 13, 14], index=[1, 2, 3, 5, 7])
s

1    10
2    11
3    12
5    13
7    14
dtype: int64

создаем Series из строковых значений

In [None]:
s = pd.Series(['Blue', 'Yellow', 'Green'])
s

0      Blue
1    Yellow
2     Green
dtype: object

создаем Series из 5 элементов, каждый элемент - list python

In [None]:
# Build a Series of five *independent* [1, 2] lists.
# Multiplying a list of lists (``[[1, 2]] * 5``) only repeats references to one
# and the same inner list, so mutating a single element of the Series would
# silently change all five. Copying each item avoids that aliasing.
l = [[1, 2]]  # name kept as in the original cell in case other cells reuse it
s = pd.Series([list(item) for item in l * 5])
s

0    [1, 2]
1    [1, 2]
2    [1, 2]
3    [1, 2]
4    [1, 2]
dtype: object

создаем DataFrame из двумерного списка

In [None]:
df = pd.DataFrame([[10, 11], [20, 21], [30, 31]])
df

Unnamed: 0,0,1
0,10,11
1,20,21
2,30,31


задаем имена столбцов

In [None]:
df = pd.DataFrame([[10, 11], [20, 21], [30, 31]],
                  columns=['A', 'B'])
df

Unnamed: 0,A,B
0,10,11
1,20,21
2,30,31


создаем DataFrame из списка объектов Series

In [None]:
series_1 = pd.Series([70, 90])
series_2 = pd.Series([71, 91])
df = pd.DataFrame([series_1, series_2])
df

Unnamed: 0,0,1
0,70,90
1,71,91


задаем имена столбцов после создания датафрейма

In [None]:
df.columns = ['col_1', 'col_2']
df

Unnamed: 0,col_1,col_2
0,70,90
1,71,91


#### из словаря

создаем объект Series из словаря, при этом посмотрим, как изменились индексы

In [None]:
s = pd.Series({'Homer': 'Dad',
               'Marge': 'Mom',
               'Bart': 'Son',
               'Lisa': 'Daughter',
               'Maggie': 'Daughter'})
s

Homer          Dad
Marge          Mom
Bart           Son
Lisa      Daughter
Maggie    Daughter
dtype: object

создание DataFrame с помощью питоновского словаря

In [None]:
# Each dictionary key becomes a column name and each list becomes that
# column's values.
list_1 = [70, 71]
list_2 = [90, 91]
temperatures = dict(col_1=list_1, col_2=list_2)
pd.DataFrame(temperatures)

Unnamed: 0,col_1,col_2
0,70,90
1,71,91


создание DataFrame с помощью словаря, состоящего из объектов Series

In [None]:
series_1 = pd.Series([70, 71])
series_2 = pd.Series([90, 91])

df = pd.DataFrame({'col_1': series_1,
                   'col_2': series_2})
df

Unnamed: 0,col_1,col_2
0,70,90
1,71,91


#### при помощи функций

создание Series, используя np.arange - последовательность чисел от **start** до **stop** (само значение **stop** не включается) с шагом **step**:
```python
np.arange(start, stop, step)
```

In [None]:
# 15, 17, ..., 23: the stop value (25) itself is never produced.
s = pd.Series(np.arange(start=15, stop=25, step=2))
s

0    15
1    17
2    19
3    21
4    23
dtype: int64

создаем Series из 5 значений, равномерно разбивающих отрезок от 0 до 9

In [None]:
s = pd.Series(np.linspace(0, 9, 5))
s

0    0.00
1    2.25
2    4.50
3    6.75
4    9.00
dtype: float64

Генерация случайных чисел.

Зафиксируем значение seed, что позволит нам в будущем воспроизводить свои результаты

Создадим объект Series из 5 нормально распределенных случайных чисел

In [None]:
np.random.seed(123)
s = pd.Series(np.random.normal(size=5))
s

0   -1.085631
1    0.997345
2    0.282978
3   -1.506295
4   -0.578600
dtype: float64

Создадим объект DataFrame размерности 4х3 из случайных чисел

In [None]:
# Re-seed so the table is reproducible, then draw the 4x3 matrix directly in
# its final shape (same values, in the same order, as reshaping 12 draws).
np.random.seed(123)
df = pd.DataFrame(np.random.normal(size=(4, 3)),
                  index=[f'ind_{i}' for i in range(1, 5)],
                  columns=[f'col_{j}' for j in range(1, 4)])
df

Unnamed: 0,col_1,col_2,col_3
ind_1,-1.085631,0.997345,0.282978
ind_2,-1.506295,-0.5786,1.651437
ind_3,-2.426679,-0.428913,1.265936
ind_4,-0.86674,-0.678886,-0.094709


#### [из файла](http://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#io-tools-text-csv-hdf5)

| Column Name        | Description |
| ------------- |:-------------:|
|Symbol|Сокращенное название организации|
|Name|Полное название организации|
|Sector|Сектор экономики|
|Price|Стоимость акции|
|Dividend Yield|Дивидендная доходность|
|Price/Earnings|Цена / прибыль|
|Earnings/Share|Прибыль на акцию|
|Book Value|Балансовая стоимость компании|
|52 week low|52-недельный минимум|
|52 week high|52-недельный максимум|
|Market Cap|Рыночная капитализация|
|EBITDA|**E**arnings **b**efore **i**nterest, **t**axes, **d**epreciation and **a**mortization|
|Price/Sales|Цена / объём продаж|
|Price/Book|Цена / балансовая стоимость|
|SEC Filings|Ссылка *sec.gov*|

In [None]:
# Parse the file using ';' as the separator. The result has a single column
# holding each whole line, i.e. ';' is not the delimiter this file uses.
pd.read_csv("sp500.csv", sep=";")

Unnamed: 0,"Symbol,Name,Sector,Price,Dividend Yield,Price/Earnings,Earnings/Share,Book Value,52 week low,52 week high,Market Cap,EBITDA,Price/Sales,Price/Book,SEC Filings"
0,"MMM,3M Co.,Industrials,141.14,2.12,20.33,6.90,..."
1,"ABT,Abbott Laboratories,Health Care,39.60,1.82..."
2,"ABBV,AbbVie Inc.,Health Care,53.95,3.02,20.87,..."
3,"ACN,Accenture,Information Technology,79.79,2.3..."
4,"ACE,ACE Limited,Financials,102.91,2.21,10.00,1..."
...,...
495,"YHOO,Yahoo Inc.,Information Technology,35.02,,..."
496,"YUM,Yum! Brands Inc,Consumer Discretionary,74...."
497,"ZMH,Zimmer Holdings,Health Care,101.84,0.81,22..."
498,"ZION,Zions Bancorp,Financials,28.43,0.56,18.82..."


In [None]:
pd.read_csv(filepath_or_buffer = "sp500.csv",
            sep = ',')

Unnamed: 0,Symbol,Name,Sector,Price,Dividend Yield,Price/Earnings,Earnings/Share,Book Value,52 week low,52 week high,Market Cap,EBITDA,Price/Sales,Price/Book,SEC Filings
0,MMM,3M Co.,Industrials,141.14,2.12,20.33,6.900,26.668,107.15,143.37,92.345,8.1210,2.95,5.26,http://www.sec.gov/cgi-bin/browse-edgar?action...
1,ABT,Abbott Laboratories,Health Care,39.60,1.82,25.93,1.529,15.573,32.70,40.49,59.477,4.3590,2.74,2.55,http://www.sec.gov/cgi-bin/browse-edgar?action...
2,ABBV,AbbVie Inc.,Health Care,53.95,3.02,20.87,2.570,2.954,40.10,54.78,85.784,7.1900,4.48,18.16,http://www.sec.gov/cgi-bin/browse-edgar?action...
3,ACN,Accenture,Information Technology,79.79,2.34,19.53,4.068,8.326,69.00,85.88,50.513,4.4230,1.75,9.54,http://www.sec.gov/cgi-bin/browse-edgar?action...
4,ACE,ACE Limited,Financials,102.91,2.21,10.00,10.293,86.897,84.73,104.07,34.753,4.2750,1.79,1.18,http://www.sec.gov/cgi-bin/browse-edgar?action...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,YHOO,Yahoo Inc.,Information Technology,35.02,,28.94,1.199,12.768,23.82,41.72,35.258,0.8873,7.48,2.72,http://www.sec.gov/cgi-bin/browse-edgar?action...
496,YUM,Yum! Brands Inc,Consumer Discretionary,74.77,1.93,29.86,2.507,5.147,64.08,79.70,33.002,2.8640,2.49,14.55,http://www.sec.gov/cgi-bin/browse-edgar?action...
497,ZMH,Zimmer Holdings,Health Care,101.84,0.81,22.92,4.441,37.181,74.55,108.33,17.091,1.6890,3.68,2.74,http://www.sec.gov/cgi-bin/browse-edgar?action...
498,ZION,Zions Bancorp,Financials,28.43,0.56,18.82,1.511,30.191,26.39,33.33,5.257,0.0000,2.49,0.94,http://www.sec.gov/cgi-bin/browse-edgar?action...


In [None]:
pd.read_csv(filepath_or_buffer = "https://raw.githubusercontent.com/trisha00001/file/main/sp500.csv",
            sep = ',')

Unnamed: 0,"Symbol,Name,Sector,Price,Dividend Yield,Price/Earnings,Earnings/Share,Book Value,52 week low,52 week high,Market Cap,EBITDA,Price/Sales,Price/Book,SEC Filings"
0,"MMM,3M Co.,Industrials,141.14,2.12,20.33,6.90,..."
1,"ABT,Abbott Laboratories,Health Care,39.60,1.82..."
2,"ABBV,AbbVie Inc.,Health Care,53.95,3.02,20.87,..."
3,"ACN,Accenture,Information Technology,79.79,2.3..."
4,"ACE,ACE Limited,Financials,102.91,2.21,10.00,1..."
...,...
495,"YHOO,Yahoo Inc.,Information Technology,35.02,,..."
496,"YUM,Yum! Brands Inc,Consumer Discretionary,74...."
497,"ZMH,Zimmer Holdings,Health Care,101.84,0.81,22..."
498,"ZION,Zions Bancorp,Financials,28.43,0.56,18.82..."


разделитель

In [None]:
pd.read_csv(filepath_or_buffer = "sp500.csv",
           sep = ',')

Unnamed: 0,Symbol,Name,Sector,Price,Dividend Yield,Price/Earnings,Earnings/Share,Book Value,52 week low,52 week high,Market Cap,EBITDA,Price/Sales,Price/Book,SEC Filings
0,MMM,3M Co.,Industrials,141.14,2.12,20.33,6.900,26.668,107.15,143.37,92.345,8.1210,2.95,5.26,http://www.sec.gov/cgi-bin/browse-edgar?action...
1,ABT,Abbott Laboratories,Health Care,39.60,1.82,25.93,1.529,15.573,32.70,40.49,59.477,4.3590,2.74,2.55,http://www.sec.gov/cgi-bin/browse-edgar?action...
2,ABBV,AbbVie Inc.,Health Care,53.95,3.02,20.87,2.570,2.954,40.10,54.78,85.784,7.1900,4.48,18.16,http://www.sec.gov/cgi-bin/browse-edgar?action...
3,ACN,Accenture,Information Technology,79.79,2.34,19.53,4.068,8.326,69.00,85.88,50.513,4.4230,1.75,9.54,http://www.sec.gov/cgi-bin/browse-edgar?action...
4,ACE,ACE Limited,Financials,102.91,2.21,10.00,10.293,86.897,84.73,104.07,34.753,4.2750,1.79,1.18,http://www.sec.gov/cgi-bin/browse-edgar?action...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,YHOO,Yahoo Inc.,Information Technology,35.02,,28.94,1.199,12.768,23.82,41.72,35.258,0.8873,7.48,2.72,http://www.sec.gov/cgi-bin/browse-edgar?action...
496,YUM,Yum! Brands Inc,Consumer Discretionary,74.77,1.93,29.86,2.507,5.147,64.08,79.70,33.002,2.8640,2.49,14.55,http://www.sec.gov/cgi-bin/browse-edgar?action...
497,ZMH,Zimmer Holdings,Health Care,101.84,0.81,22.92,4.441,37.181,74.55,108.33,17.091,1.6890,3.68,2.74,http://www.sec.gov/cgi-bin/browse-edgar?action...
498,ZION,Zions Bancorp,Financials,28.43,0.56,18.82,1.511,30.191,26.39,33.33,5.257,0.0000,2.49,0.94,http://www.sec.gov/cgi-bin/browse-edgar?action...


количество строк

In [None]:
pd.read_csv(filepath_or_buffer = "sp500.csv",
           sep = ',',
           nrows = 3)

Unnamed: 0,Symbol,Name,Sector,Price,Dividend Yield,Price/Earnings,Earnings/Share,Book Value,52 week low,52 week high,Market Cap,EBITDA,Price/Sales,Price/Book,SEC Filings
0,MMM,3M Co.,Industrials,141.14,2.12,20.33,6.9,26.668,107.15,143.37,92.345,8.121,2.95,5.26,http://www.sec.gov/cgi-bin/browse-edgar?action...
1,ABT,Abbott Laboratories,Health Care,39.6,1.82,25.93,1.529,15.573,32.7,40.49,59.477,4.359,2.74,2.55,http://www.sec.gov/cgi-bin/browse-edgar?action...
2,ABBV,AbbVie Inc.,Health Care,53.95,3.02,20.87,2.57,2.954,40.1,54.78,85.784,7.19,4.48,18.16,http://www.sec.gov/cgi-bin/browse-edgar?action...


столбцы

In [None]:
pd.read_csv(filepath_or_buffer = "https://raw.githubusercontent.com/trisha00001/file/main/sp500.csv",
           sep = ',',
           nrows = 3,
           usecols=['Symbol', 'Sector', 'Price', 'Book Value'])

Unnamed: 0,Symbol,Sector,Price,Book Value
0,MMM,Industrials,141.14,26.668
1,ABT,Health Care,39.6,15.573
2,ABBV,Health Care,53.95,2.954


индекс

In [None]:
pd.read_csv(filepath_or_buffer = "sp500.csv",
           sep = ',',
           nrows = 3,
           usecols=['Symbol', 'Sector', 'Price', 'Book Value'],
           index_col='Symbol')

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,Industrials,141.14,26.668
ABT,Health Care,39.6,15.573
ABBV,Health Care,53.95,2.954


итератор

In [None]:
# Read the file lazily, 50 rows at a time. With ``chunksize`` set, ``read_csv``
# returns an iterator of DataFrames instead of one DataFrame. Using the reader
# as a context manager closes the underlying file handle once iteration is
# done. The reader is single-pass: after this loop it is exhausted, so
# iterating ``df_chunk`` again yields nothing.
with pd.read_csv(filepath_or_buffer="sp500.csv",
                 sep=',',
                 chunksize=50,
                 usecols=['Symbol', 'Sector', 'Price', 'Book Value'],
                 index_col='Symbol') as df_chunk:
    for df_tmp in df_chunk:
        # 'Symbol' is the index, so each chunk has the 3 remaining columns.
        print('DataFrame part:', df_tmp.shape)

DataFrame part: (50, 3)
DataFrame part: (50, 3)
DataFrame part: (50, 3)
DataFrame part: (50, 3)
DataFrame part: (50, 3)
DataFrame part: (50, 3)
DataFrame part: (50, 3)
DataFrame part: (50, 3)
DataFrame part: (50, 3)
DataFrame part: (50, 3)


In [None]:
sp500 = pd.read_csv(filepath_or_buffer = "https://raw.githubusercontent.com/trisha00001/file/main/sp500.csv",
                    sep = ',',
                    usecols=['Symbol', 'Sector', 'Price', 'Book Value'],
                    index_col='Symbol')
sp500

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,Industrials,141.14,26.668
ABT,Health Care,39.60,15.573
ABBV,Health Care,53.95,2.954
ACN,Information Technology,79.79,8.326
ACE,Financials,102.91,86.897
...,...,...,...
YHOO,Information Technology,35.02,12.768
YUM,Consumer Discretionary,74.77,5.147
ZMH,Health Care,101.84,37.181
ZION,Financials,28.43,30.191


### Свойства

#### создаем Series для примеров

In [None]:
Simpsons = pd.Series({'Homer': 120,
                      'Marge': 60,
                      'Bart': 35,
                      'Lisa': 30,
                      'Maggie': 7})

Simpsons

Unnamed: 0,0
Homer,120
Marge,60
Bart,35
Lisa,30
Maggie,7


In [None]:
np.random.seed(123)
numbers = pd.Series(data = np.random.normal(size=10),
                    index = np.arange(25,35))
numbers

Unnamed: 0,0
25,-1.085631
26,0.997345
27,0.282978
28,-1.506295
29,-0.5786
30,1.651437
31,-2.426679
32,-0.428913
33,1.265936
34,-0.86674


#### тип данных

In [None]:
Simpsons.dtype

dtype('int64')

In [None]:
sp500.dtypes

Sector         object
Price         float64
Book Value    float64
dtype: object

#### количество элементов

Series:

In [None]:
print('Первый способ:', len(Simpsons))
print('Второй способ:', Simpsons.size)
print('Третий способ:', Simpsons.shape)

Первый способ: 5
Второй способ: 5
Третий способ: (5,)


DataFrame:

In [None]:
print('Первый способ:', len(sp500))
print('Второй способ:', sp500.size)
print('Третий способ:', sp500.shape)

Первый способ: 500
Второй способ: 1500
Третий способ: (500, 3)


#### количество уникальных элементов

In [None]:
Simpsons.nunique()

5

In [None]:
sp500.nunique()

Sector         13
Price         495
Book Value    495
dtype: int64

#### индекс и значения

Series:

In [None]:
Simpsons.index

Index(['Homer', 'Marge', 'Bart', 'Lisa', 'Maggie'], dtype='object')

In [None]:
Simpsons.values

array([120,  60,  35,  30,   7])

DataFrame:

In [None]:
sp500.index

Index(['MMM', 'ABT', 'ABBV', 'ACN', 'ACE', 'ACT', 'ADBE', 'AES', 'AET', 'AFL',
       ...
       'XEL', 'XRX', 'XLNX', 'XL', 'XYL', 'YHOO', 'YUM', 'ZMH', 'ZION', 'ZTS'],
      dtype='object', name='Symbol', length=500)

In [None]:
sp500.values

array([['Industrials', 141.14, 26.668],
       ['Health Care', 39.6, 15.573],
       ['Health Care', 53.95, 2.954],
       ...,
       ['Health Care', 101.84, 37.181],
       ['Financials', 28.43, 30.191],
       ['Health Care', 30.53, 2.15]], dtype=object)

In [None]:
sp500.columns

Index(['Sector', 'Price', 'Book Value'], dtype='object')

#### присвоение / изменение имени

#####  объекта Series

In [None]:
Simpsons.name = 'Simpsons weight'
Simpsons

Homer     120
Marge      60
Bart       35
Lisa       30
Maggie      7
Name: Simpsons weight, dtype: int64

##### индекса

In [None]:
Simpsons.index.name = 'First name'
Simpsons

First name
Homer     120
Marge      60
Bart       35
Lisa       30
Maggie      7
Name: Simpsons weight, dtype: int64

##### столбца

In [None]:
sp500_copy = sp500.rename(columns = {'Book Value': 'BookValue'})

проверяем, не изменились ли имена столбцов в исходном датафрейме

In [None]:
sp500.columns

Index(['Sector', 'Price', 'Book Value'], dtype='object')

In [None]:
sp500_copy.columns

Index(['Sector', 'Price', 'BookValue'], dtype='object')

этот программный код переименовывает столбец на месте

In [None]:
# Same kind of rename, but applied to sp500_copy itself: inplace=True mutates
# the existing object instead of handing back a renamed copy.
sp500_copy.rename(columns={'BookValue': 'Book_Value'}, inplace=True)

смотрим изменилось ли имя столбца

In [None]:
sp500_copy.columns

Index(['Sector', 'Price', 'Book_Value'], dtype='object')

### Вывод значений

#### первые / последние строки

In [None]:
sp500.head()

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,Industrials,141.14,26.668
ABT,Health Care,39.6,15.573
ABBV,Health Care,53.95,2.954
ACN,Information Technology,79.79,8.326
ACE,Financials,102.91,86.897


In [None]:
Simpsons.tail(3)

First name
Bart      35
Lisa      30
Maggie     7
Name: Simpsons weight, dtype: int64

#### столбцы

извлекаем столбец Sector

In [None]:
sp500['Sector'].head()

Symbol
MMM                Industrials
ABT                Health Care
ABBV               Health Care
ACN     Information Technology
ACE                 Financials
Name: Sector, dtype: object

тип столбца датафрейма:

In [None]:
type(sp500['Sector'])

извлекаем столбцы Price и Book Value

In [None]:
sp500[['Price', 'Book Value']].head()

Unnamed: 0_level_0,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1
MMM,141.14,26.668
ABT,39.6,15.573
ABBV,53.95,2.954
ACN,79.79,8.326
ACE,102.91,86.897


покажем, что результат является объектом DataFrame

In [None]:
type(sp500[['Price', 'Book Value']])

атрибутивный доступ к столбцу по имени

In [None]:
sp500.Price.head()

Symbol
MMM     141.14
ABT      39.60
ABBV     53.95
ACN      79.79
ACE     102.91
Name: Price, dtype: float64

пример с названием "Book Value"

In [None]:
sp500.Book Value

SyntaxError: invalid syntax (<ipython-input-46-f23c90049dce>, line 1)

#### строки

##### по метке

**Series**

In [None]:
numbers.loc[[25,33]]

25   -1.085631
33    1.265936
dtype: float64

In [None]:
numbers

25   -1.085631
26    0.997345
27    0.282978
28   -1.506295
29   -0.578600
30    1.651437
31   -2.426679
32   -0.428913
33    1.265936
34   -0.866740
dtype: float64

ошибка - нет метки

In [None]:
numbers.loc[0]

KeyError: 0

In [None]:
# Small labelled frame: one dictionary entry per row label, with the column
# names supplied separately.
df = pd.DataFrame.from_dict({'cobra': [1, 2],
                             'viper': [4, 5],
                             'sidewinder': [7, 8]},
                            orient='index',
                            columns=['max_speed', 'shield'])
df

Unnamed: 0,max_speed,shield
cobra,1,2
viper,4,5
sidewinder,7,8


In [None]:
df.loc['viper']

max_speed    4
shield       5
Name: viper, dtype: int64

In [None]:
df[df['shield'] > 6]

Unnamed: 0,max_speed,shield
sidewinder,7,8


**DataFrame**

получаем строку с меткой индекса MMM, которая возвращается в виде объекта Series

In [None]:
sp500.loc['MMM']

Sector        Industrials
Price              141.14
Book Value         26.668
Name: MMM, dtype: object

In [None]:
type(sp500.loc['MMM'])

получаем строки MMM и MSFT; результатом будет объект DataFrame

In [None]:
sp500.loc[['MMM', 'MSFT']]

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,Industrials,141.14,26.668
MSFT,Information Technology,40.12,10.584


In [None]:
type(sp500.loc[['MMM', 'MSFT']])

##### по позиции

**Series**

In [None]:
numbers

25   -1.085631
26    0.997345
27    0.282978
28   -1.506295
29   -0.578600
30    1.651437
31   -2.426679
32   -0.428913
33    1.265936
34   -0.866740
dtype: float64

по позиции

In [None]:
numbers.iloc[[5,-5]]

30    1.651437
30    1.651437
dtype: float64

ошибка:

In [None]:
numbers.iloc[10]

IndexError: single positional indexer is out-of-bounds

**DataFrame**

получаем строки, имеющие позиции 0 и 2

In [None]:
sp500.iloc[[0, 2]]

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,Industrials,141.14,26.668
ABBV,Health Care,53.95,2.954


получаем позиции меток MMM и A в индексе

In [None]:
i1 = sp500.index.get_loc('MMM')
i2 = sp500.index.get_loc('A')
(i1, i2)

(0, 10)

и извлекаем строки

In [None]:
sp500.iloc[[i1, i2]]

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,Industrials,141.14,26.668
A,Health Care,56.18,16.928


#### поиск скалярного значения

ищем скалярное значение по метке строки и метке (имени) столбца

In [None]:
sp500.at['MMM', 'Price']

141.14

ищем скалярное значение по позиции строки и позиции столбца; извлекаем значение в строке 0, столбце 1

In [None]:
sp500.iat[0, 1]

141.14

#### одновременный отбор строк и столбцов

отбираем строки с метками индекса ABT и ZTS для столбцов Sector и Price

In [None]:
# Two-step (chained) selection: .loc first picks the rows, producing an
# intermediate DataFrame, and the second [] then picks the columns from it.
sp500.loc[['ABT', 'ZTS']][['Sector', 'Price']]

Unnamed: 0_level_0,Sector,Price
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1
ABT,Health Care,39.6
ZTS,Health Care,30.53


In [None]:
sp500.loc[['ABT', 'ZTS'],['Sector', 'Price']]

Unnamed: 0_level_0,Sector,Price
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1
ABT,Health Care,39.6
ZTS,Health Care,30.53


определение номера позиций заданных меток

In [None]:
print(sp500.index.get_loc('ABT'),sp500.index.get_loc('ZTS'))

1 499


отбор строк и столбцов по номеру позиций

In [None]:
# Positional selection driven by the labels themselves: look the row and
# column positions up instead of hard-coding 1/499 and 0/1, which would
# silently select other data if the file gains, loses or reorders rows.
# NOTE(review): get_loc returns a plain integer only for labels that occur
# once - this assumes the Symbol index is unique; confirm for the dataset.
row_positions = [sp500.index.get_loc(label) for label in ['ABT', 'ZTS']]
col_positions = [sp500.columns.get_loc(name) for name in ['Sector', 'Price']]
sp500.iloc[row_positions, col_positions]

Unnamed: 0_level_0,Sector,Price
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1
ABT,Health Care,39.6
ZTS,Health Care,30.53


#### транспонирование

In [None]:
sp500.head()

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,Industrials,141.14,26.668
ABT,Health Care,39.6,15.573
ABBV,Health Care,53.95,2.954
ACN,Information Technology,79.79,8.326
ACE,Financials,102.91,86.897


In [None]:
sp500.T.head()

Symbol,MMM,ABT,ABBV,ACN,ACE,ACT,ADBE,AES,AET,AFL,...,XEL,XRX,XLNX,XL,XYL,YHOO,YUM,ZMH,ZION,ZTS
Sector,Industrials,Health Care,Health Care,Information Technology,Financials,Health Care,Information Technology,Utilities,Health Care,Financials,...,Utilities,Information Technology,Information Technology,Financials,Industrials,Information Technology,Consumer Discretionary,Health Care,Financials,Health Care
Price,141.14,39.6,53.95,79.79,102.91,213.77,64.3,13.61,76.39,61.31,...,30.24,12.06,46.03,32.47,38.42,35.02,74.77,101.84,28.43,30.53
Book Value,26.668,15.573,2.954,8.326,86.897,55.188,13.262,5.781,40.021,34.527,...,19.45,10.471,10.247,37.451,12.127,12.768,5.147,37.181,30.191,2.15


#### переиндексация

ошибка:

In [None]:
sp500.loc[['MMM', 'ABBV', 'NEW VALUE']]

KeyError: "['NEW VALUE'] not in index"

делаем переиндексацию, задав метки MMM, ABBV и NEW VALUE

In [None]:
reindexed = sp500.reindex(index=['MMM', 'ABBV', 'NEW VALUE'])

обратите внимание, что все индексы, кроме перечисленных при вызове, удалены, а *NEW VALUE* содержит значения *NaN*

In [None]:
reindexed

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,Industrials,141.14,26.668
ABBV,Health Care,53.95,2.954
NEW VALUE,,,


выполняем переиндексацию столбцов

In [None]:
sp500.reindex(columns=['Price', 'Book Value', 'NewCol']).head()

Unnamed: 0_level_0,Price,Book Value,NewCol
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,141.14,26.668,
ABT,39.6,15.573,
ABBV,53.95,2.954,
ACN,79.79,8.326,
ACE,102.91,86.897,


при этом можем заполнить отсутствующие значения константами вместо *NaN*

In [None]:
sp500.reindex(columns=['Price',
                       'Book Value',
                       'NewCol'],
              fill_value=0).head()

Unnamed: 0_level_0,Price,Book Value,NewCol
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,141.14,26.668,0
ABT,39.6,15.573,0
ABBV,53.95,2.954,0
ACN,79.79,8.326,0
ACE,102.91,86.897,0


#### случайная подвыборка

отбираем три случайные строки

In [None]:
sp500.sample(n=3)

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ZION,Financials,28.43,30.191
NOV,Energy,81.9,52.925
STX,Information Technology,51.95,8.08


случайный отбор с возвращением

In [None]:
sp500.sample(frac=5, replace=True, random_state=777)

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CLF,Materials,16.34,34.523
MHK,Consumer Discretionary,136.41,61.582
BDX,Health Care,115.70,27.510
HOT,Consumer Discretionary,78.73,17.218
CELG,Health Care,150.13,11.200
...,...,...,...
BTU,Energy,17.22,14.644
MCO,Financials,82.50,1.803
LRCX,Information Technology,60.61,29.677
CI,Health Care,89.24,39.304


In [None]:
sp500.sample(frac=0.2, replace=True, random_state=777)

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CLF,Materials,16.34,34.523
MHK,Consumer Discretionary,136.41,61.582
BDX,Health Care,115.70,27.510
HOT,Consumer Discretionary,78.73,17.218
CELG,Health Care,150.13,11.200
...,...,...,...
LNC,Financials,48.29,54.937
CBG,Financials,29.15,6.028
BEN,Financials,55.00,17.307
CMG,Consumer Discretionary,522.32,52.915


#### [настройки вывода](http://pandas.pydata.org/pandas-docs/stable/user_guide/options.html#available-options)

In [None]:
pd.options.display.max_rows

60

In [None]:
sp500

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,Industrials,141.14,26.668
ABT,Health Care,39.60,15.573
ABBV,Health Care,53.95,2.954
ACN,Information Technology,79.79,8.326
ACE,Financials,102.91,86.897
...,...,...,...
YHOO,Information Technology,35.02,12.768
YUM,Consumer Discretionary,74.77,5.147
ZMH,Health Care,101.84,37.181
ZION,Financials,28.43,30.191


In [None]:
# Limit rendered frames to at most 4 rows. This is a session-wide setting and
# stays in effect for every later cell until it is changed again.
pd.set_option('display.max_rows', 4)

In [None]:
sp500

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,Industrials,141.14,26.668
ABT,Health Care,39.60,15.573
...,...,...,...
ZION,Financials,28.43,30.191
ZTS,Health Care,30.53,2.150


In [None]:
pd.options.display.max_rows

4

### Срезы данных

#### Series

Задаём срез по правилу: [начальная позиция: конечная позиция: величина шага], при этом:
- Правая граница - не включается
- Шаг может быть отрицательным
- Позиция также может быть отрицательной - тогда отсчёт происходит "с другого конца"
- Нумерация происходит от нуля

In [None]:
numbers

25   -1.085631
26    0.997345
27    0.282978
28   -1.506295
29   -0.578600
30    1.651437
31   -2.426679
32   -0.428913
33    1.265936
34   -0.866740
dtype: float64

срез, содержащий элементы с позициями от 1 по 5

In [None]:
numbers.iloc[1:6]

26    0.997345
27    0.282978
28   -1.506295
29   -0.578600
30    1.651437
dtype: float64

выбираем элементы в позициях 1, 3, 5 == выбираем элементы с 1 по 5 позицию с шагом 2

In [None]:
numbers.iloc[1:6:2]

26    0.997345
28   -1.506295
30    1.651437
dtype: float64

можем оставить только конечную позицию

In [None]:
numbers.iloc[:6]

25   -1.085631
26    0.997345
27    0.282978
28   -1.506295
29   -0.578600
30    1.651437
dtype: float64

либо оставим только начальную позицию

In [None]:
numbers.iloc[3:]

28   -1.506295
29   -0.578600
30    1.651437
31   -2.426679
32   -0.428913
33    1.265936
34   -0.866740
dtype: float64

отбираем элементы Series в обратном порядке, начиная с 5

In [None]:
numbers.iloc[5::-1]

30    1.651437
29   -0.578600
28   -1.506295
27    0.282978
26    0.997345
25   -1.085631
dtype: float64

отбор 4 последних строк

In [None]:
numbers.iloc[-4:]

31   -2.426679
32   -0.428913
33    1.265936
34   -0.866740
dtype: float64

####  DataFrame

In [None]:
sp500.iloc[:5]

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,Industrials,141.14,26.668
ABT,Health Care,39.6,15.573
ABBV,Health Care,53.95,2.954
ACN,Information Technology,79.79,8.326
ACE,Financials,102.91,86.897


в обратном порядке

In [None]:
sp500.iloc[4::-1]

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ACE,Financials,102.91,86.897
ACN,Information Technology,79.79,8.326
ABBV,Health Care,53.95,2.954
ABT,Health Care,39.6,15.573
MMM,Industrials,141.14,26.668


строки, начиная с метки ABT и заканчивая меткой ACN

In [None]:
sp500.loc['ABT':'ACN']

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ABT,Health Care,39.6,15.573
ABBV,Health Care,53.95,2.954
ACN,Information Technology,79.79,8.326


### Копирование и ссылки

In [None]:
numbers

Unnamed: 0,0
25,-1.085631
26,0.997345
27,0.282978
28,-1.506295
29,-0.5786
30,1.651437
31,-2.426679
32,-0.428913
33,1.265936
34,-0.86674


элементы с 1 по 4

In [None]:
numbers.iloc[[1,2,3,4]]

Unnamed: 0,0
26,0.997345
27,0.282978
28,-1.506295
29,-0.5786


сохранили в переменную n

In [None]:
n = numbers.iloc[[1,2,3,4]]

In [None]:
n

Unnamed: 0,0
26,0.997345
27,0.282978
28,-1.506295
29,-0.5786


присваиваем значение 0 всем элементам

In [None]:
n.loc[:] = 0
n

Unnamed: 0,0
26,0.0
27,0.0
28,0.0
29,0.0


что-нибудь произошло с numbers?

In [None]:
numbers

Unnamed: 0,0
25,-1.085631
26,0.997345
27,0.282978
28,-1.506295
29,-0.5786
30,1.651437
31,-2.426679
32,-0.428913
33,1.265936
34,-0.86674


еще раз сохраним элементы на позициях с 1 по 4

In [None]:
n = numbers.iloc[[1,2,3,4]]
n

Unnamed: 0,0
26,0.997345
27,0.282978
28,-1.506295
29,-0.5786


создаем переменную k = срез с 1 по 4 элемент

In [None]:
# Take the elements at positions 1..4 as a *slice*. ``.iloc`` makes the
# positional intent explicit: ``numbers`` is labelled 25..34, so a bare
# ``numbers[1:5]`` reads as if it were selecting labels 1..5.
# NOTE(review): whether zeroing ``k`` also changes ``numbers`` depends on the
# pandas version - with Copy-on-Write enabled (the default from pandas 3.0)
# the parent Series is left untouched. Confirm against the installed version.
k = numbers.iloc[1:5]
k.loc[:] = 0
k

Unnamed: 0,0
26,0.0
27,0.0
28,0.0
29,0.0


In [None]:
numbers

Unnamed: 0,0
25,-1.085631
26,0.0
27,0.0
28,0.0
29,0.0
30,1.651437
31,-2.426679
32,-0.428913
33,1.265936
34,-0.86674


восстановили numbers

In [None]:
# Write the saved values back into positions 1..4. ``.iloc`` states that the
# slice is positional (the labels of ``numbers`` are 25..34), and passing a
# plain array makes it explicit that the values are copied in order, with no
# alignment on the labels of ``n``.
numbers.iloc[1:5] = n.to_numpy()
numbers

Unnamed: 0,0
25,-1.085631
26,0.997345
27,0.282978
28,-1.506295
29,-0.5786
30,1.651437
31,-2.426679
32,-0.428913
33,1.265936
34,-0.86674


###  Удаление

#### del

Series

In [None]:
Simpsons

First name
Homer     120
Marge      60
Bart       35
Lisa       30
Maggie      7
Name: Simpsons weight, dtype: int64

In [None]:
Simpsons_copy = Simpsons.copy()
del Simpsons_copy['Maggie']
Simpsons_copy

First name
Homer    120
Marge     60
Bart      35
Lisa      30
Name: Simpsons weight, dtype: int64

DataFrame

In [None]:
sp500.head()

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,Industrials,141.14,26.668
ABT,Health Care,39.6,15.573
ABBV,Health Care,53.95,2.954
ACN,Information Technology,79.79,8.326
ACE,Financials,102.91,86.897


In [None]:
sp500_copy = sp500.copy()
del sp500_copy['Price']
sp500_copy.iloc[:2]

Unnamed: 0_level_0,Sector,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1
MMM,Industrials,26.668
ABT,Health Care,15.573


In [None]:
sp500_copy

Unnamed: 0_level_0,Sector,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1
MMM,Industrials,26.668
ABT,Health Care,15.573
ABBV,Health Care,2.954
ACN,Information Technology,8.326
ACE,Financials,86.897
...,...,...
YHOO,Information Technology,12.768
YUM,Consumer Discretionary,5.147
ZMH,Health Care,37.181
ZION,Financials,30.191


#### pop

In [None]:
sp500_copy = sp500.copy()

In [None]:
sp500_copy.head(3)

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,Industrials,141.14,26.668
ABT,Health Care,39.6,15.573
ABBV,Health Care,53.95,2.954


эта строка удалит столбец Sector и возвратит его как серию

In [None]:
popped_column = sp500_copy.pop('Sector')

столбец Sector удален на месте

In [None]:
sp500_copy.head(3)

Unnamed: 0_level_0,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1
MMM,141.14,26.668
ABT,39.6,15.573
ABBV,53.95,2.954


и у нас есть столбец Sector, полученный в результате применения pop

In [None]:
popped_column.head(3)

Symbol
MMM     Industrials
ABT     Health Care
ABBV    Health Care
Name: Sector, dtype: object

Для Series применение .pop идентично

#### drop

In [None]:
sp500_copy = sp500.copy()

In [None]:
sp500_copy.head(3)

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,Industrials,141.14,26.668
ABT,Health Care,39.6,15.573
ABBV,Health Care,53.95,2.954


- эта строка вернет новый датафрейм с удаленным столбцом 'Sector'
- копия датафрейма не изменится

In [None]:
# drop() hands back a new frame without 'Sector'; sp500_copy itself is not
# modified by this call.
sp500_copy_after_drop = sp500_copy.drop(columns=['Sector'])
sp500_copy_after_drop.head(3)

Unnamed: 0_level_0,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1
MMM,141.14,26.668
ABT,39.6,15.573
ABBV,53.95,2.954


In [None]:
sp500_copy_after_drop

Unnamed: 0_level_0,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1
MMM,141.14,26.668
ABT,39.60,15.573
ABBV,53.95,2.954
ACN,79.79,8.326
ACE,102.91,86.897
...,...,...
YHOO,35.02,12.768
YUM,74.77,5.147
ZMH,101.84,37.181
ZION,28.43,30.191


In [None]:
sp500_copy.head(3)

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,Industrials,141.14,26.668
ABT,Health Care,39.6,15.573
ABBV,Health Care,53.95,2.954


In [None]:
 sp500_copy.drop(['Sector'], axis = 1, inplace = True)

In [None]:
sp500_copy.head(3)

Unnamed: 0_level_0,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1
MMM,141.14,26.668
ABT,39.6,15.573
ABBV,53.95,2.954


получаем копию первых 5 строк датафрейма sp500

In [None]:
sp500_part_copy = sp500.iloc[:5].copy()
sp500_part_copy

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,Industrials,141.14,26.668
ABT,Health Care,39.6,15.573
ABBV,Health Care,53.95,2.954
ACN,Information Technology,79.79,8.326
ACE,Financials,102.91,86.897


удаляем строки с метками ABT и ACN

In [None]:
# Remove the rows labelled ABT and ACN. drop() returns a new frame,
# so the name is rebound to the result.
sp500_part_copy = sp500_part_copy.drop(index=['ABT', 'ACN'])
sp500_part_copy.head(5)

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,Industrials,141.14,26.668
ABBV,Health Care,53.95,2.954
ACE,Financials,102.91,86.897


Для Series применение .drop идентично

### Фильтрация по условию

#### Series

In [None]:
numbers

25   -1.085631
26    0.997345
27    0.282978
28   -1.506295
29   -0.578600
30    1.651437
31   -2.426679
32   -0.428913
33    1.265936
34   -0.866740
dtype: float64

какие строки имеют значения больше 0 и меньше 1?

In [None]:
# Element-wise AND of two boolean masks. Each comparison is wrapped in
# parentheses because & binds tighter than > and <.
logical_results = (numbers > 0) & (numbers < 1)
logical_results

25    False
26     True
27     True
28    False
29    False
30    False
31    False
32    False
33    False
34    False
dtype: bool

Скобки обязательны! Оператор & имеет более высокий приоритет, чем операторы сравнения, поэтому следующий программный код приведет к выдаче исключения
```python
numbers > 0 & numbers < 1
```

тип полученного результата - Series, который можно использовать для отбора интересующих нас значений

In [None]:
type(logical_results)

отбираем строки со значением True

In [None]:
numbers[logical_results]

26    0.997345
27    0.282978
dtype: float64

использование метода .where

In [None]:
numbers.where((numbers > 0) & (numbers < 1))

25         NaN
26    0.997345
27    0.282978
28         NaN
29         NaN
30         NaN
31         NaN
32         NaN
33         NaN
34         NaN
dtype: float64

In [None]:
numbers.where((numbers > 0) & (numbers < 1), other = -1)

25   -1.000000
26    0.997345
27    0.282978
28   -1.000000
29   -1.000000
30   -1.000000
31   -1.000000
32   -1.000000
33   -1.000000
34   -1.000000
dtype: float64

все ли элементы >= 0?

In [None]:
(numbers >= 0).all()

False

есть ли элемент < 2?

In [None]:
(numbers < 2).any()

True

In [None]:
(numbers < 2).all()

True

сколько значений < 1?

In [None]:
numbers < 1

25     True
26     True
27     True
28     True
29     True
30    False
31     True
32     True
33    False
34     True
dtype: bool

In [None]:
(numbers < 1).sum()

8

#### DataFrame

какие строки имеют значения Price < 100?

In [None]:
sp500.Price < 100

Symbol
MMM     False
ABT      True
ABBV     True
ACN      True
ACE     False
        ...  
YHOO     True
YUM      True
ZMH     False
ZION     True
ZTS      True
Name: Price, Length: 500, dtype: bool

теперь получим строки, в которых Price < 100

In [None]:
sp500[sp500.Price < 100]

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ABT,Health Care,39.60,15.573
ABBV,Health Care,53.95,2.954
ACN,Information Technology,79.79,8.326
ADBE,Information Technology,64.30,13.262
AES,Utilities,13.61,5.781
...,...,...,...
XYL,Industrials,38.42,12.127
YHOO,Information Technology,35.02,12.768
YUM,Consumer Discretionary,74.77,5.147
ZION,Financials,28.43,30.191


извлекаем лишь те строки, в которых значение Price < 10 и > 6

In [None]:
# Rows whose Price lies strictly between 6 and 10, keeping only the
# Price column; row mask and column label go into one .loc selection.
r = sp500.loc[(sp500['Price'] < 10) & (sp500['Price'] > 6), 'Price']
r

Symbol
HCBK    9.80
HBAN    9.10
SLM     8.82
WIN     9.38
Name: Price, dtype: float64

извлекаем строки, в которых переменная Sector принимает значение Health Care, а переменная Price больше или равна 100.00

In [None]:
# Health Care rows priced at 100 or more, restricted to the Price and
# Sector columns in a single .loc selection.
r = sp500.loc[
    (sp500['Sector'] == 'Health Care') & (sp500['Price'] >= 100.00),
    ['Price', 'Sector'],
]
r

Unnamed: 0_level_0,Price,Sector
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1
ACT,213.77,Health Care
ALXN,162.30,Health Care
AGN,166.92,Health Care
AMGN,114.33,Health Care
BCR,146.62,Health Care
...,...,...
REGN,297.77,Health Care
TMO,115.74,Health Care
WAT,100.54,Health Care
WLP,108.82,Health Care


использование метода .isin

In [None]:
s_tmp = sp500.Sector.isin(['Information Technology', 'Financials'])
s_tmp

Symbol
MMM     False
ABT     False
ABBV    False
ACN      True
ACE      True
        ...  
YHOO     True
YUM     False
ZMH     False
ZION     True
ZTS     False
Name: Sector, Length: 500, dtype: bool

In [None]:
sp500[s_tmp].head()

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ACN,Information Technology,79.79,8.326
ACE,Financials,102.91,86.897
ADBE,Information Technology,64.3,13.262
AFL,Financials,61.31,34.527
AKAM,Information Technology,53.65,15.193


использование метода .query

In [None]:
# Boolean-mask selection of Health Care rows with Price >= 100, shown
# as a baseline for the equivalent .query() call in the next cell.
r = sp500[(sp500.Sector == 'Health Care') &
          (sp500.Price >= 100.00)] [['Price', 'Sector']]
r

Unnamed: 0_level_0,Price,Sector
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1
ACT,213.77,Health Care
ALXN,162.30,Health Care
AGN,166.92,Health Care
AMGN,114.33,Health Care
BCR,146.62,Health Care
...,...,...
REGN,297.77,Health Care
TMO,115.74,Health Care
WAT,100.54,Health Care
WLP,108.82,Health Care


In [None]:
# .query() takes the filter as an expression string in which the column
# names (Sector, Price) are referenced directly; the trailing [[...]]
# then keeps only the two listed columns.
q = sp500.query("Sector=='Health Care' & Price >= 100")[['Price', 'Sector']]
q

Unnamed: 0_level_0,Price,Sector
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1
ACT,213.77,Health Care
ALXN,162.30,Health Care
AGN,166.92,Health Care
AMGN,114.33,Health Care
BCR,146.62,Health Care
...,...,...
REGN,297.77,Health Care
TMO,115.74,Health Care
WAT,100.54,Health Care
WLP,108.82,Health Care


### Добавление

#### оператор [ ]

создаем копию, чтобы исходные данные остались в неизменном виде

In [None]:
sp500_copy = sp500.copy()

добавляем столбец

In [None]:
sp500_copy['RoundedPrice'] = sp500_copy.Price.round()
sp500_copy.head(3)

Unnamed: 0_level_0,Sector,Price,Book Value,RoundedPrice
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
MMM,Industrials,141.14,26.668,141.0
ABT,Health Care,39.6,15.573,40.0
ABBV,Health Care,53.95,2.954,54.0


#### метод .insert()

создаем копию, чтобы исходные данные остались в неизменном виде

In [None]:
sp500_copy = sp500.copy()

вставляем столбец RoundedPrice в качестве второго столбца датафрейма (позиция 1, сразу после Sector)

In [None]:
# insert() modifies the frame in place; loc=1 puts the new column
# at position 1.
sp500_copy.insert(
    loc=1,
    column='RoundedPrice',
    value=sp500_copy['Price'].round(),
)
sp500_copy.head(3)

Unnamed: 0_level_0,Sector,RoundedPrice,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
MMM,Industrials,141.0,141.14,26.668
ABT,Health Care,40.0,39.6,15.573
ABBV,Health Care,54.0,53.95,2.954


#### метод .assign()

создаем копию, чтобы исходные данные остались в неизменном виде

In [None]:
sp500_copy = sp500.copy()

одновременное добавление двух столбцов:

In [None]:
# assign() returns a new frame with both columns added. The lambda
# receives the frame being built, so it can already read the
# Rounded_Price column defined by the preceding keyword argument.
sp500_copy.assign(Rounded_Price=sp500_copy.Price.round(),
                  R_BookValue_Price=lambda x: (x['Book Value'] / x['Rounded_Price']))

Unnamed: 0_level_0,Sector,Price,Book Value,Rounded_Price,R_BookValue_Price
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
MMM,Industrials,141.14,26.668,141.0,0.189135
ABT,Health Care,39.60,15.573,40.0,0.389325
ABBV,Health Care,53.95,2.954,54.0,0.054704
ACN,Information Technology,79.79,8.326,80.0,0.104075
ACE,Financials,102.91,86.897,103.0,0.843660
...,...,...,...,...,...
YHOO,Information Technology,35.02,12.768,35.0,0.364800
YUM,Consumer Discretionary,74.77,5.147,75.0,0.068627
ZMH,Health Care,101.84,37.181,102.0,0.364520
ZION,Financials,28.43,30.191,28.0,1.078250


### Выравнивание данных

#### Series

первая серия для примеров

In [None]:
# First example Series: the dict keys become the index labels a, b, f.
s_1 = pd.Series({'a': 77, 'b': 33, 'f': 11})
s_1

a    77
b    33
f    11
dtype: int64

вторая серия для примеров

In [None]:
# Second example Series: labels c, b, a in that order.
s_2 = pd.Series({'c': 11, 'b': 5, 'a': 6})
s_2

c    11
b     5
a     6
dtype: int64

для непересекающейся части индексов будут получены значения NaN

In [None]:
s_1+s_2

a    83.0
b    38.0
c     NaN
f     NaN
dtype: float64

метки не обязательно должны быть уникальными

In [None]:
# Series with a repeated label: 'a' occurs three times, 'd' once.
s_1 = pd.Series([77, 33, 15, 3], index=list('aaad'))
s_1

a    77
a    33
a    15
d     3
dtype: int64

In [None]:
# Series with labels c, a, a: 'a' occurs twice.
s_2 = pd.Series([11, 5, 6], index=list('caa'))
s_2

c    11
a     5
a     6
dtype: int64

в s_1 три метки 'a', в s_2 две метки 'a' - в результате получается 3 × 2 = 6 меток 'a' (все попарные комбинации)

In [None]:
s_2+s_1

a    82.0
a    38.0
a    20.0
a    83.0
a    39.0
a    21.0
c     NaN
d     NaN
dtype: float64

#### DataFrame

In [None]:
sp500_part_1 = sp500.iloc[0:5, 0:2].copy()
sp500_part_2 = sp500.iloc[2:7, 1:3].copy()

In [None]:
sp500_part_1

Unnamed: 0_level_0,Sector,Price
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1
MMM,Industrials,141.14
ABT,Health Care,39.6
ABBV,Health Care,53.95
ACN,Information Technology,79.79
ACE,Financials,102.91


In [None]:
sp500_part_2

Unnamed: 0_level_0,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1
ABBV,53.95,2.954
ACN,79.79,8.326
ACE,102.91,86.897
ACT,213.77,55.188
ADBE,64.3,13.262


In [None]:
sp500_part_1 + sp500_part_2

Unnamed: 0_level_0,Book Value,Price,Sector
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ABBV,,107.9,
ABT,,,
ACE,,205.82,
ACN,,159.58,
ACT,,,
ADBE,,,
MMM,,,


происходит выравнивание при создании датафрейма

In [None]:
# Two Series on the default labels 0..1 and a third labelled 1..2.
series_1 = pd.Series([70, 90])
series_2 = pd.Series([71, 91])
series_3 = pd.Series({1: 85, 2: 87})
# The constructor aligns the columns on the union of the labels and
# fills the positions a Series does not cover with NaN.
df = pd.DataFrame(dict(col_1=series_1, col_2=series_2, col_3=series_3))
df

Unnamed: 0,col_1,col_2,col_3
0,70.0,71.0,
1,90.0,91.0,85.0
2,,,87.0


### Сортировка

#### по индексу

In [None]:
sp500.head()

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,Industrials,141.14,26.668
ABT,Health Care,39.6,15.573
ABBV,Health Care,53.95,2.954
ACN,Information Technology,79.79,8.326
ACE,Financials,102.91,86.897


In [None]:
sp500.sort_index().head()

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,Health Care,56.18,16.928
AA,Materials,13.52,9.67
AAPL,Information Technology,614.13,139.46
ABBV,Health Care,53.95,2.954
ABC,Health Care,71.64,9.43


In [None]:
sp500.sort_index(axis=1).head()

Unnamed: 0_level_0,Book Value,Price,Sector
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,26.668,141.14,Industrials
ABT,15.573,39.6,Health Care
ABBV,2.954,53.95,Health Care
ACN,8.326,79.79,Information Technology
ACE,86.897,102.91,Financials


#### по значению

In [None]:
sp500.sort_values(by='Price').head()

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BEAM,Consumer Discretionary,0.0,
FTR,Telecommunications Services,5.81,3.989
SLM,Financials,8.82,11.895
HBAN,Financials,9.1,6.995
WIN,Telecommunications Services,9.38,1.199


In [None]:
sp500.sort_values(by='Price', ascending=False).head()

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
PCLN,Industrials,1197.12,137.886
GHC,Consumer Discretionary,677.29,0.0
AAPL,Information Technology,614.13,139.46
GOOG,Information Technology,552.7,135.977
AZO,Consumer Discretionary,540.9,-51.275


#### наименьшее / наибольшее значение

In [None]:
sp500.nsmallest(5, 'Price')

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BEAM,Consumer Discretionary,0.0,
FTR,Telecommunications Services,5.81,3.989
SLM,Financials,8.82,11.895
HBAN,Financials,9.1,6.995
WIN,Telecommunications Services,9.38,1.199


In [None]:
sp500.nlargest(5, 'Price')

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
PCLN,Industrials,1197.12,137.886
GHC,Consumer Discretionary,677.29,0.0
AAPL,Information Technology,614.13,139.46
GOOG,Information Technology,552.7,135.977
AZO,Consumer Discretionary,540.9,-51.275
