# Школа алготрейдеров. Блок торгового ПО и программирования
## Занятие 2. Стандартная библиотека языка Python. Обработка табличных данных с помощью библиотеки pandas

### Материалы
- [Интерактивный учебник по основам питона на русском языке](http://pythontutor.ru)
- [Learning IPython for Interactive Computing and Data Visualization, second edition](http://ipython-books.github.io/minibook/) by Cyrille Rossant

### [Тип `set`](https://docs.python.org/3/library/stdtypes.html#set) (неупорядоченные множества)

In [1]:
s = {1, 7, 'abc', 3.2}
s

{'abc', 3.2, 1, 7}

In [2]:
len(s), list(s)

(4, ['abc', 3.2, 1, 7])

In [3]:
for x in s:
    print(x * 2)

abcabc
6.4
2
14


In [4]:
[x * 2 for x in s]

['abcabc', 6.4, 2, 14]

#### Получить все уникальные элементы списка

In [5]:
a = [x % 7 for x in range(10) if x % 3 in (0, 2)]
a

[0, 2, 3, 5, 6, 1, 2]

In [6]:
set(a)

{0, 1, 2, 3, 5, 6}

In [7]:
list(set(a))

[0, 1, 2, 3, 5, 6]

In [8]:
{x % 7 for x in range(10) if x % 3 in (0, 2)}

{0, 1, 2, 3, 5, 6}

#### Операции с множествами

In [9]:
a = set()  # пустое множество
b = {2, 3, 4}
a.add(1)
a.add(2)
b.remove(4)
a, b

({1, 2}, {2, 3})

In [10]:
a | b

{1, 2, 3}

In [11]:
a & b

{2}

In [12]:
a - b

{1}

In [13]:
a ^ b

{1, 3}

In [14]:
a ^= b
a

{1, 3}

In [15]:
1 in a, 2 in a

(True, False)

In [16]:
{3, 1, 2} > {3, 2}

True

In [19]:
{3, 1, 2} > {2, 1, 3}

False

In [20]:
{3, 1, 2} >= {2, 1, 3}

True

#### Ограничения на элементы

In [21]:
s = set()
s.add(8)
s.add('text')
s.add(True)
s

{8, True, 'text'}

In [22]:
s.add([5, 6, 7])  # элементы изменяемых типов нельзя класть в set

TypeError: unhashable type: 'list'

In [23]:
s.add((5, 6, 7))
s

{8, True, (5, 6, 7), 'text'}

#### Неизменяемый `set` — `frozenset`

In [24]:
s.add({1})

TypeError: unhashable type: 'set'

In [25]:
s.update((frozenset({2, 3}), frozenset({3, 2})))
s

{8, True, (5, 6, 7), 'text', frozenset({2, 3})}

#### Особенности

In [26]:
{False, True, 0, 1, 0.0, 1.0, None}

{0.0, 1.0, None}

In [27]:
0 == False == 0.0 != 1 == True == 1.0

True

### [Тип `dict`](https://docs.python.org/3/library/stdtypes.html#mapping-types-dict) (словари)

In [28]:
d = {5: 'five', '7': 'seven', 3.14: '≈ pi', (9, 0): []}
d

{(9, 0): [], 3.14: '≈ pi', 5: 'five', '7': 'seven'}

In [29]:
type({})  # пустой словарь

dict

In [30]:
d[9, 0]

[]

In [31]:
d[9, 0] = 49
d

{(9, 0): 49, 3.14: '≈ pi', 5: 'five', '7': 'seven'}

In [32]:
d[frozenset({-3, 'ab'})] = {89}
d

{(9, 0): 49,
 3.14: '≈ pi',
 frozenset({'ab', -3}): {89},
 5: 'five',
 '7': 'seven'}

In [33]:
{x: x ** 2 for x in range(10)}

{0: 0, 1: 1, 2: 4, 3: 9, 4: 16, 5: 25, 6: 36, 7: 49, 8: 64, 9: 81}

In [34]:
a = {1: 2}
b = a
b[1] = 89
a, b

({1: 89}, {1: 89})

In [35]:
from copy import deepcopy
a = {1: 2}
b = deepcopy(a)
b[1] = 89
a, b

({1: 2}, {1: 89})

In [36]:
d = {'One': 1, 'Two': 2, 'Three': 3}
for x in d:
    print(x)

One
Two
Three


In [37]:
for key in d.keys():
    print(key)

One
Two
Three


In [38]:
type(d.keys())

dict_keys

In [39]:
for value in d.values():
    print(value)

1
2
3


In [40]:
for item in d.items():
    print(item)

('One', 1)
('Two', 2)
('Three', 3)


In [41]:
for key, value in d.items():
    print('d[{}] = {}'.format(key, value))

d[One] = 1
d[Two] = 2
d[Three] = 3


In [42]:
list(d.items())

[('One', 1), ('Two', 2), ('Three', 3)]

#### Распаковка словарей

In [43]:
a = {'one': 1, 'two': 2}
{'three': 3, **a, 'four': 4}

{'four': 4, 'one': 1, 'three': 3, 'two': 2}

In [44]:
'one = {one}, two = {two}'.format(**a)

'one = 1, two = 2'

### Функции

In [45]:
def find_all_squares_in_range(max_value):
    """Return a list of the perfect squares 0, 1, 4, ... that do not exceed max_value.

    The squares are produced in increasing order, starting from 0 ** 2.
    If max_value is negative the result is an empty list.
    """
    found = []
    base = 0
    while True:
        square = base ** 2
        if square > max_value:
            # First square past the limit: nothing further can qualify.
            break
        found.append(square)
        base += 1
    return found

In [46]:
find_all_squares_in_range(0)

[0]

In [47]:
find_all_squares_in_range(111)

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100]

In [48]:
sum(find_all_squares_in_range(10))

14

In [49]:
def find_all_squares_in_range(max_value, skip_even=False):
    """Return a list of the perfect squares that do not exceed max_value.

    Args:
        max_value: upper bound (inclusive) for the squares.
        skip_even: when true, squares of even bases (0, 2, 4, ...) are
            left out, so only squares of odd numbers are returned.

    Returns:
        The qualifying squares in increasing order.
    """
    found = []
    base = 0
    while base ** 2 <= max_value:
        # Keep the square unless we were asked to skip even bases
        # and this base is even.
        if not skip_even or base % 2 != 0:
            found.append(base ** 2)
        base += 1
    return found

In [50]:
find_all_squares_in_range(111, True)

[1, 9, 25, 49, 81]

In [51]:
find_all_squares_in_range(111, skip_even=True)

[1, 9, 25, 49, 81]

In [52]:
def strange_abs(x):
    """Return -x when x is negative; otherwise fall off the end and return None.

    There is no return statement for non-negative input, so the call
    evaluates to None in that case (the notebook prints exactly that for
    strange_abs(9)).
    """
    is_negative = x < 0
    if is_negative:
        return -x

In [53]:
print(strange_abs(-9))

9


In [54]:
print(strange_abs(9))

None


#### [Лямбда-функции](https://docs.python.org/3/tutorial/controlflow.html#lambda-expressions)

In [55]:
average = lambda x, y: (x + y) / 2
average

<function __main__.<lambda>>

In [56]:
average(6, 9)

7.5

#### Сортировка по параметру

In [57]:
a = [(1, 'one'), (4, 'four'), (2, 'two'), (3, 'three')]
a.sort()
a

[(1, 'one'), (2, 'two'), (3, 'three'), (4, 'four')]

In [58]:
a.sort(key=lambda item: item[1])  # «ключ» для сортировки — 2-й элемент каждого кортежа
a

[(4, 'four'), (1, 'one'), (3, 'three'), (2, 'two')]

In [59]:
companies = ['Finam', 'MOEX', 'Yandex', 'Google']
sorted(companies)

['Finam', 'Google', 'MOEX', 'Yandex']

In [60]:
# сравниваем сначала по длине, затем по названию; обратный порядок
sorted(companies, key=lambda name: (len(name), name), reverse=True)

['Yandex', 'Google', 'Finam', 'MOEX']

#### Обработка произвольного числа аргументов

In [61]:
def print_all_args(*args, **kwargs):
    """Print the positional arguments (as a tuple) and the keyword arguments (as a dict)."""
    template = 'Positional arguments: {},\nkeyword arguments: {}'
    message = template.format(args, kwargs)
    print(message)
    
print_all_args(2, 'text', None, first=1, second=2, third=3)

Positional arguments: (2, 'text', None),
keyword arguments: {'third': 3, 'second': 2, 'first': 1}


#### Генераторы

In [62]:
def find_all_squares_in_range_better(max_value):
    """Lazily yield the perfect squares 0, 1, 4, ... that do not exceed max_value.

    This is a generator: squares are computed one at a time as the caller
    iterates, instead of being collected into a list first.
    """
    base = 0
    square = 0
    while square <= max_value:
        yield square
        base += 1
        square = base ** 2

In [63]:
type(find_all_squares_in_range_better(10))

generator

In [64]:
sum(find_all_squares_in_range_better(10))

14

In [65]:
list(find_all_squares_in_range_better(10))

[0, 1, 4, 9]

### [PEP8](https://www.python.org/dev/peps/pep-0008/): рекомендации по оформлению кода

### [`Fraction`](https://docs.python.org/3/library/fractions.html#module-fractions) — рациональные дроби

### [`Decimal`](https://docs.python.org/3/library/decimal.html#module-decimal) — вещественные числа произвольной точности

### [Модуль `datetime`](https://docs.python.org/3/library/datetime.html#module-datetime): работа с датой и временем

In [66]:
import datetime as dt
dt.time(21, 4, 39)

datetime.time(21, 4, 39)

In [67]:
from datetime import time
time(21, 4, 39)

datetime.time(21, 4, 39)

In [68]:
ts = dt.datetime(2016, 6, 3, 17, 24, 4)
ts

datetime.datetime(2016, 6, 3, 17, 24, 4)

In [69]:
ts.day

3

In [70]:
ts.minute

24

In [71]:
ts.strftime('%d.%m.%Y %H:%M:%S')

'03.06.2016 17:24:04'

In [72]:
dt.datetime.strptime('03.06.2016 17:24:04', '%d.%m.%Y %H:%M:%S')

datetime.datetime(2016, 6, 3, 17, 24, 4)

In [73]:
lecture_start = dt.datetime(2016, 9, 8, 20, 45)
lecture_end = dt.datetime(2016, 9, 8, 22, 15)
td = lecture_end - lecture_start
td

datetime.timedelta(0, 5400)

In [74]:
td.total_seconds()

5400.0

In [75]:
lecture_start + td

datetime.datetime(2016, 9, 8, 22, 15)

In [76]:
dt.datetime.now()

datetime.datetime(2016, 9, 11, 10, 30, 10, 684015)

### Загрузка данных из файлов

#### [Загружаем котировки](http://www.finam.ru/profile/moex-akcii/pllc-yandex-n-v/export/?market=1&em=388383&code=YNDX&apply=0&df=7&mf=8&yf=2016&from=07.09.2016&dt=7&mt=8&yt=2016&to=07.09.2016&p=7&f=YNDX_160907_160907&e=.txt&cn=YNDX&dtf=1&tmf=1&MSOR=1&mstime=on&mstimever=1&sep=1&sep2=1&datf=1&at=1)

![](finam-2-yndx-params.png)

In [77]:
%ls

 Том в устройстве C не имеет метки.
 Серийный номер тома: B019-4922

 Содержимое папки C:\Users\User\Documents\IPython Notebooks\Финам\jupyter-notebooks

11.09.2016  10:30    <DIR>          .
11.09.2016  10:30    <DIR>          ..
08.09.2016  09:12    <DIR>          .ipynb_checkpoints
11.09.2016  10:26            69 239 finam-1.ipynb
11.09.2016  10:30            42 954 finam-2.ipynb
07.09.2016  22:42            26 248 finam-2-yndx-params.png
07.09.2016  22:09           162 023 YNDX_160901_160907.csv
               4 файлов        300 464 байт
               3 папок  10 772 836 352 байт свободно


In [78]:
filename = 'YNDX_160901_160907.csv'
file = open(filename)

In [79]:
file.read()[:100]

'<TICKER>,<PER>,<DATE>,<TIME>,<LAST>,<VOL>\nYNDX,0,20160901,100003,1430.500000000,7\nYNDX,0,20160901,10'

In [80]:
file = open(filename)

In [81]:
file.readline()

'<TICKER>,<PER>,<DATE>,<TIME>,<LAST>,<VOL>\n'

In [82]:
# Read every line of the file into a list. The `with` block closes the file
# handle as soon as reading is done; the bare open(filename).readlines()
# form leaves the handle open until the garbage collector gets to it.
with open(filename) as source:
    lines = source.readlines()
lines[:10]

['<TICKER>,<PER>,<DATE>,<TIME>,<LAST>,<VOL>\n',
 'YNDX,0,20160901,100003,1430.500000000,7\n',
 'YNDX,0,20160901,100010,1431.000000000,1\n',
 'YNDX,0,20160901,100010,1431.000000000,23\n',
 'YNDX,0,20160901,100010,1431.000000000,9\n',
 'YNDX,0,20160901,100010,1430.500000000,6\n',
 'YNDX,0,20160901,100014,1438.000000000,1\n',
 'YNDX,0,20160901,100018,1431.500000000,16\n',
 'YNDX,0,20160901,100018,1431.500000000,5\n',
 'YNDX,0,20160901,100018,1431.500000000,5\n']

In [83]:
# Iterating a file object yields its lines, so starred unpacking splits the
# first line (the header) from all remaining lines. The `with` block makes
# sure the file handle is closed once the unpacking has consumed the file.
with open(filename) as source:
    header, *lines = source
header, lines[:10]

('<TICKER>,<PER>,<DATE>,<TIME>,<LAST>,<VOL>\n',
 ['YNDX,0,20160901,100003,1430.500000000,7\n',
  'YNDX,0,20160901,100010,1431.000000000,1\n',
  'YNDX,0,20160901,100010,1431.000000000,23\n',
  'YNDX,0,20160901,100010,1431.000000000,9\n',
  'YNDX,0,20160901,100010,1430.500000000,6\n',
  'YNDX,0,20160901,100014,1438.000000000,1\n',
  'YNDX,0,20160901,100018,1431.500000000,16\n',
  'YNDX,0,20160901,100018,1431.500000000,5\n',
  'YNDX,0,20160901,100018,1431.500000000,5\n',
  'YNDX,0,20160901,100018,1430.500000000,21\n'])

In [84]:
header = [name[1:-1].capitalize() for name in header.strip().split(',')]
header

['Ticker', 'Per', 'Date', 'Time', 'Last', 'Vol']

In [85]:
[
    line.strip().split(',')
    for line in lines
][:10]

[['YNDX', '0', '20160901', '100003', '1430.500000000', '7'],
 ['YNDX', '0', '20160901', '100010', '1431.000000000', '1'],
 ['YNDX', '0', '20160901', '100010', '1431.000000000', '23'],
 ['YNDX', '0', '20160901', '100010', '1431.000000000', '9'],
 ['YNDX', '0', '20160901', '100010', '1430.500000000', '6'],
 ['YNDX', '0', '20160901', '100014', '1438.000000000', '1'],
 ['YNDX', '0', '20160901', '100018', '1431.500000000', '16'],
 ['YNDX', '0', '20160901', '100018', '1431.500000000', '5'],
 ['YNDX', '0', '20160901', '100018', '1431.500000000', '5'],
 ['YNDX', '0', '20160901', '100018', '1430.500000000', '21']]

In [86]:
# zip позволяет одновременно пройтись по нескольким последовательностям (итерация останавливается на самой короткой из них)
[
    {
        name: value
        for name, value in zip(header, line.strip().split(','))
    }
    for line in lines
][:5]

[{'Date': '20160901',
  'Last': '1430.500000000',
  'Per': '0',
  'Ticker': 'YNDX',
  'Time': '100003',
  'Vol': '7'},
 {'Date': '20160901',
  'Last': '1431.000000000',
  'Per': '0',
  'Ticker': 'YNDX',
  'Time': '100010',
  'Vol': '1'},
 {'Date': '20160901',
  'Last': '1431.000000000',
  'Per': '0',
  'Ticker': 'YNDX',
  'Time': '100010',
  'Vol': '23'},
 {'Date': '20160901',
  'Last': '1431.000000000',
  'Per': '0',
  'Ticker': 'YNDX',
  'Time': '100010',
  'Vol': '9'},
 {'Date': '20160901',
  'Last': '1430.500000000',
  'Per': '0',
  'Ticker': 'YNDX',
  'Time': '100010',
  'Vol': '6'}]

### [pandas](http://pandas.pydata.org/pandas-docs/stable/)

In [87]:
import pandas as pd

In [88]:
data = pd.read_csv(filename)
data.head(5)

Unnamed: 0,<TICKER>,<PER>,<DATE>,<TIME>,<LAST>,<VOL>
0,YNDX,0,20160901,100003,1430.5,7
1,YNDX,0,20160901,100010,1431.0,1
2,YNDX,0,20160901,100010,1431.0,23
3,YNDX,0,20160901,100010,1431.0,9
4,YNDX,0,20160901,100010,1430.5,6


In [89]:
# Load the trades and index them by a single timestamp built from the
# separate <DATE> and <TIME> columns.
#
# Combining columns via parse_dates={'<DATETIME>': [...]} is deprecated in
# recent pandas releases, so the two columns are read as strings, glued
# together and parsed with an explicit format instead.
# NOTE(review): assumes <DATE> is YYYYMMDD and <TIME> is zero-padded HHMMSS,
# as in the sample rows of this file - confirm for other exports.
data = pd.read_csv(filename, dtype={'<DATE>': str, '<TIME>': str})
timestamps = pd.to_datetime(
    data.pop('<DATE>') + data.pop('<TIME>'),  # pop() also drops the source columns
    format='%Y%m%d%H%M%S',
)
data.index = pd.DatetimeIndex(timestamps, name='<DATETIME>')
data.head(5)

Unnamed: 0_level_0,<TICKER>,<PER>,<LAST>,<VOL>
<DATETIME>,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-09-01 10:00:03,YNDX,0,1430.5,7
2016-09-01 10:00:10,YNDX,0,1431.0,1
2016-09-01 10:00:10,YNDX,0,1431.0,23
2016-09-01 10:00:10,YNDX,0,1431.0,9
2016-09-01 10:00:10,YNDX,0,1430.5,6


In [90]:
data.rename(columns=lambda name: name[1:-1].lower(), inplace=True)
data.rename(columns={'last': 'price'}, inplace=True)
data.head(5)

Unnamed: 0_level_0,ticker,per,price,vol
<DATETIME>,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-09-01 10:00:03,YNDX,0,1430.5,7
2016-09-01 10:00:10,YNDX,0,1431.0,1
2016-09-01 10:00:10,YNDX,0,1431.0,23
2016-09-01 10:00:10,YNDX,0,1431.0,9
2016-09-01 10:00:10,YNDX,0,1430.5,6


In [91]:
data.drop(['ticker', 'per'], axis=1, inplace=True)
data.head(5)

Unnamed: 0_level_0,price,vol
<DATETIME>,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-09-01 10:00:03,1430.5,7
2016-09-01 10:00:10,1431.0,1
2016-09-01 10:00:10,1431.0,23
2016-09-01 10:00:10,1431.0,9
2016-09-01 10:00:10,1430.5,6


In [92]:
data.describe()

Unnamed: 0,price,vol
count,3893.0,3893.0
mean,1446.438351,35.599538
std,18.306256,78.610474
min,1410.5,1.0
25%,1431.5,2.0
50%,1445.5,10.0
75%,1462.0,37.0
max,1482.5,1000.0


In [93]:
data.price.head(5)

<DATETIME>
2016-09-01 10:00:03    1430.5
2016-09-01 10:00:10    1431.0
2016-09-01 10:00:10    1431.0
2016-09-01 10:00:10    1431.0
2016-09-01 10:00:10    1430.5
Name: price, dtype: float64

In [94]:
data.price.min(), data.price.max(), data.price.median(), data.price.std()

(1410.5, 1482.5, 1445.5, 18.306256155837445)

In [95]:
data.iloc[0]

price    1430.5
vol         7.0
Name: 2016-09-01 10:00:03, dtype: float64

In [96]:
data.iloc[1000:1100:10]

Unnamed: 0_level_0,price,vol
<DATETIME>,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-09-02 15:17:40,1460.5,5
2016-09-02 15:30:12,1461.5,35
2016-09-02 15:30:13,1464.5,7
2016-09-02 15:33:34,1464.5,191
2016-09-02 16:03:59,1466.5,170
2016-09-02 16:28:00,1462.5,137
2016-09-02 16:28:00,1463.0,9
2016-09-02 16:28:18,1458.5,24
2016-09-02 16:30:36,1464.0,60
2016-09-02 16:30:39,1466.5,120


In [97]:
for i, row in data.iterrows():
    print('Datetime {}, price {}, vol {}'.format(
            row.name, row.price, row.vol
        ))
    break

Datetime 2016-09-01 10:00:03, price 1430.5, vol 7.0


In [98]:
data[(data.vol > 700) & (data.price > 1440)]

Unnamed: 0_level_0,price,vol
<DATETIME>,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-09-06 17:13:13,1447.0,836
2016-09-06 17:39:28,1445.5,747
2016-09-06 17:42:53,1445.0,1000


In [99]:
data.vol > 700

<DATETIME>
2016-09-01 10:00:03    False
2016-09-01 10:00:10    False
2016-09-01 10:00:10    False
2016-09-01 10:00:10    False
2016-09-01 10:00:10    False
2016-09-01 10:00:14    False
2016-09-01 10:00:18    False
2016-09-01 10:00:18    False
2016-09-01 10:00:18    False
2016-09-01 10:00:18    False
2016-09-01 10:00:24    False
2016-09-01 10:00:24    False
2016-09-01 10:00:24    False
2016-09-01 10:00:51    False
2016-09-01 10:00:51    False
2016-09-01 10:00:51    False
2016-09-01 10:00:52    False
2016-09-01 10:00:52    False
2016-09-01 10:01:04    False
2016-09-01 10:01:04    False
2016-09-01 10:01:08    False
2016-09-01 10:01:09    False
2016-09-01 10:01:10    False
2016-09-01 10:02:09    False
2016-09-01 10:02:09    False
2016-09-01 10:02:09    False
2016-09-01 10:02:09    False
2016-09-01 10:02:12    False
2016-09-01 10:02:43    False
2016-09-01 10:02:58    False
                       ...  
2016-09-07 18:28:57    False
2016-09-07 18:28:57    False
2016-09-07 18:28:57    False
201

In [100]:
from ipywidgets import interact

@interact
def show_deal_count(vol_threshold=(0, 1000)):
    """Return the number of rows of `data` whose `vol` is greater than vol_threshold.

    NOTE(review): the (0, 1000) tuple default is presumably turned by
    ipywidgets' `interact` into a slider over that range - confirm against
    the ipywidgets documentation.
    """
    above_threshold = data.vol > vol_threshold
    matching_deals = data[above_threshold]
    return len(matching_deals)

63

In [101]:
data.resample('2H').mean().head(15)

Unnamed: 0_level_0,price,vol
<DATETIME>,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-09-01 10:00:00,1434.65748,24.370079
2016-09-01 12:00:00,1436.693548,61.612903
2016-09-01 14:00:00,1436.811688,21.380952
2016-09-01 16:00:00,1457.742515,62.718563
2016-09-01 18:00:00,1461.669118,28.735294
2016-09-01 20:00:00,,
2016-09-01 22:00:00,,
2016-09-02 00:00:00,,
2016-09-02 02:00:00,,
2016-09-02 04:00:00,,


In [102]:
data_gr = data.resample('2H').mean()
data_gr[data_gr.price.notnull()].head(10)

Unnamed: 0_level_0,price,vol
<DATETIME>,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-09-01 10:00:00,1434.65748,24.370079
2016-09-01 12:00:00,1436.693548,61.612903
2016-09-01 14:00:00,1436.811688,21.380952
2016-09-01 16:00:00,1457.742515,62.718563
2016-09-01 18:00:00,1461.669118,28.735294
2016-09-02 10:00:00,1456.568182,38.462121
2016-09-02 12:00:00,1457.677778,58.933333
2016-09-02 14:00:00,1461.236111,42.486111
2016-09-02 16:00:00,1468.531716,28.365672
2016-09-02 18:00:00,1461.5375,21.95


In [103]:
data_gr[~data_gr.price.isnull()].head(10)

Unnamed: 0_level_0,price,vol
<DATETIME>,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-09-01 10:00:00,1434.65748,24.370079
2016-09-01 12:00:00,1436.693548,61.612903
2016-09-01 14:00:00,1436.811688,21.380952
2016-09-01 16:00:00,1457.742515,62.718563
2016-09-01 18:00:00,1461.669118,28.735294
2016-09-02 10:00:00,1456.568182,38.462121
2016-09-02 12:00:00,1457.677778,58.933333
2016-09-02 14:00:00,1461.236111,42.486111
2016-09-02 16:00:00,1468.531716,28.365672
2016-09-02 18:00:00,1461.5375,21.95


In [104]:
data.resample('1H').agg('mean').head(10)

Unnamed: 0_level_0,price,vol
<DATETIME>,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-09-01 10:00:00,1434.044444,23.977778
2016-09-01 11:00:00,1436.148649,25.324324
2016-09-01 12:00:00,1436.5,91.642857
2016-09-01 13:00:00,1436.852941,36.882353
2016-09-01 14:00:00,1444.206522,20.434783
2016-09-01 15:00:00,1434.972973,21.616216
2016-09-01 16:00:00,1449.383178,59.841121
2016-09-01 17:00:00,1461.682819,64.07489
2016-09-01 18:00:00,1461.669118,28.735294
2016-09-01 19:00:00,,


In [105]:
data.resample('1H').agg({
        'vol': 'sum',
        'price': 'mean'
    }).head(10)

Unnamed: 0_level_0,price,vol
<DATETIME>,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-09-01 10:00:00,1434.044444,2158.0
2016-09-01 11:00:00,1436.148649,937.0
2016-09-01 12:00:00,1436.5,1283.0
2016-09-01 13:00:00,1436.852941,627.0
2016-09-01 14:00:00,1444.206522,940.0
2016-09-01 15:00:00,1434.972973,3999.0
2016-09-01 16:00:00,1449.383178,6403.0
2016-09-01 17:00:00,1461.682819,14545.0
2016-09-01 18:00:00,1461.669118,1954.0
2016-09-01 19:00:00,,


In [106]:
# Hourly bars: max / first / min / last price and total volume per hour.
#
# The nested-dict "renamer" form of agg() ({'price': {'price_min': 'min', ...}})
# was deprecated and later removed from pandas (it now raises
# SpecificationError), so aggregate with plain lists of function names and
# rename the second level of the resulting column MultiIndex instead.
# Columns are listed price-first to match the recorded output below.
data_gr = data.resample('1H').agg({
    'price': ['max', 'first', 'min', 'last'],
    'vol': ['sum'],
}).rename(
    columns={
        'max': 'price_max',
        'first': 'price_first',
        'min': 'price_min',
        'last': 'price_last',
    },
    level=1,  # only touch the aggregation-name level, not 'price' / 'vol'
).head(10)
data_gr

Unnamed: 0_level_0,price,price,price,price,vol
Unnamed: 0_level_1,price_max,price_first,price_min,price_last,sum
<DATETIME>,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2016-09-01 10:00:00,1439.0,1430.5,1430.0,1435.5,2158.0
2016-09-01 11:00:00,1440.0,1435.5,1434.0,1440.0,937.0
2016-09-01 12:00:00,1439.5,1436.5,1435.0,1436.0,1283.0
2016-09-01 13:00:00,1440.0,1436.5,1435.0,1440.0,627.0
2016-09-01 14:00:00,1448.5,1441.0,1437.5,1448.0,940.0
2016-09-01 15:00:00,1446.0,1442.0,1431.0,1437.5,3999.0
2016-09-01 16:00:00,1455.5,1440.0,1438.0,1454.5,6403.0
2016-09-01 17:00:00,1469.0,1456.0,1450.0,1460.5,14545.0
2016-09-01 18:00:00,1464.0,1459.0,1457.0,1464.0,1954.0
2016-09-01 19:00:00,,,,,


In [107]:
d = data.resample('1H')
d.agg?

In [108]:
data_gr.iloc[0]

price  price_max      1439.0
       price_first    1430.5
       price_min      1430.0
       price_last     1435.5
vol    sum            2158.0
Name: 2016-09-01 10:00:00, dtype: float64

In [109]:
data_gr.price

Unnamed: 0_level_0,price_max,price_first,price_min,price_last
<DATETIME>,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-09-01 10:00:00,1439.0,1430.5,1430.0,1435.5
2016-09-01 11:00:00,1440.0,1435.5,1434.0,1440.0
2016-09-01 12:00:00,1439.5,1436.5,1435.0,1436.0
2016-09-01 13:00:00,1440.0,1436.5,1435.0,1440.0
2016-09-01 14:00:00,1448.5,1441.0,1437.5,1448.0
2016-09-01 15:00:00,1446.0,1442.0,1431.0,1437.5
2016-09-01 16:00:00,1455.5,1440.0,1438.0,1454.5
2016-09-01 17:00:00,1469.0,1456.0,1450.0,1460.5
2016-09-01 18:00:00,1464.0,1459.0,1457.0,1464.0
2016-09-01 19:00:00,,,,


In [110]:
data_gr.price.price_min

<DATETIME>
2016-09-01 10:00:00    1430.0
2016-09-01 11:00:00    1434.0
2016-09-01 12:00:00    1435.0
2016-09-01 13:00:00    1435.0
2016-09-01 14:00:00    1437.5
2016-09-01 15:00:00    1431.0
2016-09-01 16:00:00    1438.0
2016-09-01 17:00:00    1450.0
2016-09-01 18:00:00    1457.0
2016-09-01 19:00:00       NaN
Freq: H, Name: price_min, dtype: float64