# Tick Data from LOBSTER

### Loading Libraries

In [3]:
# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd

# Data Visualization
import seaborn as sns
import matplotlib.pyplot as plt

# OS, Path & Time
import os
from pathlib import Path
from itertools import chain
from datetime import datetime, timedelta

# Warnings
import warnings

In [4]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [5]:
# Suppress every warning for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides deprecation warnings raised by
# pandas/matplotlib further down — consider narrowing the filter.
warnings.filterwarnings('ignore')

In [6]:
# Apply seaborn's 'whitegrid' style to the plots created below.
sns.set_style('whitegrid')

### Load Orderbook Data

In [7]:
# Directory (relative to the working directory) the CSV files are read from.
path = Path('data')

In [9]:
# Preview the 40 order book column labels: for each of the 10 levels,
# ask price, ask size, bid price and bid size, in that order.
['{} {} {}'.format(side, field, level)
 for level in range(10)
 for side in ('Ask', 'Bid')
 for field in ('Price', 'Size')]

['Ask Price 0',
 'Ask Size 0',
 'Bid Price 0',
 'Bid Size 0',
 'Ask Price 1',
 'Ask Size 1',
 'Bid Price 1',
 'Bid Size 1',
 'Ask Price 2',
 'Ask Size 2',
 'Bid Price 2',
 'Bid Size 2',
 'Ask Price 3',
 'Ask Size 3',
 'Bid Price 3',
 'Bid Size 3',
 'Ask Price 4',
 'Ask Size 4',
 'Bid Price 4',
 'Bid Size 4',
 'Ask Price 5',
 'Ask Size 5',
 'Bid Price 5',
 'Bid Size 5',
 'Ask Price 6',
 'Ask Size 6',
 'Bid Price 6',
 'Bid Size 6',
 'Ask Price 7',
 'Ask Size 7',
 'Bid Price 7',
 'Bid Size 7',
 'Ask Price 8',
 'Ask Size 8',
 'Bid Price 8',
 'Bid Size 8',
 'Ask Price 9',
 'Ask Size 9',
 'Bid Price 9',
 'Bid Size 9']

In [10]:
# Price and size labels, ask before bid within each of the 10 levels.
price = ['{} Price {}'.format(side, level)
         for level in range(10)
         for side in ('Ask', 'Bid')]
size = ['{} Size {}'.format(side, level)
        for level in range(10)
        for side in ('Ask', 'Bid')]

# Interleave pairwise so each price label is followed by its size label.
cols = [label for pair in zip(price, size) for label in pair]

In [12]:
# Load the order book file. `orders` must exist because it is concatenated
# with `messages` further down; leaving this load disabled raises NameError.
order_data = 'AMZN_2012-06-21_34200000_57600000_orderbook_10.csv'

# The file is read without a header row; column labels come from `cols`.
orders = pd.read_csv(path / order_data, header=None, names=cols)

In [14]:
# orders.info()

In [15]:
# orders.head()

### Parse Message Data

In [16]:
# LOBSTER message event codes mapped to readable labels.
# Every code 1-7 is listed: a code absent from this mapping becomes NaN under
# `Series.map` and is then silently dropped by `value_counts()`.
types = {1: 'submission',
         2: 'cancellation',
         3: 'deletion',
         4: 'execution_visible',
         5: 'execution_hidden',
         6: 'cross_trade',
         7: 'trading_halt'}

In [17]:
# Trading day of the data; used in the message file name and as the date part
# of the message timestamps.
trading_date = '2012-06-21'

# Number that forms the suffix of the message file name.
# NOTE(review): presumably the number of order book levels — confirm.
levels = 10

In [19]:
# Build the message file name from the trading date and the level count.
message_data = 'AMZN_{}_34200000_57600000_message_{}.csv'.format(trading_date, levels)

# The file is read without a header row, so the six column names are given here.
messages = pd.read_csv(
    path.joinpath(message_data),
    header=None,
    names=['time', 'type', 'order_id', 'size', 'price', 'direction'],
)
messages.info()

In [20]:
# Show the first rows of the message table as loaded.
messages.head()

In [21]:
# Count messages per event type, using the readable labels from `types`.
messages.type.map(types).value_counts()

In [22]:
# Turn `time` (an offset in seconds) into a full timestamp by adding it to
# midnight of the trading day.
messages['time'] = (pd.to_datetime(trading_date)
                    + pd.to_timedelta(messages['time'], unit='s'))

messages.head()

### Combine Message & Price Data

In [24]:
# Put the message columns and the order book columns side by side;
# rows are aligned on the index.
data = pd.concat((messages, orders), axis='columns')
data.info()

In [25]:
# Keep only execution messages: type 4 (visible) and type 5 (hidden).
ex = data.loc[data['type'].isin([4, 5])]

In [None]:
# Show the first execution rows.
ex.head()

### Plot Limit Order Prices for Messages with Visible or Hidden Execution

In [26]:
# Colormap name per book side, looked up when coloring the scatter points.
cmaps = {'Bid': 'Blues','Ask': 'Reds'}

In [28]:
# Scatter the quoted price at every book level for each execution message,
# shading each point by the size quoted at that level.
fig, ax = plt.subplots(figsize=(14, 8))

# Hand matplotlib the datetime64 values directly: `Series.dt.to_pydatetime()`
# is deprecated in recent pandas, and matplotlib plots datetime64 natively.
timestamps = ex['time'].to_numpy()

# Iterate over `levels` rather than a second hard-coded 10.
for level in range(levels):
    for side in ['Bid', 'Ask']:
        prices = ex['{} Price {}'.format(side, level)]
        sizes = ex['{} Size {}'.format(side, level)]
        # Cap the color scale at the 95th percentile of sizes so that a few
        # very large values do not flatten the rest of the color range.
        ax.scatter(x=timestamps, y=prices, c=sizes, cmap=cmaps[side],
                   s=1, vmin=1, vmax=sizes.quantile(.95))

# Limit the x-axis to 09:30-16:00 on the trading day, derived from
# `trading_date` instead of repeating the date literal.
session_day = pd.to_datetime(trading_date)
ax.set_xlim(session_day + pd.Timedelta(hours=9, minutes=30),
            session_day + pd.Timedelta(hours=16))
sns.despine()
fig.tight_layout();