# Working with Order Book Data

### Loading Libraries

In [1]:
# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd

# Data Visualization
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

# Warnings
import warnings

# File Path
import gzip
from pathlib import Path
from urllib.request import urlretrieve
from urllib.parse import urljoin

# Date & Time
from time import time
from datetime import timedelta

# Collections
from collections import namedtuple, Counter, defaultdict

# Operating System
import os
import shutil
from struct import unpack

In [2]:
# Render matplotlib figures inline in the notebook output
%matplotlib inline

In [3]:
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation notices from the libraries used below.
warnings.filterwarnings('ignore')

In [4]:
# Apply seaborn's 'whitegrid' style to the plots drawn after this point
sns.set_style('whitegrid')

In [5]:
def format_time(t):
    """Return a formatted time string 'HH:MM:SS'
    based on a numeric time() value

    Parameters
    ----------
    t : int or float
        Duration in seconds, e.g. the difference of two ``time()`` readings.

    Returns
    -------
    str
        Hours, minutes and seconds, each zero-padded to at least two digits.
        Fractional seconds are rounded to the nearest whole second.
    """
    # Round once, before splitting into components. Rounding each component
    # separately while formatting would let 59.7 seconds render as
    # '00:00:60' instead of carrying over into the minutes.
    total_seconds = int(round(t))
    m, s = divmod(total_seconds, 60)
    h, m = divmod(m, 60)
    return f'{h:0>2d}:{m:0>2d}:{s:0>2d}'

#### Set Data Paths

In [6]:
# Folder holding the data files, relative to the notebook's working directory
data_path = Path('data')

# Store that get_messages() reads from; kept as a plain string path
itch_store = str(data_path.joinpath('itch.h5'))

# Second store in the same folder, kept as a Path
# (presumably the order book output; not used in the cells shown here)
order_book_store = data_path.joinpath('order_book.h5')

# Trading day as MMDDYYYY; get_messages() parses it with '%m%d%Y'
date = '10302019'

### Build Order Book

In [7]:
# Ticker to analyse; also bound as the default `stock` argument of get_messages()
stock = 'AAPL'

# Labels for the two order sides, keyed by -1 / 1
# (presumably matching a sign-encoded buy_sell_indicator; confirm against the stored data)
order_dict = {-1: 'sell', 1: 'buy'}

### Get All Messages for Given Stock

In [9]:
def get_messages(date, stock=stock):
    """Collect trading messages for given stock

    Reads each trading message table from the HDF5 store at ``itch_store``,
    keeps the rows that belong to ``stock``, fills in order details on the
    message types that refer back to an earlier order, and returns all
    messages as a single frame ordered by time.

    Parameters
    ----------
    date : str
        Trading day in '%m%d%Y' format (e.g. '10302019'). It is added to
        each message's ``timestamp`` (assumed to be a timedelta-like offset
        within the day; confirm against the stored data).
    stock : str
        Ticker looked up in table 'R' to find its ``stock_locate`` code.
        Defaults to the value the module-level ``stock`` had when this
        function was defined.

    Returns
    -------
    pandas.DataFrame
        All messages for the stock with a ``type`` column holding the
        message code, sorted by ``timestamp`` and re-indexed from 0.

    Raises
    ------
    IndexError
        If table 'R' contains no row for ``stock``.
    """
    # Trading Message Types
    messages = ['A', 'F', 'E', 'C', 'X', 'D', 'U', 'P', 'Q']

    # Open read-only: this function never writes. The default append mode
    # would create an empty store when the file is missing, instead of
    # reporting the missing input.
    with pd.HDFStore(itch_store, mode='r') as store:
        # The right-hand names inside the `where` strings (`stock`,
        # `stock_locate`) are looked up by pandas among this function's
        # local variables, so these locals must keep exactly these names.
        stock_locate = store.select('R', where='stock = stock').stock_locate.iloc[0]
        target = 'stock_locate = stock_locate'

        frames = {}
        for m in messages:
            frames[m] = store.select(m, where=target).drop('stock_locate', axis=1).assign(type=m)

    # Order details come from the 'A' and 'F' tables
    order_cols = ['order_reference_number', 'buy_sell_indicator', 'shares', 'price']
    orders = pd.concat([frames['A'], frames['F']], sort=False, ignore_index=True).loc[:, order_cols]

    # 'E', 'C', 'X', 'D': attach the order details. With no `on` given,
    # the merge joins on whatever columns the two frames share.
    for m in messages[2: -3]:
        frames[m] = frames[m].merge(orders, how='left')

    # 'U' refers to the order through `original_order_reference_number`;
    # overlapping columns coming from `orders` get the `_replaced` suffix.
    frames['U'] = frames['U'].merge(orders, how='left',
                                    right_on='order_reference_number',
                                    left_on='original_order_reference_number',
                                    suffixes=['', '_replaced'])

    # Align 'Q' with the common `price` column name
    frames['Q'] = frames['Q'].rename(columns={'cross_price': 'price'})

    # For 'X', `shares` carries the cancelled quantity; rows that found no
    # matching order (no price after the merge) are discarded.
    frames['X'] = (frames['X']
                   .assign(shares=frames['X']['cancelled_shares'])
                   .dropna(subset=['price']))

    data = pd.concat([frames[m] for m in messages], ignore_index=True, sort=False)

    # Turn the per-message offset into a full timestamp on the trading day
    data['date'] = pd.to_datetime(date, format='%m%d%Y')
    data['timestamp'] = data['date'].add(data['timestamp'])

    # Drop rows flagged printable == 0; rows with no `printable` value
    # compare unequal to 0 and are kept.
    data = data[data['printable'] != 0]

    drop_cols = ['tracking_number', 'order_reference_number', 'original_order_reference_number',
                 'cross_type', 'new_order_reference_number', 'attribution', 'match_number',
                 'printable', 'date', 'cancelled_shares']
    return data.drop(drop_cols, axis=1).sort_values('timestamp').reset_index(drop=True)

In [10]:
# Load all messages for the configured stock and trading day
messages = get_messages(date=date)

# Column dtypes with non-null counts. `show_counts` replaces the
# `null_counts` keyword, which was deprecated in pandas 1.2 and removed in 2.0.
messages.info(show_counts=True)

FileNotFoundError: ``/Users/joaquinromero/Desktop/MLAT/chap_02/data`` does not exist