# Analyze NASDAQ ITCH protocol

In [2]:
import gzip
import shutil
from pathlib import Path
from urllib.request import urlretrieve
from urllib.parse import urljoin
from datetime import datetime
from struct import unpack
from collections import namedtuple, Counter
from datetime import timedelta
from time import time

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

## Download Sample Data

In [None]:
# Base URL of the NASDAQ ITCH sample files and the daily file to fetch.
URL = 'https://emi.nasdaq.com/ITCH/Nasdaq%20ITCH/'
SOURCE_FILE = '03272019.NASDAQ_ITCH50.gz' # .gz 5.13GB, unzipped 11.99GB

# Local data directory plus the paths of two HDF5 files inside it
# (the stores themselves are not touched in this cell).
data_path = Path('data')
itch_store = str(data_path / 'itch.h5')
order_book_store = data_path / 'order_book.h5'

url = urljoin(URL, SOURCE_FILE)
filename = data_path / url.split('/')[-1]
if not data_path.exists():
    print('Creating directory')
    data_path.mkdir(parents=True, exist_ok=True)

if not filename.exists():
    print('Downloading...', url)
    # Download to a temporary name and rename on success, so an interrupted
    # transfer is never mistaken for a complete archive on the next run.
    download_tmp = filename.with_name(filename.name + '.part')
    urlretrieve(url, download_tmp)
    download_tmp.replace(filename)

unzipped = data_path / (filename.stem + '.bin')
# `unzipped` already contains `data_path`; joining it with `data_path` again
# would test 'data/data/...' which never exists and force a re-extraction of
# the ~12GB file on every run.
if not unzipped.exists():
    print('Unzipping to', unzipped)
    # Same write-then-rename approach as the download: a partially extracted
    # file must not satisfy the existence check above.
    unzip_tmp = unzipped.with_name(unzipped.name + '.part')
    with gzip.open(str(filename), 'rb') as f_in, open(unzip_tmp, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)
    unzip_tmp.replace(unzipped)


Creating directory
Downloading... https://emi.nasdaq.com/ITCH/Nasdaq%20ITCH/03272019.NASDAQ_ITCH50.gz
Unzipping to data/03272019.NASDAQ_ITCH50.bin


## Message Formats

In [None]:
# Nasdaq TotalView-ITCH specification (reference for the message formats in this section):
# https://www.nasdaqtrader.com/content/technicalsupport/specifications/dataproducts/NQTVITCHSpecification.pdf

In [None]:
# Single-letter system event codes mapped to their description.
event_codes = dict(
    O='Start of Messages',
    S='Start of System Hours',
    Q='Start of Market Hours',
    M='End of Market Hours',
    E='End of System Hours',
    C='End of Messages',
)

# Integer codes for single-character flag fields, keyed by field name.
encoding = dict(
    primary_market_maker=dict(Y=1, N=0),
    printable=dict(Y=1, N=0),
    buy_sell_indicator=dict(B=1, S=-1),
    cross_type=dict(O=0, C=1, H=2),
    # NOTE(review): 'B' and 'N' both encode to 0, so they cannot be told
    # apart after encoding -- confirm this is intended before relying on it.
    imbalance_direction=dict(B=0, S=1, N=0, O=-1),
)

# (field kind, length in bytes) -> format string for that field.
# Presumably these are `struct` format characters (unpack is imported at the
# top of the notebook) -- confirm against the code that consumes them.
formats = {
    (kind, size): code
    for kind, size, code in [
        ('integer', 2, 'H'),
        ('integer', 4, 'I'),
        ('integer', 6, '6s'),
        ('integer', 8, 'Q'),
        ('alpha', 1, 's'),
        ('alpha', 2, '2s'),
        ('alpha', 4, '4s'),
        ('alpha', 8, '8s'),
        ('price_4', 4, 'I'),
        ('price_8', 8, 'Q'),
    ]
}


In [8]:
# Spreadsheet describing the fields of each message type.
message_types_url = 'https://github.com/PacktPublishing/Hands-On-Machine-Learning-for-Algorithmic-Trading/raw/refs/heads/master/Chapter02/01_NASDAQ_TotalView-ITCH_Order_Book/message_types.xlsx'

# Read the 'messages' sheet, order the rows by the 'id' column, then discard
# that column since it is only needed for ordering here.
message_types = (
    pd.read_excel(message_types_url, sheet_name='messages')
    .sort_values(by='id')
    .drop(columns='id')
)

message_types.head()

Unnamed: 0,Name,Offset,Length,Value,Notes
0,Message Type,0,1,S,System Event Message
1,Stock Locate,1,2,Integer,Always 0
2,Tracking Number,3,2,Integer,Nasdaq internal tracking number
3,Timestamp,5,6,Integer,Nanoseconds since midnight
4,Event Code,11,1,Alpha,See System Event Codes below


In [10]:
# Show the raw column labels; the Index repr makes stray whitespace in them visible.
message_types.columns

Index(['Name ', 'Offset ', 'Length ', 'Value ', 'Notes '], dtype='object')

In [16]:
def clean_message_types(df):
    """Normalise the message-type specification table.

    The frame is modified IN PLACE and the same object is returned, so the
    caller's original variable also sees the cleaned data.

    Steps:
      * column labels are lower-cased and stripped of surrounding whitespace;
      * the 'value' and 'notes' columns are stripped of surrounding whitespace;
      * 'name' is stripped, lower-cased, and has spaces, hyphens and slashes
        replaced by underscores;
      * a new 'message_type' column holds 'value' on rows whose name is
        'message_type' and is missing (NaN) on every other row.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with (possibly padded / mixed-case) columns named name, value
        and notes. 'value', 'name' and 'notes' are expected to hold strings.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, after cleaning.
    """
    # str() keeps this working even if a label is not a string.
    df.columns = [str(label).lower().strip() for label in df.columns]

    # Bracket indexing instead of attribute assignment: it cannot be confused
    # with DataFrame/Series attributes and is the documented way to set columns.
    for column in ('value', 'notes'):
        df[column] = df[column].str.strip()

    # One explicit regex pass replaces each separator character with '_';
    # being explicit about `regex` pins the behaviour across pandas versions.
    df['name'] = (df['name']
                  .str.strip()
                  .str.lower()
                  .str.replace(r'[ /-]', '_', regex=True))

    # Index-aligned assignment: only the rows selected by the mask receive a
    # value, all remaining rows become NaN.
    is_header_row = df['name'] == 'message_type'
    df['message_type'] = df.loc[is_header_row, 'value']
    return df

In [17]:
# clean_message_types modifies the frame it receives and returns that same
# object, so `df` and `message_types` refer to one cleaned DataFrame from here on.
df = clean_message_types(message_types)
df.head()

Unnamed: 0,name,offset,length,value,notes,message_type
0,message_type,0,1,S,System Event Message,S
1,stock_locate,1,2,Integer,Always 0,
2,tracking_number,3,2,Integer,Nasdaq internal tracking number,
3,timestamp,5,6,Integer,Nanoseconds since midnight,
4,event_code,11,1,Alpha,See System Event Codes below,


In [20]:
# Rows per message type. 'message_type' is only filled on rows whose name is
# 'message_type', so this counts those rows, not the fields of each message.
df.groupby('message_type').count()

Unnamed: 0_level_0,name,offset,length,value,notes
message_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A,1,1,1,1,1
B,1,1,1,1,1
C,1,1,1,1,1
D,1,1,1,1,1
E,1,1,1,1,1
F,1,1,1,1,1
H,1,1,1,1,1
I,1,1,1,1,1
J,1,1,1,1,1
K,1,1,1,1,1
