In [36]:
import gzip
from struct import unpack, pack, calcsize
from datetime import timedelta
from collections import deque
import numpy as np

In [57]:
# Offset + length_last - 1
# These values are taken from 
# https://www.nasdaqtrader.com/content/technicalsupport/specifications/dataproducts/NQTVITCHspecification.pdf
message_type_size_dict = {
    b"S" : 11, b"R" : 38, b"H" : 24, b"Y" : 19, b"L" : 25, b"V" : 34, b"W" : 11, b"K" : 27, b"J" : 34, b"h" : 20, b"A" : 35,
    b"F" : 39, b"E" : 30, b"C" : 35, b"X" : 22, b"D" : 18, b"U" : 34, b"P" : 43, b"Q" : 39, b"B" : 18, b"I" : 49, b"N" : 19,
    b"\x00" : 1
}

In [38]:
market_open='0930'
market_close = '1600'

In [39]:
def get_timestamp(binary_value):
    time_stamp = unpack('>6s', binary_value)[0];
    return int.from_bytes(time_stamp, byteorder='big')
    
def handle_system_message():
    global market_o, market_s, market_q, market_m, market_e, market_c
    
    # System Message
    stock_locate, tracking_number = unpack('>HH', msg[0:4])
    event_code = unpack('>c', msg[10:])[0]
    time_stamp = get_timestamp(msg[4:10])
    if event_code == b"O":
        market_o = market_o + 1
        # Start of Messages.Outside oftime stamp messages,the start of day message isthe first message sentin
        # any trading day
    elif event_code == b"S":
        market_s = market_s + 1
        # Start of System hours. This message indicatesthatNASDAQis open and ready to start accepting orders
    elif event_code == b"Q":
        market_q = market_q + 1
        # Start of Market hours. This message isintended to indicate that Market Hours orders are available
        # for execution
    elif event_code == b"M":
        market_m = market_m + 1
        # End of Market hours. This message isintended to indicate that Market Hours orders are no longer
        # available for execution
    elif event_code == b"E":
        market_e = market_e + 1
        # End of System hours. It indicates that Nasdaq is now closed and will not accept any new orderstoday.
        # It is still possible to receive Broken Trade messages and Order Delete messages after the End of Day
    elif event_code == b"C":
        market_c = market_c + 1
        # End of Messages. This is always the last message sent in any trading day.
    converted_time = timedelta(seconds=time_stamp * 1e-9)
    print (stock_locate, tracking_number, converted_time, event_code)

In [40]:
def handle_Add_Order_message(bMPID = False):
    stock = unpack('>8s', msg[23:31])[0]
    time_stamp = get_timestamp(msg[4:10])
    shares = unpack('>I', msg[19:23])[0]
    price = unpack('>I', msg[31:35])[0] * (1e-4)
    converted_time = timedelta(seconds=time_stamp * 1e-9)
    order_ref_number = unpack('>Q', msg[10:18])[0]
    ref_number_stocks.append([order_ref_number, stock])
    

def get_stock(order_ref_number):
    for i in range(len(ref_number_stocks)):
        if ref_number_stocks[i][0] == order_ref_number:
            return (ref_number_stocks[i][1])

def handle_Order_Execute_message():
    printable = unpack('>c', msg[30:31])[0]
    if printable != b'N': # to avoid double counting.
        time_stamp = get_timestamp(msg[4:10])
        shares = unpack('>I', msg[18:22])[0]

        price = unpack('>I', msg[31:])[0] * (1e-4)
        order_ref_number = unpack('>Q', msg[10:18])[0]
        converted_time = timedelta(seconds=time_stamp * 1e-9)
        stock = get_stock(order_ref_number)
        
        match_number = unpack('>Q', msg[22:30])[0]
        
        print ("Execute >>>>>>>>>>.", order_ref_number, stock, converted_time, shares, price, match_number, printable)
        return
        if stock not in stocks_past_cummulative_volume:
            stocks_past_cummulative_volume[stock] = 0
            stocks_past_cummulative_TPV[stock] = 0

        if stock not in stocks_times:
            stocks_times[stock] = deque()
            
        stock_queue = stocks_times[stock]
        stock_queue.append({'time_stamp' : time_stamp,
                            'price' : price})

        while stock_queue[0]['time_stamp'] < (time_stamp - tick_time): 
            stock_queue.popleft()
        
        N = len(stock_queue)
        high = price
        low = price
        for i in range(N):
            if stock_queue[i]['price'] > high:
                high = stock_queue[i]['price']
            if stock_queue[i]['price'] < low:
                low = stock_queue[i]['price']
          
        print (">>>>>>>>>>>>>>>> ", N, high, low, price)
        typical_price = (high + low + price)/3
        current_PV = shares * typical_price
        
        stocks_past_cummulative_TPV[stock] = (stocks_past_cummulative_TPV[stock] +  
                                              current_PV) / (stocks_past_cummulative_volume[stock] + shares)
        stocks_past_cummulative_volume[stock] = shares
        
        print (stock, stocks_past_cummulative_TPV[stock], converted_time)
        
def update_the_stock_queues(stock, shares, price, time_stamp):
    if stock not in stocks_queue:
            stocks_queue[stock] = deque()
    stock_queue = stocks_queue[stock]
    stock_queue.append({'time_stamp' : time_stamp,
                        'price'  : price,
                        'volumn' : shares})
    
    while stock_queue[0]['time_stamp'] < (time_stamp - tick_time): 
            stock_queue.popleft()

    N = len(stock_queue)
    
    VP = 0
    V = 0
    for i in range(N): # don't iterate over current trade
        VP = VP + stock_queue[i]['price'] * stock_queue[i]['volumn']
        V = V + stock_queue[i]['volumn']
        
    vwap = (VP / V) * 10e-4
        
    converted_time = timedelta(seconds = time_stamp * 1e-9)
    print (">>>>>>>>>>>>>", stock, vwap, converted_time)
    
def handle_trade_C_message():
    (shares, stock, price) = unpack('>I8sI', msg[10:30])
    time_stamp = get_timestamp(msg[4:10])
    converted_time = timedelta(seconds = time_stamp * 1e-9)
    
    print ("Trade C >>>>>>>>>>.", stock, converted_time, shares, price)
    
    update_the_stock_queues(stock, shares, price, time_stamp)
    
def handle_trade_NC_message():
    (shares, stock, price) = unpack('>I8sI', msg[19:35])
    time_stamp = get_timestamp(msg[4:10])
    converted_time = timedelta(seconds = time_stamp * 1e-9)
    
    print ("Trade NC >>>>>>>>>>.", stock, converted_time, shares, price)
    update_the_stock_queues(stock, shares, price, time_stamp)

In [43]:
#bin_data = open("C:/Users/Amandeep/Downloads/NASDAQ_DATA/01302019.NASDAQ_ITCH50/01302019.NASDAQ_ITCH50", "rb")
bin_data = open("20190130.PSX_ITCH_50", "rb")
message_type = bin_data.read(1);
market_o, market_s, market_q, market_m, market_e, market_c = (0, 0, 0, 0, 0, 0)
number_of_messages = 0
ref_number_stocks = []
stocks_queue = {}
stocks_current_avg = {}
stocks_past_cummulative_volume = {}
stocks_past_cummulative_TPV = {}
 # rolling average of Min = 6e10 --- Hour = 3.6e12
tick_time = 6e10
while message_type:
    msg = bin_data.read(message_type_size_dict[message_type])
    number_of_messages += 1
    # trading Message
    if message_type == b"A":
        handle_Add_Order_message()
    elif message_type == b"F":
        handle_Add_Order_message(bMPID = True)
    elif message_type == b"C":
        #pass
        handle_Order_Execute_message()
    elif message_type == b"S":
        handle_system_message()
    elif message_type == b"P":
        handle_trade_NC_message()
    elif message_type == b"Q":
        handle_trade_C_message()
    message_type = bin_data.read(1);
    if number_of_messages > 100000: # for all change to 100000000 or remove this if and break!
        break
    
print (number_of_messages, market_o, market_s, market_q, market_m, market_e, market_c)

0 0 3:06:20.838940 b'O'
0 0 8:00:00.000025 b'S'
100001 1 1 0 0 0 0


In [44]:
calcsize('>HHhL8ccc4c')
calcsize('>HHhLc')
calcsize('HHI')

# Making a 6 byte timestamp
def unpack48(x):
    x1, x2, x3 = struct.unpack('<HHI', x)
    return x1, x2 | (x3 << 16)
calcsize('HH6cc')

11

In [45]:
(1024).to_bytes(2, byteorder='big')

b'\x04\x00'

In [2]:
%reload_ext Cython

In [47]:
import numpy as np
size = 400
iterations = 100

In [48]:
%%cython -a
import numpy as np

def mandelbrot_cython(m, int size, iterations):
    for i in range(size):
        for j in range(size):
            c = -2 + 3./size*j + 1j*(1.5-3./size*i)
            z = 0
            for n in range(iterations):
                if np.abs(z) <= 10:
                    z = z*z + c
                    m[i, j] = n
                else:
                    break

In [49]:
%%timeit -n1 -r1 m = np.zeros(s, dtype=np.int32)
mandelbrot_cython(m, size, iterations)

8.9 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [50]:
%%cython -a
import numpy as np

def mandelbrot_cython(int[:,::1] m,
                      int size,
                      int iterations):
    cdef int i, j, n
    cdef complex z, c
    for i in range(size):
        for j in range(size):
            c = -2 + 3./size*j + 1j*(1.5-3./size*i)
            z = 0
            for n in range(iterations):
                if z.real**2 + z.imag**2 <= 100:
                    z = z*z + c
                    m[i, j] = n
                else:
                    break

In [51]:
%%timeit -n1 -r1 m = np.zeros(s, dtype=np.int32)
mandelbrot_cython(m, size, iterations)

485 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [6]:
%%cython -a
from collections import deque
from datetime import timedelta
from struct import unpack

message_type_size_dict = {
    b"S" : 11, b"R" : 38, b"H" : 24, b"Y" : 19, b"L" : 25, b"V" : 34, b"W" : 11, b"K" : 27, b"J" : 34, b"h" : 20, b"A" : 35,
    b"F" : 39, b"E" : 30, b"C" : 35, b"X" : 22, b"D" : 18, b"U" : 34, b"P" : 43, b"Q" : 39, b"B" : 18, b"I" : 49, b"N" : 19,
    b"\x00" : 1
}

cpdef long long get_timestamp(bytes binary_value):
    time_stamp = unpack('>6s', binary_value)[0];
    return int.from_bytes(time_stamp, byteorder='big')

cpdef void update_the_stock_queues(bytes stock, shares, price, time_stamp):
    if stock not in stocks_queue:
            stocks_queue[stock] = deque()
    stock_queue = stocks_queue[stock]
    stock_queue.append({'time_stamp' : time_stamp,
                        'price'  : price,
                        'volumn' : shares})
    while stock_queue[0]['time_stamp'] < (time_stamp - tick_time):
        stock_queue.popleft()
    cdef int N = len(stock_queue)
    
    cdef double VP = 0
    cdef double V = 0
    for i in range(N):
        VP = VP + stock_queue[i]['price'] * stock_queue[i]['volumn']
        V = V + stock_queue[i]['volumn']
        
    cdef float vwap = (VP // V) * 0.0001 #10e-4  
    #converted_time = timedelta(seconds = time_stamp * 1e-9)
    vmap_row = ','.join([str(stock), str(vwap), str(time_stamp)])
    big_result.append(vmap_row)#''.join(vmap_row)
    
cpdef void handle_trade_C_message(bytes msg):
    (shares, stock, price) = unpack('>Q8sI', msg[10:30])
    if shares > 0:
        time_stamp = get_timestamp(msg[4:10])
        update_the_stock_queues(stock, shares, price, time_stamp)
    
cpdef void handle_trade_NC_message(bytes msg):
    (shares, stock, price) = unpack('>I8sI', msg[19:35])
    time_stamp = get_timestamp(msg[4:10])
    update_the_stock_queues(stock, shares, price, time_stamp)
    

bin_data = open("C:/Users/Amandeep/Downloads/NASDAQ_DATA/01302019.NASDAQ_ITCH50/01302019.NASDAQ_ITCH50", "rb")
#bin_data = open("20190130.PSX_ITCH_50", "rb")

cdef bytes message_type = bin_data.read(1)
cdef long long number_of_messages = 0
stocks_queue = {}
cdef long long tick_time = 3600000000000 #3.6e12
cdef bytes msg
big_result = []
while message_type: 
    msg = bin_data.read(message_type_size_dict[message_type])
    if message_type == b'P':
        handle_trade_NC_message(msg)
    elif message_type == b'Q':
        handle_trade_C_message(msg)
   # pass
    message_type = bin_data.read(1);
    #if number_of_messages > 1000000: # for all change to 100000000 or remove this if and break!
    #    break
    #number_of_messages += 1

print("Number of messages = {}".format(number_of_messages))
textfile = open('stock_vwap_full.txt', 'w')
textfile.write('\n'.join(big_result))
textfile.close()

Number of messages = 0


In [69]:
%%cython -a

message_type_size_dict = {
    b"S" : 11, b"R" : 38, b"H" : 24, b"Y" : 19, b"L" : 25, b"V" : 34, b"W" : 11, b"K" : 27, b"J" : 34, b"h" : 20, b"A" : 35,
    b"F" : 39, b"E" : 30, b"C" : 35, b"X" : 22, b"D" : 18, b"U" : 34, b"P" : 43, b"Q" : 39, b"B" : 18, b"I" : 49, b"N" : 19,
    b"\x00" : 1
}
print(message_type_size_dict[b"X"])

22


In [251]:
str(timedelta(seconds = 14865802201366 * 1e-9))

'4:07:45.802201'

In [193]:
','.join(['nums' for num in range(10)])

'nums,nums,nums,nums,nums,nums,nums,nums,nums,nums'

In [32]:
import pandas as pd
vwaps = pd.read_csv("stock_vwap_full.txt", header=None)

Unnamed: 0,1,2
count,17982.0,17982.0
mean,303.300505,48991870000000.0
std,2.818157,9687685000000.0
min,293.49881,18392770000000.0
25%,300.788628,39920480000000.0
50%,302.531891,50499360000000.0
75%,306.365601,57581130000000.0
max,308.080811,71997330000000.0


In [33]:
vwaps.loc[vwaps[0] == "b'TSLA    '"].describe()

Unnamed: 0,1,2
count,17982.0,17982.0
mean,303.300505,48991870000000.0
std,2.818157,9687685000000.0
min,293.49881,18392770000000.0
25%,300.788628,39920480000000.0
50%,302.531891,50499360000000.0
75%,306.365601,57581130000000.0
max,308.080811,71997330000000.0


In [13]:
vwaps

Unnamed: 0,0,1,2
0,b'UGAZ ',39.160000,14400199973027
1,b'SPY ',264.399994,14404669286712
2,b'AMZN ',1613.709961,14447194467427
3,b'SPY ',264.346008,14447304877085
4,b'AAPL ',162.720001,14455381649776
5,b'AAPL ',162.720001,14455509813002
6,b'TQQQ ',42.709999,14501611344879
7,b'UGAZ ',38.852100,14509571706384
8,b'TQQQ ',42.709999,14510205812099
9,b'PCG ',13.570000,14525549421689
