## a. Load the trade and quote data as in class

In [1]:
%matplotlib inline

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time
import datetime

def show_time( label_string ):
    """Print `label_string` followed by the current local wall-clock time.

    The line has the form ``<label> : YYYY-MM-DD HH:MM:SS:ffffff`` and is
    used throughout the notebook to bracket slow steps.

    Parameters
    ----------
    label_string : str
        Text placed in front of the timestamp.
    """
    stamp = datetime.datetime.now().strftime( '%Y-%m-%d %H:%M:%S:%f' )
    # join (rather than format) so a non-string label still raises TypeError
    print( ' : '.join( ( label_string, stamp ) ) )

### Get Market Data

In [2]:
# load trades (the read is bracketed with timestamps so its cost is visible)
show_time( 'started loading trades' )
trades = pd.read_csv( 'SQ_trades_201902.zip' )
show_time( 'finished loading trades')

# index trades
show_time( 'started fixing trade index')

# fix column names
# Only the columns are renamed: the row index is replaced by timestamps just
# below, so converting every row label to a string first would be wasted work.
trades = trades.rename( columns={ 'SIZE' : 'trade_qty', 'PRICE' : 'trade_price' } )

# fix index
# DATE and TIME_M are joined with a space so the day digits and the hour
# digits are unambiguously separated; this is the same layout the quotes
# cell parses.
times = trades[ 'DATE' ].astype( str ) + ' ' + trades[ 'TIME_M' ].astype( str )
formatStr = '%Y%m%d %H:%M:%S.%f'
trades.index = pd.to_datetime( times, format = formatStr )

# clean up unused columns
trades = trades.drop( columns=[ 'DATE', 'TIME_M', 'SYM_ROOT', 'SYM_SUFFIX', 'TR_CORR', 'TR_SEQNUM', 'TR_RF' ] )
show_time( 'finished fixing trade index' )

started loading trades : 2019-04-26 21:21:04:682716
finished loading trades : 2019-04-26 21:21:07:846897
started fixing trade index : 2019-04-26 21:21:07:847897
finished fixing trade index : 2019-04-26 21:21:17:798466


In [6]:
# load quotes (the read is bracketed with timestamps so its cost is visible)
show_time( 'started loading quotes' )
quotes = pd.read_csv( 'SQ_quotes_20190201.csv' )
show_time( 'finished loading quotes')

# index quotes
show_time( 'started fixing quotes index' )

# fix column names
# Only the columns are renamed: the row index is replaced by timestamps just
# below, so converting every row label to a string first would be wasted work.
quotes = quotes.rename( columns={ 'BID' : 'bid_price', 'BIDSIZ' : 'bid_qty', 'ASK' : 'ask_price', 'ASKSIZ' : 'ask_qty' } )

# fix index: build "<DATE> <TIME_M>" strings and parse them into timestamps
times = quotes[ 'DATE' ].astype( str ) + ' '+quotes[ 'TIME_M' ].astype( str )
formatStr = '%Y%m%d %H:%M:%S.%f'
quotes.index = pd.to_datetime( times, format = formatStr )

# cleanup unused columns
quotes = quotes.drop( columns=[ 'DATE', 'TIME_M', 'QU_SEQNUM', 'QU_SOURCE', 'SYM_ROOT', 'SYM_SUFFIX' ] )
show_time( 'finished fixing quotes index' )

started loading quotes : 2019-04-26 21:24:51:429685
finished loading quotes : 2019-04-26 21:24:53:077780
started fixing quotes index : 2019-04-26 21:24:53:077780
finished fixing quotes index : 2019-04-26 21:24:55:014890


In [7]:
# Keep only the rows between 09:30 and 16:00 on 2019-02-01.
# The end label is given to the second, and the merged output further down
# still shows rows stamped 16:00:00.155493 and 16:00:00.371051, i.e. rows
# inside the 16:00:00 second are kept by this slice.
session = slice( '2019-02-01 09:30:00', '2019-02-01 16:00:00' )
daytrades = trades.loc[ session ]
dayquotes = quotes.loc[ session ]

In [8]:
# Combine the session quotes and trades into one frame keyed by timestamp.
# The outer join keeps timestamps that appear on either side; columns that
# belong only to the other side are left empty on those rows. Column names
# present in both inputs get the default suffixes (EX_x from quotes, EX_y
# from trades).
show_time( 'start merge' )
taq = pd.merge(
    dayquotes,
    daytrades,
    left_index = True,
    right_index = True,
    how = 'outer',
)
show_time( 'end merge' )

start merge : 2019-04-26 21:25:41:810567
end merge : 2019-04-26 21:25:42:364599


In [9]:
# Show the last five rows of the merged frame as a quick sanity check.
taq.tail( n=5 )

Unnamed: 0,EX_x,bid_price,bid_qty,ask_price,ask_qty,QU_COND,NATBBO_IND,QU_CANCEL,EX_y,TR_SCOND,trade_qty,trade_price,TR_SOURCE
2019-02-01 16:00:00.000000,J,66.97,1.0,0.0,0.0,R,A,,,,,,
2019-02-01 16:00:00.000000,B,66.97,1.0,0.0,0.0,R,A,,,,,,
2019-02-01 16:00:00.000000,Z,70.77,6.0,72.5,1.0,R,A,,,,,,
2019-02-01 16:00:00.155493,,,,,,,,,P,M,900.0,70.8,C
2019-02-01 16:00:00.371051,,,,,,,,,T,M,100.0,70.81,C


In [12]:
# Persist the merged quote/trade frame as a gzip-compressed pickle.
taq.to_pickle( 'SQ_taq_20190201.gz', compression='gzip' )

In [13]:
# Persist the trades frame as a gzip-compressed pickle.
# NOTE(review): `trades` is the full frame read from 'SQ_trades_201902.zip'
# (not the 09:30-16:00 `daytrades` slice), while the file name carries a
# single date - confirm which one was meant to be saved.
trades.to_pickle( 'SQ_trades_20190201.gz', compression='gzip' )

In [14]:
# Persist the quotes frame (all rows loaded, not just the session slice)
# as a gzip-compressed pickle.
quotes.to_pickle( 'SQ_quotes_20190201.gz', compression='gzip' )

In [15]:
# Row counts, one per line, in the order: merged frame, trades, quotes.
for frame in ( taq, trades, quotes ):
    print( len( frame ) )

1074274
1158991
1014024
