In [1]:
import numpy as np
import tables as tb
from datetime import datetime
from pytz import timezone

In [2]:
import raw_taq_new

Run the next cell to reload `raw_taq_new` after editing it, instead of restarting the kernel.

In [None]:
# Re-execute raw_taq_new so that edits to the module take effect in this kernel.
# Objects created before the reload keep their old class definitions, so any
# existing `sample` reader must be re-created afterwards.
from importlib import reload
reload(raw_taq_new)

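The two `sample = ...` cells below allow comparing processed and unprocessed reads. Both bind the same name `sample`, so run only the one you want (running both leaves `sample` bound to whichever ran last). Currently, processing is still very slow.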

In [4]:
# Reader over the zipped BBO file with chunk processing switched off
# (do_process_chunk=False). Binds the same name `sample` as the
# do_process_chunk=True cell, so whichever cell ran last wins.
sample = raw_taq_new.TAQ2Chunks('../local_data/EQY_US_ALL_BBO_20150102.zip', do_process_chunk=False)

In [3]:
# Reader over the zipped BBO file with chunk processing switched on
# (do_process_chunk=True). Binds the same name `sample` as the
# do_process_chunk=False cell, so whichever cell ran last wins.
sample = raw_taq_new.TAQ2Chunks('../local_data/EQY_US_ALL_BBO_20150102.zip', do_process_chunk=True)

In [4]:
# Date attributes exposed by the reader, shown in (month, day, year) order.
sample.month, sample.day, sample.year

(1, 2, 2015)

In [5]:
# Profile pulling the next chunk from the reader; the statement runs in the
# notebook namespace, so `c` holds that chunk for the cells that follow.
%prun c = next(sample)

 

In [6]:
# Field names and storage types of the chunk's structured array.
c.dtype

dtype([('Time', '<f8'), ('hour', 'i1'), ('minute', 'i1'), ('msec', '<u2'), ('Exchange', 'S1'), ('Symbol_root', 'S6'), ('Symbol_suffix', 'S10'), ('Bid_Price', '<f8'), ('Bid_Size', '<i4'), ('Ask_Price', '<f8'), ('Ask_Size', '<i4'), ('Quote_Condition', 'S1'), ('Market_Maker', 'S4'), ('Bid_Exchange', 'S1'), ('Ask_Exchange', 'S1'), ('Sequence_Number', '<i8'), ('National_BBO_Ind', 'S1'), ('NASDAQ_BBO_Ind', 'S1'), ('Quote_Cancel_Correction', 'S1'), ('Source_of_Quote', 'S1'), ('Retail_Interest_Indicator_RPI', 'S1'), ('Short_Sale_Restriction_Indicator', 'S1'), ('LULD_BBO_Indicator_CQS', 'S1'), ('LULD_BBO_Indicator_UTP', 'S1'), ('FINRA_ADF_MPID_Indicator', 'S1'), ('SIP_generated_Message_Identifier', 'S1'), ('National_BBO_LULD_Indicator', 'S1')])

In [7]:
# Ask the reader to set up its HDF5 output and keep the returned table handle.
# NOTE(review): presumably 'sample' becomes the file 'sample.h5' that is
# reopened later in this notebook — confirm in raw_taq_new.
h5_table = sample.setup_hdf5('sample')

In [8]:
# Append the records of chunk `c` to the HDF5 table.
h5_table.append(c)

In [9]:
# Let the reader finish its HDF5 output before the file is reopened for reading.
# NOTE(review): presumably flushes and closes the file — confirm in raw_taq_new.
sample.finalize_hdf5()

In [10]:
# Open sample.h5 for inspection; tb.open_file defaults to mode 'r' (read-only).
# The handle stays open until h5f.close() is called in a later cell.
h5f = tb.open_file('sample.h5')

In [11]:
# Rebind h5_table to the `daily_quotes` node at the root of the opened file.
# This replaces the handle previously returned by sample.setup_hdf5().
h5_table = h5f.root.daily_quotes

In [12]:
# Cast the first ten Time values (float epoch seconds) to datetime64 at
# one-second resolution; the fractional part of each timestamp is dropped.
# The array repr shows them in the machine's local timezone by default with
# the NumPy version used here (NumPy 1.11+ prints timezone-naive values instead).
# Use datetime or pandas to render them in an arbitrary timezone.
dtimes = h5_table.cols.Time[:10].astype('datetime64[s]')
dtimes

array(['2015-01-02T01:00:00-0800', '2015-01-02T02:59:57-0800',
       '2015-01-02T02:59:57-0800', '2015-01-02T02:59:57-0800',
       '2015-01-02T02:59:31-0800', '2015-01-02T03:00:11-0800',
       '2015-01-02T05:00:00-0800', '2015-01-02T04:58:52-0800',
       '2015-01-02T04:58:52-0800', '2015-01-02T04:58:52-0800'], dtype='datetime64[s]')

In [13]:
# Render the first stored Time value as a timezone-aware US/Eastern datetime.
# Only record 0 is checked here (hour=4, minute=0, msec=16 in the chunk),
# so a correct value on this row does not validate the other rows.
datetime.fromtimestamp(h5_table.cols.Time[0], tz=timezone('US/Eastern'))

datetime.datetime(2015, 1, 2, 4, 0, 0, 16000, tzinfo=<DstTzInfo 'US/Eastern' EST-1 day, 19:00:00 STD>)

In [14]:
# Release the handle on sample.h5 opened with tb.open_file.
h5f.close()

In [15]:
# Wall-clock fields of the first ten records; these should be US/Eastern time.
# NOTE(review): only the records with minute == 0 (rows 0 and 6) agree with the
# Time column rendered earlier in this notebook. Row 1 reads 06:21:17.175 here,
# but its Time value displays as 02:59:57-0800, i.e. 05:59:57 Eastern.
# The gaps match minute*60 wrapping in int8 (21*60 = 1260 -> -20 s; 28*60 = 1680
# -> -112 s) — verify how raw_taq_new computes Time from the int8 minute field.
c[:10][['hour', 'minute', 'msec']]

array([(4, 0, 16), (6, 21, 17175), (6, 21, 17180), (6, 21, 17185),
       (6, 37, 55503), (6, 38, 35541), (8, 0, 310), (8, 28, 44507),
       (8, 28, 44509), (8, 28, 44511)], 
      dtype=[('hour', 'i1'), ('minute', 'i1'), ('msec', '<u2')])

In [16]:
# List of (field name, type) pairs held by the reader's bytes_spec; the field
# names and order mirror those reported by c.dtype.
sample.bytes_spec.target_dtype

[('Time', numpy.float64),
 ('hour', numpy.int8),
 ('minute', numpy.int8),
 ('msec', numpy.uint16),
 ('Exchange', 'S1'),
 ('Symbol_root', 'S6'),
 ('Symbol_suffix', 'S10'),
 ('Bid_Price', numpy.float64),
 ('Bid_Size', numpy.int32),
 ('Ask_Price', numpy.float64),
 ('Ask_Size', numpy.int32),
 ('Quote_Condition', 'S1'),
 ('Market_Maker', 'S4'),
 ('Bid_Exchange', 'S1'),
 ('Ask_Exchange', 'S1'),
 ('Sequence_Number', numpy.int64),
 ('National_BBO_Ind', 'S1'),
 ('NASDAQ_BBO_Ind', 'S1'),
 ('Quote_Cancel_Correction', 'S1'),
 ('Source_of_Quote', 'S1'),
 ('Retail_Interest_Indicator_RPI', 'S1'),
 ('Short_Sale_Restriction_Indicator', 'S1'),
 ('LULD_BBO_Indicator_CQS', 'S1'),
 ('LULD_BBO_Indicator_UTP', 'S1'),
 ('FINRA_ADF_MPID_Indicator', 'S1'),
 ('SIP_generated_Message_Identifier', 'S1'),
 ('National_BBO_LULD_Indicator', 'S1')]

In [17]:
# Mapping of field name -> PyTables column object (StringCol, Float64Col,
# Int32Col, ...); each column carries an explicit `pos` giving its position.
sample.bytes_spec.pytables_desc

{'Ask_Exchange': StringCol(itemsize=1, shape=(), dflt=b'', pos=14),
 'Ask_Price': Float64Col(shape=(), dflt=0.0, pos=9),
 'Ask_Size': Int32Col(shape=(), dflt=0, pos=10),
 'Bid_Exchange': StringCol(itemsize=1, shape=(), dflt=b'', pos=13),
 'Bid_Price': Float64Col(shape=(), dflt=0.0, pos=7),
 'Bid_Size': Int32Col(shape=(), dflt=0, pos=8),
 'Exchange': StringCol(itemsize=1, shape=(), dflt=b'', pos=4),
 'FINRA_ADF_MPID_Indicator': StringCol(itemsize=1, shape=(), dflt=b'', pos=24),
 'LULD_BBO_Indicator_CQS': StringCol(itemsize=1, shape=(), dflt=b'', pos=22),
 'LULD_BBO_Indicator_UTP': StringCol(itemsize=1, shape=(), dflt=b'', pos=23),
 'Market_Maker': StringCol(itemsize=4, shape=(), dflt=b'', pos=12),
 'NASDAQ_BBO_Ind': StringCol(itemsize=1, shape=(), dflt=b'', pos=17),
 'National_BBO_Ind': StringCol(itemsize=1, shape=(), dflt=b'', pos=16),
 'National_BBO_LULD_Indicator': StringCol(itemsize=1, shape=(), dflt=b'', pos=26),
 'Quote_Cancel_Correction': StringCol(itemsize=1, shape=(), dflt=b'',