# IbView1 Python3.6.4

In [1]:
import linecache
import sys
import pdb
import os
import io
import avro.datafile
import avro.io
import numpy as np
import pandas as pd
import matplotlib as plt

IbDataLogger is logging data to (on Bill's MacBook Pro):

    /Users/OneDrive - Entertel Technologies/SharedOneNote/LoggerLogs/Data

In [2]:
# For now, just hardwire our path to the logged data
DataFilePath = '/home/bill/Data'

File names consist of two parts: a data descriptor and a logger suffix.


The data descriptor for underlying data is:

    'YYYY-MM-DD-Underlying'.
    
The data descriptor for an option is:

    'SPX-YYYY-MM-DD-SSSS-CP-QueuedAtHH-mm-ss' where
    YYYY-MM-DD is the expiration date,
    SSSS is the strike price,
    CP is CALL or PUT and
    HH-mm-ss is the time at which this option was queued to be logged.

The logger suffix is YYYYMMDD-HH.0.log where

    YYYYMMDD is today's date and
    HH is the hour (24 hour time) covered by this file

In [3]:
# Get the list of all files in the logged data directory
DataFileDirectoryList = os.listdir(DataFilePath)

Define a list of file descriptors for sifting through the five kinds of files:

1) Underlying data avro encoded
2) Underlying data json encoded
3) Option data avro encoded
4) Option data json encoded
5) Everything/anything else

In [4]:
class DataFileDescriptor(dict):
    def __init__(self):
        self['FileName'] = 'PlaceHolder'
        self['LogHour'] = 6.0
        self['LogDay'] = 1
        self['LogMonth'] = 1
        self['LogYear'] = 2018
        self['FileType'] = 'UnderlyingAvro'
        self['StrikePrice'] = 2000
        self['ExpirationYear'] = 2018
        self['ExpirationMonth'] = 1
        self['ExpirationDay'] = 1
        self['ContractRight'] = 'CALL'
        self['QueuedHour'] = 6.0
        self['QueuedMinute'] = 1.0
        self['QueuedSecond'] = 1.0
        

Make the list of file desciptors for the files in the logged data directory

In [5]:
ListOfDataFileDescriptors = []
NumberOfUnderlyingJsonFiles = 0
NumberOfUnderlyingAvroFiles = 0
NumberOfOptionJsonFiles = 0
NumberOfOptionAvroFiles = 0
NumberOfOtherFiles = 0
for CurrentIndex in range(0, len(DataFileDirectoryList)-1):
    CurrentFileDescriptor = DataFileDescriptor()
    FileName = DataFileDirectoryList[CurrentIndex]
    CurrentFileDescriptor['FileName'] = FileName

    if FileName[11:25] == 'JsonUnderlying':
        CurrentFileDescriptor['FileType'] = 'UnderlyingJson'
        NumberOfUnderlyingJsonFiles += 1
    elif FileName[11:21] == 'Underlying':
        CurrentFileDescriptor['FileType'] = 'UnderlyingAvro'
        NumberOfUnderlyingAvroFiles += 1
    elif FileName[0:8] == 'SPX-Json':
        CurrentFileDescriptor['FileType'] = 'OptionJson'
        NumberOfOptionJsonFiles += 1
        CurrentFileDescriptor['StrikePrice'] = int(FileName[20:24])
        CurrentFileDescriptor['ExpirationYear'] = int(FileName[9:13])
        CurrentFileDescriptor['ExpirationMonth'] = int(FileName[14:16])
        CurrentFileDescriptor['ExpirationDay'] = int(FileName[17:19])
        if FileName[25] == 'P':
            CurrentFileDescriptor['ContractRight'] = 'PUT'
        else:        
            CurrentFileDescriptor['ContractRight'] = 'CALL'
        CurrentFileDescriptor['QueuedHour'] = int(FileName[-25:-23])
        CurrentFileDescriptor['QueuedMinute'] = int(FileName[-22:-20])
        CurrentFileDescriptor['QueuedSecond'] = int(FileName[-19:-17])
    elif FileName[0:3] == 'SPX':
        CurrentFileDescriptor['FileType'] = 'OptionAvro'
        NumberOfOptionAvroFiles += 1
        CurrentFileDescriptor['StrikePrice'] = int(FileName[15:19])
        CurrentFileDescriptor['ExpirationYear'] = int(FileName[4:8])
        CurrentFileDescriptor['ExpirationMonth'] = int(FileName[9:11])
        CurrentFileDescriptor['ExpirationDay'] = int(FileName[12:14])
        if FileName[20] == 'P':
            CurrentFileDescriptor['ContractRight'] = 'PUT'
        else:        
            CurrentFileDescriptor['ContractRight'] = 'CALL'
        CurrentFileDescriptor['QueuedHour'] = int(FileName[-25:-23])
        CurrentFileDescriptor['QueuedMinute'] = int(FileName[-22:-20])
        CurrentFileDescriptor['QueuedSecond'] = int(FileName[-19:-17])
    else:
        CurrentFileDescriptor['FileType'] = 'Other'
        NumberOfOtherFiles += 1
        # Don't complete the end of the for-loop for 'Other' files (which probably shouldn't occur....)
        continue
    CurrentFileDescriptor['LogYear'] = int(FileName[-17:-13])
    CurrentFileDescriptor['LogMonth'] = int(FileName[-13:-11])
    CurrentFileDescriptor['LogDay'] = int(FileName[-11:-9])
    CurrentFileDescriptor['LogHour'] = float(FileName[-8:-4])
    ListOfDataFileDescriptors.append(CurrentFileDescriptor)
print(f'Underlying JSON files: {str(NumberOfUnderlyingJsonFiles)}, Option JSON files: {str(NumberOfOptionJsonFiles)}')
print(f'Underlying Avro files: {str(NumberOfUnderlyingAvroFiles)}, Option Avro files: {str(NumberOfOptionAvroFiles)}')
print(f'Other files: {str(NumberOfOtherFiles)}')


Underlying JSON files: 81, Option JSON files: 2108
Underlying Avro files: 257, Option Avro files: 7576
Other files: 0


Let's pick a single day and look at what's logged that day

In [6]:
MyYear = 2018
MyMonth = 6
MyDay = 18
MyFiles = []
for i in range(0, len(ListOfDataFileDescriptors)):
    if (ListOfDataFileDescriptors[i]['LogYear'] == MyYear and
    ListOfDataFileDescriptors[i]['LogMonth'] == MyMonth and
    ListOfDataFileDescriptors[i]['LogDay'] == MyDay):
        MyFiles.append(ListOfDataFileDescriptors[i])
print('We got ' + str(len(MyFiles)) + ' files')

We got 398 files


Let's sort out the files logged that day

In [7]:
UnsortedUnderlyingJsonFiles = []
UnsortedUnderlyingAvroFiles = []
UnsortedOptionJsonFiles = []
UnsortedOptionAvroFiles = []
UnsortedOtherFiles = []
LowestStrikePrice = 999999
HighestStrikePrice = 0
for i in range(0, len(MyFiles)):
    if MyFiles[i]['FileType'] == 'UnderlyingJson':
        UnsortedUnderlyingJsonFiles.append(MyFiles[i])
    elif MyFiles[i]['FileType'] == 'UnderlyingAvro':
        UnsortedUnderlyingAvroFiles.append(MyFiles[i])
    elif MyFiles[i]['FileType'] == 'OptionJson':
        UnsortedOptionJsonFiles.append(MyFiles[i])
        if MyFiles[i]['StrikePrice'] > HighestStrikePrice:
            HighestStrikePrice = MyFiles[i]['StrikePrice']
        if MyFiles[i]['StrikePrice'] < LowestStrikePrice:
            LowestStrikePrice = MyFiles[i]['StrikePrice']
    elif MyFiles[i]['FileType'] == 'OptionAvro':
        UnsortedOptionAvroFiles.append(MyFiles[i])
        if MyFiles[i]['StrikePrice'] > HighestStrikePrice:
            HighestStrikePrice = MyFiles[i]['StrikePrice']
        if MyFiles[i]['StrikePrice'] < LowestStrikePrice:
            LowestStrikePrice = MyFiles[i]['StrikePrice']
    else:
        UnsortedOtherFiles.append(MyFiles[i])
SortedUnderlyingJsonFiles = sorted(UnsortedUnderlyingJsonFiles, key=lambda filedescriptor: filedescriptor['LogHour'])
SortedUnderlyingAvroFiles = sorted(UnsortedUnderlyingAvroFiles, key=lambda filedescriptor: filedescriptor['LogHour'])
SortedOptionJsonFiles = sorted(UnsortedOptionJsonFiles, key=lambda filedescriptor: filedescriptor['LogHour'])
SortedOptionAvroFiles = sorted(UnsortedOptionAvroFiles, key=lambda filedescriptor: filedescriptor['LogHour'])
StrikePrices = []
for price in range(LowestStrikePrice, HighestStrikePrice + 1, 5):
    StrikePrices.append(price)
print(f'We got {str(len(SortedUnderlyingJsonFiles))} UnderlyingJson, {str(len(SortedUnderlyingAvroFiles))} UnderlyingAvro files')
print(f'   and {str(len(SortedOptionJsonFiles))} OptionJson, {str(len(SortedOptionAvroFiles))} OptionAvro files')
print(f'   and {str(len(UnsortedOtherFiles))} Other files.')
print('UnderlyingAvro files:')
for i in range(0, len(SortedUnderlyingAvroFiles)):
    print(SortedUnderlyingAvroFiles[i]['FileName'])
print('Highest/Lowest strike price: ' + str(HighestStrikePrice) + '/' + str(LowestStrikePrice))
print('Strike prices:')
print(StrikePrices)

We got 7 UnderlyingJson, 7 UnderlyingAvro files
   and 192 OptionJson, 192 OptionAvro files
   and 0 Other files.
UnderlyingAvro files:
2018-06-18-Underlying20180618-06.0.log
2018-06-18-Underlying20180618-07.0.log
2018-06-18-Underlying20180618-08.0.log
2018-06-18-Underlying20180618-09.0.log
2018-06-18-Underlying20180618-10.0.log
2018-06-18-Underlying20180618-11.0.log
2018-06-18-Underlying20180618-12.0.log
Highest/Lowest strike price: 2780/2750
Strike prices:
[2750, 2755, 2760, 2765, 2770, 2775, 2780]


Define some helper functions for extracting data from the logged files

In [8]:
# from apogentus on stackoverflow
def PrintException():
    exc_type, exc_obj, tb = sys.exc_info()
    f = tb.tb_frame
    lineno = tb.tb_lineno
    filename = f.f_code.co_filename
    linecache.checkcache(filename)
    line = linecache.getline(filename, lineno, f.f_globals)
    print(f'Exception in ({filename}, line {lineno} "{line.strip()}"): {exc_obj}')
    
# Accept a string that was (properly, post June 15, 2018) utf-8-encoded from a byte 
#  array and return the byte array from which the string was encoded
def DecodeStringToBytes(String):
    ReturnBytes = bytearray()
    DecodeCounter = 0
    DecodeValue = 0
    CharNumber = 0
    CharValue = 0
    for Char in String:
        CharValue = Char
        CharNumber += 1
        if DecodeCounter == 0:
            # DecodeCounter == 0 means
            # we're not currently in the process of converting an escape sequence
            # either a backslash-escaped character or a backslash-escapted, 4 character binary byte value
            if ord(Char) == 92:
                # this character is a backslash so it's either
                # the beginning of a 4-char binary value encoded as \xnn or
                # the backslash of a two-character escape sequence.
                DecodeCounter = 1
                DecodeValue = 0
            else:
                # this char is not a backslash so it's a byte whose value is a printable ASCII character
                # so add it as is to the byte array
                ReturnBytes.append(ord(Char))
        elif DecodeCounter == 1:
            # DecodeCounter == 1 means
            # this is a character following a backslash
            # ?? is it an escaped generic binary value ??
            if ord(Char) == 120:
                # it's the x of '\xnn' so ignore it and move on to collect the two hex digits following
                DecodeCounter = 2
            # ?? is it an escaped ASCII control code ??
            elif ord(Char) == 97:
                # it's the a of a 'Bell' ('\a') so declare an ASCII BEL byte
                ReturnBytes.append(7)
                DecodeCounter = 0
            elif ord(Char) == 98:
                # it's the b of a backspace ('\b') so declare an ASCII BS byte
                ReturnBytes.append(8)
                DecodeCounter = 0
            elif ord(Char) == 116:
                # it's the t of a tab ('\t') so declare an ASCII TAB byte
                ReturnBytes.append(9)
                DecodeCounter = 0
            elif ord(Char) == 110:
                # it's the n of a newline ('\n') so declare an ASCII LF byte
                ReturnBytes.append(10)
                DecodeCounter = 0
            elif ord(Char) == 118:
                # it's the v of a vertical tab ('\v') so declare an ASCII VT byte
                ReturnBytes.append(11)
                DecodeCounter = 0
            elif ord(Char) == 102:
                # it's the f of a form feed ('\f') so declare an ASCII FF byte
                ReturnBytes.append(12)
                DecodeCounter = 0
            elif ord(Char) == 114:
                # it's the r of a carriage return ('\r') so declare an ASCII CR byte
                ReturnBytes.append(13)
                DecodeCounter = 0
            # ?? is it an escaped ASCII printable character ??
            elif ord(Char) == 34:
                # it's the double quote of an escaped double quote so declare an ASCII double quote character code byte
                ReturnBytes.append(34)
                DecodeCounter = 0
            elif ord(Char) == 39:
                # it's the single quote of an escaped single quote so declare an ASCII single quote character code byte
                ReturnBytes.append(39)
                DecodeCounter = 0
            elif ord(Char) == 92:
                # it's the second backslash of an escaped backslash so declare an ASCII backslash character code byte
                ReturnBytes.append(92)
                DecodeCounter = 0
            else:
                # Anything else following a backslash is an error
                ReturnBytes.append(92)
                ReturnBytes.append(ord(Char))
                print(f'Got {ord(Char)} after a backslash at column {CharNumber}')
                Swath = 10
                FirstDisplayChar = max(0, CharNumber - Swath)
                LastDisplayChar = min(len(String)-1, CharNumber + Swath)
                print(f'...{String[FirstDisplayChar:LastDisplayChar]}...')
                DecodeCounter = 0
        elif DecodeCounter == 2:
            # DecodeCounter == 2 means
            # this char is the MSB of a four-char-encoded binary value
            DecodeValue = 16 * IntegerHexValue(Char)
            DecodeCounter = 3
        elif DecodeCounter == 3:
            # DecodeCounter == 3 means
            # this char is the LSB of a four-char-encoded binary value
            DecodeValue += IntegerHexValue(Char)
            ReturnBytes.append(DecodeValue)
            DecodeCounter = 0
        else:
            # DecodeCounter is something other than 0, 1, 2 or 3 so we've got an error
            print(f'Got DecodeCounter == {DecodeCounter}')
            DecodeCounter = 0
    if DecodeCounter != 0:
        # We've processed all characters in the given string
        # DecodeCounter != 0 means we're in the midst of processing an escape sequence
        print(f'DecodeCounter is {DecodeCounter} after the whole string has been processed')
    return ReturnBytes

# Original version that does its best with the flakey data before June 15, 2018
def DecodeStringToBytesPreJune15_2018(String):
    ReturnBytes = bytearray()
    DecodeCounter = 0
    DecodeValue = 0
    CharNumber = 0
    CharValue = 0
    for Char in String:
        CharValue = Char
        CharNumber += 1
        if DecodeCounter == 0:
            # DecodeCounter == 0 means
            # we're not currently in the process of converting an escape sequence
            # either a backslash-escaped character or a backslash-escapted, 4 character binary byte value
            if ord(Char) == 92:
                # this character is a backslash so it's either
                # the beginning of a 4-char binary value encoded as \xnn or
                # the backslash of a two-character escape sequence.
                DecodeCounter = 1
                DecodeValue = 0
            else:
                # this char is not a backslash so it's a byte whose value is a printable ASCII character
                # so add it as is to the byte array
                ReturnBytes.append(ord(Char))
        elif DecodeCounter == 1:
            # DecodeCounter == 1 means
            # this is a character following a backslash
            # ?? is it the 'x' of a generic binary '\xnn' ??
            if ord(Char) == 120:
                # it's the x of '\xnn' so ignore it and move on to collect the two hex digits following
                DecodeCounter = 2
            # ?? is it an escaped ASCII control code ??
            elif ord(Char) == 97:
                # it's the a of a 'Bell' ('\a') so declare an ASCII BEL byte
                ReturnBytes.append(7)
                DecodeCounter = 0
            elif ord(Char) == 98:
                # it's the b of a backspace ('\b') so declare an ASCII BS byte
                ReturnBytes.append(8)
                DecodeCounter = 0
            elif ord(Char) == 116:
                # it's the t of a tab ('\t') so declare an ASCII TAB byte
                ReturnBytes.append(9)
                DecodeCounter = 0
            elif ord(Char) == 110:
                # it's the n of a newline ('\n') so declare an ASCII LF byte
                ReturnBytes.append(10)
                DecodeCounter = 0
            elif ord(Char) == 118:
                # it's the v of a vertical tab ('\v') so declare an ASCII VT byte
                ReturnBytes.append(11)
                DecodeCounter = 0
            elif ord(Char) == 102:
                # it's the f of a form feed ('\f') so declare an ASCII FF byte
                ReturnBytes.append(12)
                DecodeCounter = 0
            elif ord(Char) == 114:
                # it's the r of a carriage return ('\r') so declare an ASCII CR byte
                ReturnBytes.append(13)
                DecodeCounter = 0
            elif ord(Char) == 34:
                # it's the double quote of an escaped double quote so declare an ASCII double quote character code byte
                ReturnBytes.append(34)
                DecodeCounter = 0
            elif ord(Char) == 39:
                # it's the single quote of an escaped single quote so declare an ASCII single quote character code byte
                ReturnBytes.append(39)
                DecodeCounter = 0
            elif ord(Char) == 92:
                # it's a second backslash of an escaped backslash so declare an ASCII backslash character code byte
                ReturnBytes.append(92)
                DecodeCounter = 0
            else:
                # Doesn't seem to match any of the known escape sequences... this is an error
                print(f'Escape sequence error: backslash {Char}')
                ReturnBytes.append(92)
                ReturnBytes.append(ord(Char))
#                 print(f'Got {ord(Char)} after a backslash at column {CharNumber}')
#                 Swath = 10
#                 FirstDisplayChar = max(0, CharNumber - Swath)
#                 LastDisplayChar = min(len(String)-1, CharNumber + Swath)
#                 print(f'...{String[FirstDisplayChar:LastDisplayChar]}...')
                DecodeCounter = 0
        elif DecodeCounter == 2:
            # DecodeCounter == 2 means
            # this char is the MSB of a four-char-encoded binary value
            DecodeValue = 16 * IntegerHexValue(Char)
            DecodeCounter = 3
        elif DecodeCounter == 3:
            # DecodeCounter == 3 means
            # this char is the LSB of a four-char-encoded binary value
            DecodeValue += IntegerHexValue(Char)
            ReturnBytes.append(DecodeValue)
            DecodeCounter = 0
        else:
            # DecodeCounter is something other than 0, 1, 2 or 3 so we've got an error
            print(f'Got DecodeCounter == {DecodeCounter}')
            DecodeCounter = 0
    if DecodeCounter != 0:
        # We've processed all characters in the given string
        # DecodeCounter != 0 means we're in the midst of processing an escape sequence
        print(f'DecodeCounter is {DecodeCounter} after the whole string has been processed')
    return ReturnBytes

def IntegerHexValue(Char):
    if Char == '0':
        return 0
    elif Char == '1':
        return 1
    elif Char == '2':
        return 2
    elif Char == '3':
        return 3
    elif Char == '4':
        return 4
    elif Char == '5':
        return 5
    elif Char == '6':
        return 6
    elif Char == '7':
        return 7
    elif Char == '8':
        return 8
    elif Char == '9':
        return 9
    elif Char == 'a':
        return 10
    elif Char == 'b':
        return 11
    elif Char == 'c':
        return 12
    elif Char == 'd':
        return 13
    elif Char == 'e':
        return 14
    elif Char == 'f':
        return 15
    elif Char == 'A':
        return 10
    elif Char == 'B':
        return 11
    elif Char == 'C':
        return 12
    elif Char == 'D':
        return 13
    elif Char == 'E':
        return 14
    elif Char == 'F':
        return 15


Let's try reading the first underlying file

In [13]:
%%capture captured
FullPathFileName = DataFilePath + '/' + SortedUnderlyingAvroFiles[0]['FileName']
FirstUnderlyingFile = open(FullPathFileName, 'rt')
LineNumber = 0
for Line in FirstUnderlyingFile:
    LineNumber += 1
#     print(f'Line# {LineNumber}')
    TimeStampString, AvroStringWithByteTags = Line.split('---')
    AvroString = AvroStringWithByteTags[2:-2]
    AvroByteArray = DecodeStringToBytes(AvroString)
    AvroByteStream = io.BytesIO(AvroByteArray)
#     if LineNumber == 84 or LineNumber == 104 or LineNumber == 114:
#     if LineNumber == 2:
    if False:
        a = 1
#         print('!!!###*** Line: ' + str(LineNumber))
#         print(TimeStampString)
#         print(AvroString)
#         print(AvroByteArray)
    else:
        try:
#             print(TimeStampString)
#             print(AvroString)
#             print(AvroByteArray)
            reader = avro.datafile.DataFileReader(AvroByteStream, avro.io.DatumReader())
            for datum in reader:
                print('Line: ' + str(LineNumber) + ' at time ' + TimeStampString + ', has price at: ' + str(datum['Last']['Price']))
            reader.close()
        except Exception as e:
            print(f'Exception, line # {str(LineNumber)}: {str(e)}')
    #         break
    AvroByteStream.close()
#     if LineNumber > 10:
#         break
FirstUnderlyingFile.close()


In [14]:
CaptureOutputFile = open('/home/bill/SiftedData/JupyterCapture.txt', 'wt')
CaptureOutputFile.write(captured.stdout)
CaptureOutputFile.close()