# IbView1 Python3.6.4

In [1]:
import linecache
import sys
import pdb
import os
import io
import avro.datafile
import avro.io
import numpy as np
import pandas as pd
import matplotlib as plt

IbDataLogger is logging data to (on Bill's MacBook Pro):

    /Users/OneDrive - Entertel Technologies/SharedOneNote/LoggerLogs/Data

In [2]:
# For now, just hardwire our path to the logged data
DataFilePath = '/home/bill/Data'

File names consist of two parts: a data descriptor and a logger suffix.


The data descriptor for underlying data is:

    'YYYY-MM-DD-Underlying'.
    
The data descriptor for an option is:

    'SPX-YYYY-MM-DD-SSSS-CP-QueuedAtHH-mm-ss' where
    YYYY-MM-DD is the expiration date,
    SSSS is the strike price,
    CP is CALL or PUT and
    HH-mm-ss is the time at which this option was queued to be logged.

The logger suffix is YYYYMMDD-HH.0.log where

    YYYYMMDD is today's date and
    HH is the hour (24 hour time) covered by this file

In [3]:
# Get the list of all files in the logged data directory
DataFileDirectoryList = os.listdir(DataFilePath)

Define a list of file descriptors for sifting through the three kinds of files:

1) Underlying data
2) Option data
3) Everything/anything else

In [4]:
class DataFileDescriptor(dict):
    def __init__(self):
        self['FileName'] = 'PlaceHolder'
        self['LogHour'] = 6.0
        self['LogDay'] = 1
        self['LogMonth'] = 1
        self['LogYear'] = 2018
        self['FileType'] = 'Underlying'
        self['StrikePrice'] = 2000
        self['ExpirationYear'] = 2018
        self['ExpirationMonth'] = 1
        self['ExpirationDay'] = 1
        self['ContractRight'] = 'CALL'
        self['QueuedHour'] = 6.0
        self['QueuedMinute'] = 1.0
        self['QueuedSecond'] = 1.0
        

Make the list of file desciptors for the files in the logged data directory

In [5]:
ListOfDataFileDescriptors = []
NumberOfUnderlyingFiles = 0
NumberOfUnderlyingJsonFiles = 0
NumberOfOptionFiles = 0
NumberOfOptionJsonFiles = 0
NumberOfOtherFiles = 0
for CurrentIndex in range(0, len(DataFileDirectoryList)-1):
    CurrentFileDescriptor = DataFileDescriptor()
    FileName = DataFileDirectoryList[CurrentIndex]
    CurrentFileDescriptor['FileName'] = FileName
    
    # For now - just step over the JSON versions of the files
    if FileName[4:8] == 'Json':
        NumberOfOptionJsonFiles += 1
        continue
    if FileName[11:15] == 'Json':
        NumberOfUnderlyingJsonFiles += 1
        continue
    # For now - just step over the JSON versions of the files
        
    if FileName[0:3] == 'SPX':
        CurrentFileDescriptor['FileType'] = 'Option'
        NumberOfOptionFiles += 1
        CurrentFileDescriptor['StrikePrice'] = int(FileName[15:19])
        CurrentFileDescriptor['ExpirationYear'] = int(FileName[4:8])
        CurrentFileDescriptor['ExpirationMonth'] = int(FileName[9:11])
        CurrentFileDescriptor['ExpirationDay'] = int(FileName[12:14])
        if FileName[20] == 'P':
            CurrentFileDescriptor['ContractRight'] = 'PUT'
        else:        
            CurrentFileDescriptor['ContractRight'] = 'CALL'
        CurrentFileDescriptor['QueuedHour'] = int(FileName[-25:-23])
        CurrentFileDescriptor['QueuedMinute'] = int(FileName[-22:-20])
        CurrentFileDescriptor['QueuedSecond'] = int(FileName[-19:-17])
    elif FileName[11:21] == 'Underlying':
        CurrentFileDescriptor['FileType'] = 'Underlying'
        NumberOfUnderlyingFiles += 1
    else:
        CurrentFileDescriptor['FileType'] = 'Other'
        NumberOfOtherFiles += 1
        continue
    CurrentFileDescriptor['LogYear'] = int(FileName[-17:-13])
    CurrentFileDescriptor['LogMonth'] = int(FileName[-13:-11])
    CurrentFileDescriptor['LogDay'] = int(FileName[-11:-9])
    CurrentFileDescriptor['LogHour'] = float(FileName[-8:-4])
    ListOfDataFileDescriptors.append(CurrentFileDescriptor)
print(f'Underlying: {str(NumberOfUnderlyingFiles)}, Option: {str(NumberOfOptionFiles)}, Other: {str(NumberOfOtherFiles)}')
print(f'Underlying JSON files: {str(NumberOfUnderlyingJsonFiles)}, Option JSON files: {str(NumberOfOptionJsonFiles)}')

Underlying: 189, Option: 5864, Other: 0
Underlying JSON files: 13, Option JSON files: 396


Let's pick a single day and look at what's logged that day

In [6]:
MyYear = 2018
MyMonth = 5
MyDay = 9
MyFiles = []
for i in range(0, len(ListOfDataFileDescriptors)):
    if (ListOfDataFileDescriptors[i]['LogYear'] == MyYear and
    ListOfDataFileDescriptors[i]['LogMonth'] == MyMonth and
    ListOfDataFileDescriptors[i]['LogDay'] == MyDay):
        MyFiles.append(ListOfDataFileDescriptors[i])
print('We got ' + str(len(MyFiles)) + ' files')

We got 264 files


Let's sort out the files logged that day

In [7]:
UnsortedUnderlyingFiles = []
MyOptionFiles = []
MyOtherFiles = []
LowestStrikePrice = 999999
HighestStrikePrice = 0
for i in range(0, len(MyFiles)):
    if MyFiles[i]['FileType'] == 'Underlying':
        UnsortedUnderlyingFiles.append(MyFiles[i])
    elif MyFiles[i]['FileType'] == 'Option':
        MyOptionFiles.append(MyFiles[i])
        if MyFiles[i]['StrikePrice'] > HighestStrikePrice:
            HighestStrikePrice = MyFiles[i]['StrikePrice']
        if MyFiles[i]['StrikePrice'] < LowestStrikePrice:
            LowestStrikePrice = MyFiles[i]['StrikePrice']
    else:
        MyOtherFiles.append(MyFiles[i])
MyUnderlyingFiles = sorted(UnsortedUnderlyingFiles, key=lambda filedescriptor: filedescriptor['LogHour'])
MyStrikePrices = []
for price in range(LowestStrikePrice, HighestStrikePrice + 1, 5):
    MyStrikePrices.append(price)
print('We got ' + str(len(MyUnderlyingFiles)) + ' Underlying, \n' +
                     str(len(MyOptionFiles)) + ' Option and \n' +
                     str(len(MyOtherFiles)) + ' Other files.')
print('Underlying:')
for i in range(0, len(MyUnderlyingFiles)):
    print(MyUnderlyingFiles[i]['FileName'])
print('Highest/Lowest strike price: ' + str(HighestStrikePrice) + '/' + str(LowestStrikePrice))
print('Strike prices:')
print(MyStrikePrices)

We got 8 Underlying, 
256 Option and 
0 Other files.
Underlying:
2018-05-09-Underlying20180509-06.0.log
2018-05-09-Underlying20180509-07.0.log
2018-05-09-Underlying20180509-08.0.log
2018-05-09-Underlying20180509-09.0.log
2018-05-09-Underlying20180509-10.0.log
2018-05-09-Underlying20180509-11.0.log
2018-05-09-Underlying20180509-12.0.log
2018-05-09-Underlying20180509-13.0.log
Highest/Lowest strike price: 2710/2665
Strike prices:
[2665, 2670, 2675, 2680, 2685, 2690, 2695, 2700, 2705, 2710]


Define some helper functions for extracting data from the logged files

In [8]:
# from apogentus on stackoverflow
def PrintException():
    exc_type, exc_obj, tb = sys.exc_info()
    f = tb.tb_frame
    lineno = tb.tb_lineno
    filename = f.f_code.co_filename
    linecache.checkcache(filename)
    line = linecache.getline(filename, lineno, f.f_globals)
    print(f'Exception in ({filename}, line {lineno} "{line.strip()}"): {exc_obj}')
    
# (copied from older Python code's IbDataViewerUtilities.py)
# Accept a string that was utf-8-encoded from a byte array and return the byte array from which the string was encoded
def DecodeStringToBytes(String):
    ReturnBytes = bytearray()
    DecodeCounter = 0
    DecodeValue = 0
    CharNumber = 0
    CharValue = 0
    for Char in String:
        CharValue = Char
        CharNumber += 1
        if DecodeCounter == 0:
            # we're not currently in the process of converting 4 chars to a byte
            if Char == '\\':
                # this char may be the beginning of a 4-char set
                DecodeCounter = 1
                DecodeValue = 0
            else:
                # this char is just another char so add it to the byte array
                ReturnBytes.append(ord(Char))
        else:
            if DecodeCounter == 1:
                # This is the character following a backslash
#                 if Char == 'x' or Char == 'X':
                if Char == 'x':
                    # it's the x of '\xnn' so ignore it and move on to collect the two hex digits following
                    DecodeCounter = 2
#                 elif Char == 'a' or Char == 'A':
#                 elif Char == 'a':
#                     # it's the a of a 'Bell' ('\a') so declare an ASCII BEL byte
#                     ReturnBytes.append(7)
#                     DecodeCounter = 0
#                 elif Char == 'b' or Char == 'B':
                elif Char == 'b':
                    # it's the b of a backspace ('\b') so declare an ASCII BS byte
                    ReturnBytes.append(8)
                    DecodeCounter = 0
#                 elif Char == 't' or Char == 'T':
                elif Char == 't':
                    # it's the t of a tab ('\t') so declare an ASCII TAB byte
                    ReturnBytes.append(9)
                    DecodeCounter = 0
#                 elif Char == 'n' or Char == 'N':
                elif Char == 'n':
                    # it's the n of a newline ('\n') so declare an ASCII LF byte
                    ReturnBytes.append(10)
                    DecodeCounter = 0
#                 elif Char == 'v' or Char == 'V':
                elif Char == 'v':
                    # it's the v of a vertical tab ('\v') so declare an ASCII VT byte
                    ReturnBytes.append(11)
                    DecodeCounter = 0
#                 elif Char == 'f' or Char == 'F':
                elif Char == 'f':
                    # it's the f of a form feed ('\f') so declare an ASCII FF byte
                    ReturnBytes.append(12)
                    DecodeCounter = 0
#                 elif Char == 'r' or Char == 'R':
                elif Char == 'r':
                    # it's the r of a carriage return ('\r') so declare an ASCII CR byte
                    ReturnBytes.append(13)
                    DecodeCounter = 0
                elif Char == '"':
                    # it's the double quote of an escaped double quote so declare an ASCII double quote character code byte
                    ReturnBytes.append(34)
                    DecodeCounter = 0
                elif Char == '\'':
                    # it's the single quote of an escaped single quote so declare an ASCII single quote character code byte
                    ReturnBytes.append(39)
                    DecodeCounter = 0
                    # Perhaps the initial/original avro encoding does NOT escape a lone backslash character???
                    # ... so a second backslash means the first one was just an ASCII backslash and this second
                    # ... one is an escape character???
                elif Char == '\\':
#                         # it's the second backslash of an escaped backslash character so declare an ASCII backslash byte
#                         ReturnBytes.append(92)
#                         DecodeCounter = 0
                    # it's a second backslash so append the first one as an actual slash and start the decode over
                    ReturnBytes.append(ord('\\'))
                    DecodeCounter = 1
                else:
#                         #else we got a not-yet-known escaped sequence so
#                         print('\nGot {0} after a backslash'.format(Char))
#                         ReturnBytes.append(ord('\\'))
#                         ReturnBytes.append(ord(Char))
#                         DecodeCounter = 0
                    # this character isn't know as an escape sequence's 2nd character so assume the preceding
                    # backslash was simply a backslash
                    ReturnBytes.append(ord('\\'))
                    ReturnBytes.append(ord(Char))
                    DecodeCounter = 0
            elif DecodeCounter == 2:
                # this char is the MSB of the encoded value
                DecodeValue = 16 * IntegerHexValue(Char)
                DecodeCounter = 3
            else:
                # this char is the LSB of the encoded value
                DecodeValue += IntegerHexValue(Char)
                ReturnBytes.append(DecodeValue)
                DecodeCounter = 0
    if DecodeCounter == 1:
        ReturnBytes.append(ord('\\'))
    return ReturnBytes

def IntegerHexValue(Char):
    if Char == '0':
        return 0
    elif Char == '1':
        return 1
    elif Char == '2':
        return 2
    elif Char == '3':
        return 3
    elif Char == '4':
        return 4
    elif Char == '5':
        return 5
    elif Char == '6':
        return 6
    elif Char == '7':
        return 7
    elif Char == '8':
        return 8
    elif Char == '9':
        return 9
    elif Char == 'a':
        return 10
    elif Char == 'b':
        return 11
    elif Char == 'c':
        return 12
    elif Char == 'd':
        return 13
    elif Char == 'e':
        return 14
    elif Char == 'f':
        return 15
    elif Char == 'A':
        return 10
    elif Char == 'B':
        return 11
    elif Char == 'C':
        return 12
    elif Char == 'D':
        return 13
    elif Char == 'E':
        return 14
    elif Char == 'F':
        return 15


Let's try reading the first underlying file

In [9]:
%%capture captured
FullPathFileName = DataFilePath + '/' + MyUnderlyingFiles[2]['FileName']
FirstUnderlyingFile = open(FullPathFileName, 'rt')
LineNumber = 0
for Line in FirstUnderlyingFile:
    LineNumber += 1
    TimeStampString, AvroStringWithByteTags = Line.split('---')
    AvroString = AvroStringWithByteTags[2:-2]
    AvroByteArray = DecodeStringToBytes(AvroString)
    AvroByteStream = io.BytesIO(AvroByteArray)
#     if LineNumber == 84 or LineNumber == 104 or LineNumber == 114:
    if False:
        print('!!!###*** Line: ' + str(LineNumber))
        print(TimeStampString)
        print(AvroString)
        print(AvroByteArray)
    else:
        reader = avro.datafile.DataFileReader(AvroByteStream, avro.io.DatumReader())
        for datum in reader:
            print('Line: ' + str(LineNumber) + ' at time ' + TimeStampString + ', has price at: ' + str(datum['Last']['Price']))
        reader.close()
    #         break
    AvroByteStream.close()
#     break
FirstUnderlyingFile.close()


AssertionError: b'\x06SPX\xc0\x1f\x02\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\x0f\n\xd7\xa3p\xbd\xe6\xa4@\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf6(\\\x8f\xc2\xe4\xa4@\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\n\xd7\xa3p\xbd\xe5\xa4@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x141525878000\x00\x00\x00\x00\x00\x00\x00\x00\xecQ\xb8\x1e\x85\xf6\xa4@\x85\xebQ\xb8\x1e\xe5\xa4@\xa4p=\n\xd7\xdf\xa4@\xd8\xf27^\xc2b\x0c\xda\xe5\x1eC(7\x08\xe9G'

In [10]:
CaptureOutputFile = open('/home/bill/SiftedData/JupyterCapture.txt', 'wt')
CaptureOutputFile.write(captured.stdout)
CaptureOutputFile.close()

FileNotFoundError: [Errno 2] No such file or directory: '/home/bill/SiftedData/JupyterCapture.txt'