In [1]:
#Import library
from highlib import Letter
from highlib import HighLight


## Objects used in the library

In [2]:
# Build a Letter from three one-character strings and print it; the text
# shown under this cell is whatever print() renders for the object.
letterObj = Letter("a", "b", "c")
print(letterObj)

[a] [b]  [c]


## Reading in a file

In [3]:
# Construct a HighLight for "file.txt" (the second argument is passed through
# exactly as given) and ask it for its lines.
high = HighLight("file.txt", 'RomanFile')
lineArr = high.lines()
# print() separates its arguments with one space, giving "<count> lines read in".
print(len(lineArr), "lines read in")

4 lines read in


In [4]:
# Print the .line attribute of every entry returned by high.lines().
for thisLine in lineArr:
    print(thisLine.line)

951212 050000.000 MONDEO_44 @C 269.7 2.0 10
122234 423444.000 FORD_11 @C 354.7 2.1 14
951212 050200.000 COROLLA_44 @C 177.9 3.1 15
951212 050200.000 COROLLA_44 @C 200 3.1 15


## Handling tokens

In [5]:
# Register three records on the HighLight object. Judging by the hashRecord
# dump printed further down, the arguments are stored as name, attr, an
# integer key and a color (presumably the integer is a token index - TODO confirm).
# NOTE(review): index 1 is also labelled "Date" although the sample data in that
# position looks like a time - confirm this is intended.
# NOTE(review): "Venicle" looks like a typo for "Vehicle"; it is a runtime string
# that appears in the recorded output, so confirm before changing it.
high.record("Date", "n/a", 0,'yellow')
high.record("Date", "n/a", 1,'yellow')
high.record("Venicle", "n/a", 2,'blue')

In [6]:
# Dump the hashRecord attribute to inspect what the record() calls stored.
print(high.hashRecord)

{'empty': {0: {'name': 'Date', 'attr': 'n/a', 'color': 'yellow'}, 1: {'name': 'Date', 'attr': 'n/a', 'color': 'yellow'}, 2: {'name': 'Venicle', 'attr': 'n/a', 'color': 'blue'}}}


In [7]:
# Iterate hashRecord directly to list its top-level keys.
for key in high.hashRecord:
    print(key)

empty


## Playing with regexp
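Let's experiment with a self-aware token object: one that knows its own character offsets within the source text and can record itself against them.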

In [8]:
import re
from re import finditer

class Line:
    """One line of text together with its recorded offsets.

    Attributes:
        start: offset of the line's first character; stored exactly as given
            and converted with int() only when tokens are built.
        end: offset just past the line's last character, stored as given.
        text: the line's content.
        chars: sequence handed on, unchanged, to every Token created here.
    """

    def __init__(self, start, end, text, chars):
        self.start, self.end = start, end
        self.text, self.chars = text, chars

    def tokens(self):
        """Return a Token for each run of non-whitespace characters in text.

        Each Token receives the match's (start, end) span relative to this
        line, the matched text, this line's start as an int, and the shared
        chars sequence. An empty or all-whitespace line yields an empty list.
        """
        return [
            Token(found.span(), found.group(), int(self.start), self.chars)
            for found in finditer("\\S+", self.text)
        ]

class Token:
    """A piece of text that knows where it sits in a larger character array.

    Attributes:
        span: (start, end) pair of offsets relative to the owning line.
        text: the token's text.
        lineStart: offset of the owning line, added to span to get positions
            in chars.
        chars: mutable sequence indexed by those positions; record() writes
            into it.
    """

    def __init__(self, span, text, lineStart, chars):
        self.span, self.text = span, text
        self.lineStart, self.chars = lineStart, chars

    def __str__(self):
        positions = "Start:[{}] End:[{}] ".format(self.start(), self.end())
        return positions + "Text:[" + self.text + "]"

    def start(self):
        """Return lineStart plus the first element of span."""
        offset = self.span[0]
        return self.lineStart + offset

    def end(self):
        """Return lineStart plus the second element of span."""
        offset = self.span[1]
        return self.lineStart + offset

    def record(self, tool, field, value, units):
        """Label every chars slot covered by this token with "tool/field".

        value and units are only used to build the message that is printed;
        they are not stored anywhere. Slots from start() up to, but not
        including, end() are overwritten.
        """
        toolField = tool + "/" + field
        message = "Value:{} Units:{}".format(value, units)
        first = self.start()
        last = self.end()
        print("Looping through cells {} to {}".format(first, last))
        print("Setting tool/field to:[{}] Msg to:[{}]".format(toolField, message))
        for cell in range(first, last):
            self.chars[cell] = toolField

## Try out self-aware lines


In [9]:
# Four sample records, one per line. Some fields are separated by two spaces
# rather than one, so tokens are found by pattern rather than by column.
sampleLines = (
    "MONDEO_44 @C 269.7 2.0 10\n"
    "MONDEO_44 @C 269.7  2.0 11\n"
    "MONDEO_45  @C 249.7  2.0 10\n"
    "MONDEO_46  @C 239.7 4.0 12\n"
)
myName = "Trial Extractor"

# One slot per character of the sample text (newlines included). Token.record
# overwrites the slots a token covers with its "tool/field" label.
charLen = len(sampleLines)
chars = [None] * charLen

# Break the text into self-aware lines: each Line is handed its start and end
# offsets within sampleLines (as strings) together with the shared chars list.
strLines = sampleLines.splitlines()
lines = []
ctr = 0
for thisLine in strLines:
    thisLen = len(thisLine)
    lines.append(Line(str(ctr), str(ctr + thisLen), thisLine, chars))
    # Step over this line and the single newline that ended it.
    ctr = ctr + thisLen + 1

# Process the lines: tokenize each one and record three of its tokens.
for thisLine in lines:
    print("== New line ==")
    tokens = thisLine.tokens()

    # First token is recorded as the vehicle, kept as plain text.
    print("= Vehicle =")
    vehicleToken = tokens[0]
    vehicleVal = vehicleToken.text
    vehicleToken.record(tool=myName, field="Vehicle", value=vehicleVal, units="n/a")

    # Fourth token is parsed as a float and recorded as the speed.
    print("= Speed =")
    speedToken = tokens[3]
    speedVal = float(speedToken.text)
    speedToken.record(tool=myName, field="Speed", value=speedVal, units="m/s")

    # Fifth token is parsed as a float and recorded as the direction.
    # NOTE(review): confirm the fifth field really is a direction in degrees.
    print("= Direction =")
    dirToken = tokens[4]
    directionVal = float(dirToken.text)
    dirToken.record(tool=myName, field="Direction", value=directionVal, units="degs")

# Show the per-character labels after every token has been recorded.
print(chars[:])

== New line ==
= Vehicle =
Looping through cells 0 to 9
Setting tool/field to:[Trial Extractor/Vehicle] Msg to:[Value:MONDEO_44 Units:n/a]
= Speed =
Looping through cells 19 to 22
Setting tool/field to:[Trial Extractor/Speed] Msg to:[Value:2.0 Units:m/s]
= Direction =
Looping through cells 23 to 25
Setting tool/field to:[Trial Extractor/Direction] Msg to:[Value:10.0 Units:degs]
== New line ==
= Vehicle =
Looping through cells 26 to 35
Setting tool/field to:[Trial Extractor/Vehicle] Msg to:[Value:MONDEO_44 Units:n/a]
= Speed =
Looping through cells 46 to 49
Setting tool/field to:[Trial Extractor/Speed] Msg to:[Value:2.0 Units:m/s]
= Direction =
Looping through cells 50 to 52
Setting tool/field to:[Trial Extractor/Direction] Msg to:[Value:11.0 Units:degs]
== New line ==
= Vehicle =
Looping through cells 53 to 62
Setting tool/field to:[Trial Extractor/Vehicle] Msg to:[Value:MONDEO_45 Units:n/a]
= Speed =
Looping through cells 74 to 77
Setting tool/field to:[Trial Extractor/Speed] Msg to:[