In [121]:
# Import the highlib classes used in this notebook
from highlib import Letter
from highlib import HighLight


## Objects used in the library

In [122]:
# Build a Letter from three one-character strings and print its string form.
letterObj = Letter("a", "b", "c")
print(letterObj)

[a] [b]  [c]


## Reading in a file

In [123]:
# Build a HighLight for file.txt, fetch its line objects and report how many
# came back.
high = HighLight("file.txt", "RomanFile")
lineArr = high.lines()
print(len(lineArr), "lines read in")

4 lines read in


In [124]:
# Show the text held by each line object (exposed as its `.line` attribute).
for thisLine in lineArr:
    print(thisLine.line)

951212 050000.000 MONDEO_44 @C 269.7 2.0 10
122234 423444.000 FORD_11 @C 354.7 2.1 14
951212 050200.000 COROLLA_44 @C 177.9 3.1 15
951212 050200.000 COROLLA_44 @C 200 3.1 15


## Handling tokens

In [125]:
# Register three records on the HighLight object. Judging by the hashRecord
# dump printed in the next cell, the arguments are (name, attr, index, color);
# presumably the index is a token position within a line - TODO confirm.
# NOTE(review): index 1 is labelled "Date" although the second field of each
# sample line looks like a time - confirm this is intended.
# NOTE(review): "Venicle" looks like a typo for "Vehicle"; left untouched here
# because the stored output below still shows the original spelling.
high.record("Date", "n/a", 0,'yellow')
high.record("Date", "n/a", 1,'yellow')
high.record("Venicle", "n/a", 2,'blue')

In [126]:
# Dump the raw record store kept on the HighLight object.
print(high.hashRecord)

{'empty': {0: {'name': 'Date', 'attr': 'n/a', 'color': 'yellow'}, 1: {'name': 'Date', 'attr': 'n/a', 'color': 'yellow'}, 2: {'name': 'Venicle', 'attr': 'n/a', 'color': 'blue'}}}


In [127]:
# List only the top-level keys of the record store.
for key in high.hashRecord:
    print(key)

empty


## Playing with regexp
Let's experiment with a self-aware token object: one that knows its own text and where that text sits.

In [132]:
import re
from re import finditer

class Token:
    """A matched piece of text that knows where it sits.

    The constructor stores its arguments unchanged:
      span      -- (start, end) pair locating the text within its line
      text      -- the matched string
      lineStart -- offset of the line itself; start() and end() add it to
                   the span so positions are no longer line-relative
    """

    def __init__(self, span, text, lineStart):
        self.span, self.text, self.lineStart = span, text, lineStart

    def start(self):
        """Return the first element of the span shifted by lineStart."""
        begin = self.span[0]
        return self.lineStart + begin

    def end(self):
        """Return the second element of the span shifted by lineStart."""
        finish = self.span[1]
        return self.lineStart + finish

    def __str__(self):
        """Render as ``Start:[s] End:[e] Text:[t]`` using the shifted positions."""
        # join() insists on strings, so non-string text still raises TypeError.
        pieces = ["Start:[", str(self.start()), "] End:[", str(self.end()), "] ", "Text:[", self.text, "]"]
        return "".join(pieces)

    def record(self, tool, field, value, units):
        """Print what would be recorded against this token; nothing is stored.

        tool and field are joined with "/" to label the record; value and
        units are stringified into the message. The first printed line shows
        the raw (line-relative) span, not the shifted positions.
        """
        toolField = "/".join((tool, field))
        message = "".join(["Value:", str(value), " Units:", str(units)])
        print("Looping through rows", self.span)
        print("Setting tool/field to:[", toolField, "] Msg to:[", message, "]", sep="")

## Trial the Token object

In [140]:
# Pretend the sample line begins at character 23; in real life the line
# object will know its own starting index.
pretendLineStartChar = 23
regExp = r"\S+"  # one match per run of non-whitespace characters
sampleText = "951212 050000.000 MONDEO_44   @C   269.7   2.0      10"

# Wrap every regex match in a Token carrying its span, text and line offset.
tokens = []
for match in finditer(regExp, sampleText):
    tokens.append(Token(span=match.span(), text=match.group(), lineStart=pretendLineStartChar))

# let's have a look at the tokens
for token in tokens:
    print(token)
    

Start:[23] End:[29] Text:[951212]
Start:[30] End:[40] Text:[050000.000]
Start:[41] End:[50] Text:[MONDEO_44]
Start:[53] End:[55] Text:[@C]
Start:[58] End:[63] Text:[269.7]
Start:[66] End:[69] Text:[2.0]
Start:[75] End:[77] Text:[10]


## Play with a token

In [141]:
# Pick the fifth token of the sample line, then show it and its shifted
# start position on separate lines.
dirToken = tokens[4]
print(dirToken, dirToken.start(), sep="\n")

Start:[58] End:[63] Text:[269.7]
58


In [142]:
# The token text is a string, so convert it before treating it as a number.
directionVal = float(dirToken.text)
print(directionVal)

269.7


In [143]:
# Report the parsed direction against the token, labelled with this tool's name.
myName = "Trial Extractor"
dirToken.record(tool=myName, field="Direction", value=directionVal, units="degs")

Looping through rows (35, 40)
Setting tool/field to:[Trial Extractor/Direction] Msg to:[Value:269.7 Units:degs]


In [139]:
# The first two tokens of the sample line are used as the date and the time.
dateToken, timeToken = tokens[0], tokens[1]

# Combined value shared by both tokens that make up the date-time.
dateTimeValue = "-".join((dateToken.text, timeToken.text))

# The date token is reported twice: once on its own, once as part of the
# date-time; the time token is only reported as part of the date-time.
dateToken.record(tool="date finder", field="Date", value=dateToken.text, units="n/a")
dateToken.record(tool="date-time finder", field="DateTime", value=dateTimeValue, units="n/a")
timeToken.record(tool="date-time finder", field="DateTime", value=dateTimeValue, units="n/a")


Looping through rows (0, 6)
Setting tool/field to:[date finder/Date] Msg to:[Value:951212 Units:n/a]
Looping through rows (0, 6)
Setting tool/field to:[date-time finder/DateTime] Msg to:[Value:951212-050000.000 Units:n/a]
Looping through rows (7, 17)
Setting tool/field to:[date-time finder/DateTime] Msg to:[Value:951212-050000.000 Units:n/a]
