In [1]:
#Import library
from highlib import Letter
from highlib import HighLight


## Objects used in the library

In [2]:
# Build a Letter from three one-character strings, then print it to see
# how the library renders the object as text.
letterObj = Letter("a", "b", "c")
print(letterObj)

[a] [b]  [c]


## Reading in a file

In [3]:
# Open the sample file through HighLight, fetch its line objects, and
# report how many came back.
high = HighLight("file.txt", 'RomanFile')
lineArr = high.lines()
print("%d lines read in" % len(lineArr))

4 lines read in


In [4]:
# Echo the `line` attribute of every object returned by high.lines().
for thisLine in lineArr:
    print(thisLine.line)

951212 050000.000 MONDEO_44 @C 269.7 2.0 10
122234 423444.000 FORD_11 @C 354.7 2.1 14
951212 050200.000 COROLLA_44 @C 177.9 3.1 15
951212 050200.000 COROLLA_44 @C 200 3.1 15


## Handling tokens

In [5]:
# Register three entries on the HighLight object. Each tuple is passed to
# high.record() positionally; the third value is the key the entry appears
# under when high.hashRecord is printed.
# NOTE(review): "Venicle" looks like a typo for "Vehicle" - confirm before
# changing it, since the recorded name is echoed in the output below.
recordSpecs = (
    ("Date", "n/a", 0, 'yellow'),
    ("Date", "n/a", 1, 'yellow'),
    ("Venicle", "n/a", 2, 'blue'),
)
for recName, recAttr, recIndex, recColor in recordSpecs:
    high.record(recName, recAttr, recIndex, recColor)

In [6]:
# Dump the whole hashRecord mapping held on the HighLight object.
print(high.hashRecord)

{'empty': {0: {'name': 'Date', 'attr': 'n/a', 'color': 'yellow'}, 1: {'name': 'Date', 'attr': 'n/a', 'color': 'yellow'}, 2: {'name': 'Venicle', 'attr': 'n/a', 'color': 'blue'}}}


In [7]:
# Iterate hashRecord directly, printing only its top-level keys.
for key in high.hashRecord:
    print(key)

empty


## Playing with regexp
Let's experiment with a self-aware token object

In [51]:
import re
from re import finditer

class Line():
    """A line of text that carries its own position in the overall text.

    `start` and `end` are stored exactly as supplied by the caller; `start`
    is converted with int() whenever a token is built. `chars` is handed on
    unchanged to every Token this line produces.
    """

    def __init__(self, start, end, text, chars):
        self.text = text
        self.chars = chars
        self.start = start
        self.end = end

    def tokens(self):
        """Return a list with one Token per run of non-whitespace characters."""
        return [
            Token(found.span(), found.group(), int(self.start), self.chars)
            for found in finditer("\\S+", self.text)
        ]
    
class CharIndex:
    """One character of the text plus the list of usages recorded against it."""

    def __init__(self, letter):
        self.usages = []
        self.letter = letter

    def __str__(self):
        """Render as "[<letter>]" followed by one "(T/F:..., msg:...)" group per usage."""
        rendered = "[" + self.letter + "]"
        usageParts = [
            "(T/F:" + entry.toolField + ", msg:" + entry.message + ")"
            for entry in self.usages
        ]
        return rendered + "".join(usageParts)

class SingleUsage:
    """Pairs a tool/field label with a descriptive message."""

    def __init__(self, toolField, message):
        self.message = message
        self.toolField = toolField
    
class Token():
    """A piece of a line that can locate itself in the overall character list.

    `span` is a (start, end) pair relative to the line, `lineStart` is the
    line's offset in the overall text, and `chars` is the list of
    per-character entries that record() annotates.
    """

    def __init__(self, span, text, lineStart, chars):
        self.chars = chars
        self.lineStart = lineStart
        self.text = text
        self.span = span

    def __str__(self):
        startPart = "Start:[" + str(self.start()) + "]"
        endPart = "End:[" + str(self.end()) + "]"
        textPart = "Text:[" + self.text + "]"
        return startPart + " " + endPart + " " + textPart

    def start(self):
        """Absolute index of the token's first character."""
        return self.lineStart + self.span[0]

    def end(self):
        """Absolute index of the token's last character (inclusive)."""
        return self.lineStart + self.span[1] - 1

    def record(self, tool, field, value, units):
        """Attach a usage for tool/field to every character this token covers."""
        toolField = tool + "/" + field
        message = "Value:" + str(value) + " Units:" + str(units)
        firstCell = self.start()
        lastCell = self.end()
        print("Looping through cells " + str(firstCell) + " to " + str(lastCell))
        print("Setting tool/field to:[" + toolField + "] Msg to:[" + message + "]")
        # end() is inclusive, so the range stops one past it
        for position in range(firstCell, lastCell + 1):
            self.chars[position].usages.append(SingleUsage(toolField, message))

## Try out self-aware lines


In [107]:
sampleLines = "MONDEO_44 @C 269.7 2.0 10\nMONDEO_44 @C 269.7  2.0 11\nMONDEO_45  @C 249.7  2.0 10\nMONDEO_46  @C 239.7 4.0 12\n"
strLines = sampleLines.splitlines()
myName = "Trial Extractor"

print("have:" + str(len(sampleLines)) + "chars")

# Character index: one CharIndex per character of the raw text, in order.
chars = [CharIndex(letter) for letter in sampleLines]

# Wrap every line in a Line that knows its offset in the raw text.
# The +1 steps over the newline that splitlines() removed.
lines = []
lineCtr = 0
for lineText in strLines:
    lineEnd = lineCtr + len(lineText)
    lines.append(Line(str(lineCtr), str(lineEnd), lineText, chars))
    lineCtr = lineEnd + 1

# What to record for each line: (heading, token index, field, converter, units).
# A converter of None records the token text as-is.
# Token 3 is recorded twice, so its characters end up with two usages.
# NOTE(review): token 4 is recorded as "Direction" a second time - confirm
# that this is intended rather than a copy/paste of the earlier entry.
fieldSpecs = (
    ("= Vehicle =", 0, "Vehicle", None, "n/a"),
    ("= Direction =", 2, "Direction", float, "degs"),
    ("= Speed =", 3, "Speed", float, "m/s"),
    ("= Other speed =", 3, "Other Speed", float, "m/s"),
    ("= Direction =", 4, "Direction", float, "degs"),
)

# Process the lines.
for thisLine in lines:
    print("== New line ==")
    tokens = thisLine.tokens()

    for heading, tokenIndex, fieldName, convert, units in fieldSpecs:
        print(heading)
        fieldToken = tokens[tokenIndex]
        fieldValue = convert(fieldToken.text) if convert else fieldToken.text
        fieldToken.record(myName, fieldName, fieldValue, units)


have:108chars
== New line ==
= Vehicle =
Looping through cells 0 to 8
Setting tool/field to:[Trial Extractor/Vehicle] Msg to:[Value:MONDEO_44 Units:n/a]
= Direction =
Looping through cells 13 to 17
Setting tool/field to:[Trial Extractor/Direction] Msg to:[Value:269.7 Units:degs]
= Speed =
Looping through cells 19 to 21
Setting tool/field to:[Trial Extractor/Speed] Msg to:[Value:2.0 Units:m/s]
= Other speed =
Looping through cells 19 to 21
Setting tool/field to:[Trial Extractor/Other Speed] Msg to:[Value:2.0 Units:m/s]
= Direction =
Looping through cells 23 to 24
Setting tool/field to:[Trial Extractor/Direction] Msg to:[Value:10.0 Units:degs]
== New line ==
= Vehicle =
Looping through cells 26 to 34
Setting tool/field to:[Trial Extractor/Vehicle] Msg to:[Value:MONDEO_44 Units:n/a]
= Direction =
Looping through cells 39 to 43
Setting tool/field to:[Trial Extractor/Direction] Msg to:[Value:269.7 Units:degs]
= Speed =
Looping through cells 46 to 48
Setting tool/field to:[Trial Extractor/Sp

## Generating colors

In [128]:
import random
import math
import colorsys

# Cache of colours already handed out, keyed by hash code.
myColors = {}

def colorFor(hashCode):
    """Return the hex colour string for hashCode, creating one on first use.

    A new colour takes a random hue with saturation 0.5 and value 0.4 and
    is formatted as '#rrggbb'. It is stored in myColors, so later calls
    with the same hashCode return the same string.
    """
    if hashCode in myColors:
        return myColors[hashCode]

    hue = random.random()
    channels = colorsys.hsv_to_rgb(hue, 0.5, 0.4)
    # scale each 0..1 channel to 0..255, truncating toward zero
    newCol = '#%02x%02x%02x' % tuple(int(channel * 255) for channel in channels)
    myColors[hashCode] = newCol
    return newCol
    
# "aaa" is requested twice: the second request should print the same
# colour as the first one.
for sampleHash in ("aaa", "bbb", "aaa", "ddd"):
    print(colorFor(sampleHash))


#336651
#3b3366
#336651
#3c6633


## Producing the target HTML file

In [130]:
import html

# Write the text to out3.html, wrapping each run of characters that share
# the same set of usages in a coloured <span> whose tooltip lists them.
lastHash = ""
with open("out3.html", "w") as fOut:
    for charIndex in chars:
        letter = charIndex.letter

        # The concatenated tool/field names identify this character's usages;
        # an empty string means the character is not highlighted.
        thisHash = "".join(usage.toolField for usage in charIndex.usages)

        # Only touch the markup when the set of usages changes.
        if thisHash != lastHash:
            if lastHash != "":
                # leaving a highlighted run
                fOut.write("</span>")
            if thisHash != "":
                # entering a highlighted run: one tooltip entry per usage,
                # with the "////" marker placed between entries
                thisMessage = "////".join(
                    usage.toolField + ", " + usage.message
                    for usage in charIndex.usages
                )
                # colour is looked up only for highlighted runs, so the
                # cache is not filled with an entry for the empty hash
                newColor = colorFor(thisHash)
                # escape the tooltip so quotes in the data cannot end the attribute
                fOut.write("<span title='" + html.escape(thisMessage, quote=True)
                           + "' style=\"background-color:" + newColor + "\">")

        # newlines become HTML line breaks; everything else is escaped text
        if letter == "\n":
            fOut.write("<br>")
        else:
            fOut.write(html.escape(letter))

        lastHash = thisHash

    # close a span still open at the end of the text
    if lastHash != "":
        fOut.write("</span>")

            
