In [3]:
#Import library
from highlib import Letter
from highlib import HighLight


## Objects used in the library

In [4]:
# Build a Letter from three one-character strings and print its string form
letterObj = Letter("a", "b", "c")
print(letterObj)

[a] [b]  [c]


## Reading in a file

In [5]:
# Open the sample file through HighLight and report how many lines it produced
high = HighLight("file.txt", 'RomanFile')
lineArr = high.lines()
print(f"{len(lineArr)} lines read in")

7 lines read in


In [6]:
# Echo the text held by each line object returned by high.lines()
for thisLine in lineArr:
    print(thisLine.line)

951212 050000.000 MONDEO_44 @C 269.7 2.0 10
// EVENT 951212 050300.000 BRAVO
// EVENT 951212 050300.000 CHARLIE
951212 050300.000 FORD_11 @C 354.7 2.1 14
951212 050200.000 COROLLA_44 @C 177.9 3.1 15
// EVENT 951212 050300.000 DELTA
951212 050300.000 COROLLA_44 @C 200 3.1 15


## Handling tokens

In [7]:
# Register one record per token position. Judging by the hashRecord dump,
# the arguments end up as: name, attr, <position key>, color.
# NOTE(review): position 1 is labelled "Date" as well; in the sample lines the
# second token looks like a time - confirm whether "Time" was intended.
# NOTE(review): "Venicle" looks like a typo for "Vehicle"; it is stored as
# runtime data, so it is left unchanged here - confirm before renaming.
high.record("Date", "n/a", 0,'yellow')
high.record("Date", "n/a", 1,'yellow')
high.record("Venicle", "n/a", 2,'blue')

In [8]:
# Dump everything recorded so far to inspect its structure
print(high.hashRecord)

{'empty': {0: {'name': 'Date', 'attr': 'n/a', 'color': 'yellow'}, 1: {'name': 'Date', 'attr': 'n/a', 'color': 'yellow'}, 2: {'name': 'Venicle', 'attr': 'n/a', 'color': 'blue'}}}


In [9]:
# List the top-level keys of hashRecord
for key in high.hashRecord:
    print(key)

empty


## Playing with regexp
Let's experiment with a self-aware token object

In [20]:
import re
from re import finditer

class Line():
    """A line of text that carries its own position within the whole document.

    start and end are stored exactly as the caller supplies them, text is the
    content of the line, and chars is the shared per-character index that is
    handed on to every token produced from this line.
    """

    def __init__(self, start, end, text, chars):
        self.start, self.end = start, end
        self.text, self.chars = text, chars

    def tokens(self):
        """Return one Token per run of non-whitespace characters in the line.

        Each token receives its span within the line, its text, the line's
        start converted to an int, and the shared chars list.
        """
        return [
            Token(found.span(), found.group(), int(self.start), self.chars)
            for found in finditer("\\S+", self.text)
        ]
    
class CharIndex:
    """One character of the source text plus every usage recorded against it."""

    def __init__(self, letter):
        self.usages = []
        self.letter = letter

    def __str__(self):
        """Render as "[letter]" followed by one "(T/F:..., msg:...)" group per usage."""
        pieces = ["[" + self.letter + "]"]
        pieces.extend(
            "(T/F:" + entry.toolField + ", msg:" + entry.message + ")"
            for entry in self.usages
        )
        return "".join(pieces)

class SingleUsage:
    """A single note about how a character was used: which tool/field, and a message."""

    def __init__(self, toolField, message):
        self.toolField, self.message = toolField, message
    
class Token():
    """A word from a line that can locate itself within the whole document.

    span is the (start, end) pair of the word inside its line (end exclusive,
    as produced by a regex match), lineStart is the offset of that line in the
    document, and chars is the shared list of per-character index objects.
    """

    def __init__(self, span, text, lineStart, chars):
        self.span, self.text = span, text
        self.lineStart, self.chars = lineStart, chars

    def __str__(self):
        return "".join([
            "Start:[", str(self.start()), "] End:[", str(self.end()), "] ",
            "Text:[", self.text, "]",
        ])

    def start(self):
        """Document offset of the token's first character."""
        offsetInLine = self.span[0]
        return self.lineStart + offsetInLine

    def end(self):
        """Document offset of the token's last character (inclusive)."""
        pastTheEnd = self.lineStart + self.span[1]
        return pastTheEnd - 1

    def record(self, tool, field, value, units):
        """Append a usage note to every character this token covers.

        The note's tool/field is "<tool>/<field>" and its message is
        "Value:<value> Units:<units>"; each character gets its own
        SingleUsage instance.
        """
        toolField = tool + "/" + field
        message = "Value:" + str(value) + " Units:" + str(units)
        for position in range(self.start(), self.end() + 1):
            self.chars[position].usages.append(SingleUsage(toolField, message))

## Try out self-aware lines


In [29]:
#sampleLines = "MONDEO_44 @C 269.7 2.0 10\nMONDEO_44 @C 269.7  2.0 11\nMONDEO_45  @C 249.7  2.0 10\nMONDEO_46  @C 239.7 4.0 12\n"

# read the whole sample file as one string
with open('file.txt', 'r') as file:
    sampleLines = file.read()

myName = "Trial Extractor"
strLines = sampleLines.splitlines()

print("have:" + str(len(sampleLines)) + "chars")

# one CharIndex per character of the raw text, stored at that character's offset
chars = [CharIndex(letter) for letter in sampleLines]

# wrap each text line in a Line that knows its start/end offsets in the file;
# the extra +1 steps over the line terminator that splitlines() removed
lines = []
lineCtr = 0
for thisLine in strLines:
    lineEnd = lineCtr + len(thisLine)
    lines.append(Line(str(lineCtr), str(lineEnd), thisLine, chars))
    lineCtr = lineEnd + 1

# process the lines: tag the characters of each recognised token with the
# importer name, the field it was read as, its value and its units
for thisLine in lines:
    tokens = thisLine.tokens()

    # a blank line yields no tokens, so there is nothing to classify or record
    if not tokens:
        continue

    # check the type
    firstToken = tokens[0]
    if firstToken.text == "//":
        # event marker, laid out as: // EVENT <date> <time> <event name>
        eventImporter = "Event importer"
        dateToken = tokens[2]
        dateToken.record(eventImporter, "Date", dateToken.text, "n/a")
        timeToken = tokens[3]
        timeToken.record(eventImporter, "Time", timeToken.text, "n/a")
        eventToken = tokens[4]
        # record the event's own text (previously the time text was stored here)
        eventToken.record(eventImporter, "Event", eventToken.text, "n/a")
    else:
        # data row, laid out as: <date> <time> <vehicle> <tokens[3], unused> <number> <number> <number>
        dateToken = tokens[0]
        dateToken.record(myName, "Date", dateToken.text, "n/a")

        timeToken = tokens[1]
        timeToken.record(myName, "Time", timeToken.text, "n/a")

        vehicleToken = tokens[2]
        vehicleVal = vehicleToken.text
        vehicleToken.record(myName, "Vehicle", vehicleVal, "n/a")

        directionToken = tokens[4]
        directionVal = float(directionToken.text)
        directionToken.record(myName, "Direction", directionVal, "degs")

        speedToken = tokens[5]
        speedVal = float(speedToken.text)
        speedToken.record(myName, "Speed", speedVal, "m/s")

        # the same token is recorded a second time under another field, which
        # gives its characters two overlapping usages
        speedToken = tokens[5]
        speedVal = float(speedToken.text)
        speedToken.record(myName, "Other Speed", speedVal, "m/s")

        # NOTE(review): tokens[6] is labelled "Direction"/"degs" just like
        # tokens[4]; this may be a copy-paste label - confirm the real field.
        dirToken = tokens[6]
        directionVal = float(dirToken.text)
        dirToken.record(myName, "Direction", directionVal, "degs")


have:319chars


## Generating colors

In [30]:
import random
import math
import colorsys

# cache of colours already handed out, keyed by hash code
myColors = {}

def colorFor(hashCode):
    """Return a '#rrggbb' colour string for hashCode.

    The first request for a hash code generates a colour and caches it in
    myColors; later requests return the cached value. Hue and saturation are
    drawn from a generator seeded with the hash code itself, so a given hash
    code maps to the same colour on every run (and after the cache is
    cleared), rather than depending on the state of the global random module.
    The HSV value component is fixed at 0.9.
    """
    if hashCode in myColors:
        return myColors[hashCode]

    # private, key-seeded generator: reproducible and leaves the global RNG untouched;
    # repr() gives a string seed for any hashable key
    rng = random.Random(repr(hashCode))
    hue = rng.random()
    sat = rng.random()
    red, green, blue = colorsys.hsv_to_rgb(hue, sat, 0.9)
    newCol = '#%02x%02x%02x' % (int(red * 255), int(green * 255), int(blue * 255))
    myColors[hashCode] = newCol
    return newCol
    
# Quick check of the cache: a repeated key must come back with the colour it was first given
print(colorFor("aaa"))
print(colorFor("bbb"))
print(colorFor("aaa")) # this should return the same color as the first one
print(colorFor("ddd"))


#cfc3e5
#9de5e3
#cfc3e5
#16e5d6


## Producing the target HTML file

In [31]:
import html

# Write the text back out as HTML, wrapping each run of characters that share
# the same set of usages in a coloured <span> whose tooltip lists those usages.
lastHash = ""
with open("out3.html", "w") as fOut:
    for charIndex in chars:
        letter = charIndex.letter

        # the concatenated tool/field names identify which usages cover this character
        thisHash = "".join(usage.toolField for usage in charIndex.usages)

        # only touch the markup when the set of usages changes
        if thisHash != lastHash:
            if lastHash != "":
                # leaving a highlighted run
                fOut.write("</span>")
            if thisHash != "":
                # entering a highlighted run: one tooltip entry per usage,
                # with the separator placed between entries
                thisMessage = "////".join(
                    usage.toolField + ", " + usage.message
                    for usage in charIndex.usages
                )
                # generate/retrieve a color for this hash
                newColor = colorFor(thisHash)
                # escape the tooltip so quotes or markup in a value cannot break the attribute
                fOut.write(
                    "<span title='" + html.escape(thisMessage, quote=True)
                    + "' style=\"background-color:" + newColor + "\">"
                )

        # newlines become line breaks; everything else is escaped so that
        # characters such as < and & show up literally
        if letter == "\n":
            fOut.write("<br/>")
        else:
            fOut.write(html.escape(letter))

        lastHash = thisHash

    # close a span that is still open at the end of the text
    if lastHash != "":
        fOut.write("</span>")

            


In [33]:
from IPython.display import HTML
# Render the generated file inline as this cell's output
HTML(filename="./out3.html")