<div style="text-align:center; font-size: 120%">
<h1>Time Spans</h1>
<br>
<span>A project to identify and mark time-markers in clauses and the clause chains that they affect.</span>
</div>

## Get ETCBC Data

In [1]:
import collections as collect
import itertools
from IPython.display import display, HTML

In [2]:
from tf.fabric import Fabric

# point Text-Fabric at the Hebrew/etcbc4c data module
TF = Fabric(modules='Hebrew/etcbc4c')
# load the features listed in the string below; the functions in this notebook
# read book, chapter, verse, function and tab through F, and mother through E
api = TF.load('''
              book chapter verse 
              function pdp vt
              lex lex_utf8 g_word_utf8
              mother tab
              ''')
# inject the API members into the notebook's global namespace
# NOTE(review): presumably this is what defines the bare names F, L, E and T
# used by the functions further down -- confirm against the Text-Fabric docs
api.makeAvailableIn(globals())

This is Text-Fabric 2.3.0
Api reference : https://github.com/ETCBC/text-fabric/wiki/Api
Tutorial      : https://github.com/ETCBC/text-fabric/blob/master/docs/tutorial.ipynb
Data sources  : https://github.com/ETCBC/text-fabric-data
Data docs     : https://etcbc.github.io/text-fabric-data
Shebanq docs  : https://shebanq.ancient-data.org/text
Slack team    : https://shebanq.slack.com/signup
Questions? Ask shebanq@ancient-data.org for an invite to Slack
109 features found and 0 ignored
  0.00s loading features ...
   |     0.01s B book                 from /Users/Cody/github/text-fabric-data/Hebrew/etcbc4c
   |     0.01s B chapter              from /Users/Cody/github/text-fabric-data/Hebrew/etcbc4c
   |     0.01s B verse                from /Users/Cody/github/text-fabric-data/Hebrew/etcbc4c
   |     0.19s B g_word_utf8          from /Users/Cody/github/text-fabric-data/Hebrew/etcbc4c
   |     0.23s B lex_utf8             from /Users/Cody/github/text-fabric-data/Hebrew/etcbc4c
   |     0.07s

In [11]:
def getLabel(clauseNode):
    '''
    build a readable reference for a clause atom node
    the label has the form book.chapter.verse.clause, where the last part
    is the 1-based position of the node among the clause atoms of its verse
    raises ValueError when the node is not among the clause atoms of its verse
    '''
    # first enclosing node of each section type, looked up in book/chapter/verse order
    bookNode, chapterNode, verseNode = (
        L.u(clauseNode, otype=section)[0]
        for section in ('book', 'chapter', 'verse')
    )

    # position of this clause atom inside its verse, counted from 1
    siblings = list(L.d(verseNode, otype='clause_atom'))
    position = siblings.index(clauseNode) + 1

    book = F.book.v(bookNode)
    chapter = F.chapter.v(chapterNode)
    verse = F.verse.v(verseNode)
    return f'{book}.{chapter}.{verse}.{position}'
    
def getEtcbcData(corpus):
    '''
    Gather the needed data from the Text-Fabric module;
    label clauses with a simple reference tag (ex. Psalms.1.1.1)
    return an ordered dictionary keyed by clause label, in corpus order;
    each value is a dictionary with the keys:
        text            - text of the words of the clause atom
        timePhraseText  - tuple with the text of every phrase whose function is 'Time'
        daughters       - labels of the clause atoms that have this one as mother
                          (clause atoms with tab 0 are never stored as daughters)
        mother          - label of the first mother clause atom, or None
        timeCategories  - an empty list
        indentation     - value of the tab feature
        etcbcClauseNode - the Text-Fabric node number of the clause atom
    raises ValueError when no book node is named like the corpus argument
    '''
    # the corpus argument is a string;
    # find the book node whose name matches it and fail with a clear message
    # instead of letting a bare StopIteration escape from next()
    try:
        bookNode = next(book for book in F.otype.s('book')
                        if F.book.v(book) == corpus)
    except StopIteration:
        raise ValueError(
            'no book named {!r} in the loaded Text-Fabric data'.format(corpus)
        ) from None

    # all clause atom node numbers of the book, in corpus order
    clauseNodes = tuple(L.d(bookNode, otype='clause_atom'))

    # compute every label once; getLabel scans the whole verse on each call
    labels = {clause: getLabel(clause) for clause in clauseNodes}

    # create a mapping for each mother clause to its daughter clauses
    # in Text-Fabric that information is stored the other way around
    # so we build up that data with a defaultdict with a list value:
    motherToDaughters = collect.defaultdict(list)
    for clause in clauseNodes:
        if F.tab.v(clause) == 0: # do not store parallel daughter clauses
            continue
        for motherNode in E.mother.f(clause):
            motherToDaughters[motherNode].append(labels[clause])

    # we will store all of the clause data in this dict:
    clauses = collect.OrderedDict()

    # iterate over the clause node numbers and gather the data to be returned
    for clause in clauseNodes:
        phraseNodes = L.d(clause, otype='phrase')
        timePhrases = tuple(phrase for phrase in phraseNodes
                            if F.function.v(phrase) == 'Time')
        timePhraseText = tuple(T.text(L.d(phrase, otype='word'))
                               for phrase in timePhrases)

        # only the first mother is recorded; reuse a known label when there is one
        motherNodes = E.mother.f(clause)
        if motherNodes:
            firstMother = motherNodes[0]
            mother = (labels[firstMother] if firstMother in labels
                      else getLabel(firstMother))
        else:
            mother = None

        clauses[labels[clause]] = {
            'text': T.text(L.d(clause, otype='word')),
            'timePhraseText': timePhraseText,
            'daughters': motherToDaughters.get(clause, []),
            'mother': mother,
            'timeCategories': [],
            'indentation': F.tab.v(clause),
            'etcbcClauseNode': clause #!! REMOVE LATER
        }
    return clauses

## Process Time Spans

In [16]:
def climbClauseTree(clause, clauseDict, span, coverage):
    '''
    walk the daughters of a clause depth-first and extend the time span
    a daughter that carries its own time marker is skipped together with
    everything below it, which breaks the span at that point
    span is a list that receives every visited daughter label, in visiting order
    coverage is a set that receives the same labels
    both are changed in place; nothing is returned
    '''
    # explicit stack; daughters are pushed reversed so they pop in original order
    pending = list(reversed(clauseDict[clause]['daughters']))
    while pending:
        current = pending.pop()
        if clauseDict[current]['timePhraseText']:
            continue    # own time marker: not part of this span
        span.append(current)
        coverage.add(current)
        # handle this clause's daughters before its remaining siblings
        pending.extend(reversed(clauseDict[current]['daughters']))

def buildTimeSpans(clauseDict):
    '''
    collect one time span for every clause that carries a time marker
    a span starts with that clause and is extended by climbClauseTree
    return an ordered dictionary: first clause label -> list of clause labels
    clauses already placed in an earlier span (tracked in coverage)
    do not start a span of their own
    '''
    timeSpans = collect.OrderedDict()
    coverage = set()
    for clause, cData in clauseDict.items():
        # skip clauses that are already covered or that have no time marker
        if clause in coverage or not cData['timePhraseText']:
            continue
        span = [clause]
        climbClauseTree(clause, clauseDict, span, coverage)
        timeSpans[clause] = span
    return timeSpans

In [80]:
def writeHTML(clauseDict, spanDict, title):
    '''
    compile HTML code to display:
        1. plain text clauses
        2. indent clauses based on relationship to each other
        3. shade each clause within each time-span
    clauseDict maps clause label -> clause data ('text' and 'indentation' are read)
    spanDict maps the first clause label of a span -> list of clause labels in it
    title is inserted into the document template
    return the complete HTML document as a string
    the templates are read from the HTMLTemplates directory, relative to the
    current working directory
    '''

    # read the templates with an explicit encoding so the result does not
    # depend on the locale of the machine that runs the notebook
    with open('HTMLTemplates/doc.txt', encoding='utf-8') as docTemplate, \
         open('HTMLTemplates/dataPlain.txt', encoding='utf-8') as dataPlainTemplate, \
         open('HTMLTemplates/dataColor.txt', encoding='utf-8') as dataColorTemplate:
        document = docTemplate.read()
        dataPlain = dataPlainTemplate.read().replace('\n', '').replace('\t', '')
        dataColor = dataColorTemplate.read().replace('\n', '').replace('\t', '')

    # one indentation step; every entity is closed with ';' because an
    # unterminated '&nbsp' is a parse error and is not decoded in every context
    tab = '&nbsp;' * 4
    # consecutive spans alternate between these two background colors
    colors = itertools.cycle(('#addfff', '#a3e2a1'))

    # clauses in this set will receive color formatting
    inTimeSpan = {clause for span in spanDict.values() for clause in span}

    # collect the fragments and join once at the end
    fragments = []

    for clause, clauseData in clauseDict.items():

        if clause in spanDict:    # first clause of a span: emit the whole span
            spanColor = next(colors)
            for spanClause in spanDict[clause]:
                spanClauseData = clauseDict[spanClause]
                fragments.append(dataColor.format(
                    color=spanColor,
                    text=spanClauseData['text'] + tab * spanClauseData['indentation'],
                    label=tab + spanClause))

        elif clause not in inTimeSpan:    # outside every span: plain formatting
            fragments.append(dataPlain.format(
                text=clauseData['text'] + tab * clauseData['indentation'],
                label=tab + clause))

        # remaining clauses were already emitted as part of their span

    return document.format(data=''.join(fragments),
                           title=title)

In [90]:
# name of the book to process; getEtcbcData compares it with the `book` feature values
corpus = 'Genesis'
# ordered dict: clause label -> clause data (text, time phrases, mother/daughters, indentation)
test = getEtcbcData(corpus)

In [91]:
# ordered dict: label of each span's first clause -> list of clause labels in that span
spans = buildTimeSpans(test)

In [92]:
title = fr'Time Spans in {corpus}'

# render the clauses and their time spans into one HTML document
timeSpanDoc = writeHTML(test, spans, title)
# write with an explicit encoding: the clause text comes from the Hebrew/etcbc4c
# data, and the platform default encoding may be unable to represent it
with open('test.html', 'w', encoding='utf-8') as file:
    file.write(timeSpanDoc)