In [None]:
# pre-install required libraries
# (%pip, rather than !pip, installs into the environment of the running kernel)
import warnings
# optional: upgrade pip itself first
#%pip install --upgrade pip
%pip install spacy
%pip install ipywidgets
# optional: upgrade jupyter
#%pip install -U jupyter

# suppress user warnings during execution
# (adds an 'ignore' filter for the UserWarning category, so it also applies to
# the cells that run after this one)
warnings.filterwarnings(action='ignore', category=UserWarning)


# Using the TemporalAnnotator
## Introduction
The TemporalAnnotator includes a series of custom spaCy pipeline components that match temporal terms and phrases as observed in archaeological texts. It identifies the following entity types:

| Entity Type | Description | Examples |
| ------------|-------------| --------:|
| CENTURY     | Ordinal century expressions | *early 15th century BC to late fifteenth century AD* |
| YEARSPAN    | Spans of years (possibly with prefixes and/or suffixes) | *early 1300 to late 1350 AD* |
| NAMEDPERIOD | Period labels from a specified [Perio.do](https://perio.do/en/) authority | *Bronze Age, Early Medieval, Victorian* |

The following example code runs the TemporalAnnotator on some example text. To look up named periods it uses the Perio.do authority id [p0kh9ds](http://n2t.net/ark:/99152/p0kh9ds), which references the Historic England Periods List.

In [None]:
# simple example using TemporalAnnotator on a passage of text
# (imports are local to this cell so that it runs on a fresh kernel)
from IPython.display import HTML, display
from rematch2.TemporalAnnotator import TemporalAnnotator

# example test input text copied from https://doi.org/10.5284/1100092
test_text = "This collection comprises site data(reports, images, GIS data and a project database) from an archaeological excavation at Lydney B Phase II, Archers Walk, Lydney, Gloucestershire undertaken by Cotswold Archaeology between February and May 2018. An area of 1.47ha was excavated within this part of a wider development area. The earliest remains comprised three broadly datable flints, all found as residual finds. An Early Bronze Age collared urn within a small pit may be the remains of a grave, although no human remains were found. The first evidence for occupation is from the Roman period, with finds spanning the 1st to 3rd centuries AD, with a clear focus within the 2nd to 3rd centuries. Two phases of Roman activity were identified, the first comprising cereal-processing ovens and two crescent-shaped ditches, one associated with metalworking debris. The later phase comprised stone founded buildings associated with wells, enclosures, trackways and a single cremation deposit. These seem to indicate a Romanised farm below the status of a villa. Little animal bone survived, but the enclosures are suggestive of livestock farming. Occupation seems to have ended in the mid 3rd century, although the reasons for this are not apparent. Further use of the site dates to the medieval period, between the late 12th and 15th centuries, when an agricultural building was constructed, probably an outlier of a manorial farm previously excavated to the west."

# required output format options: html|csv|json|dataframe|doc
# 'html' returns inline markup for visualising annotations in context
# 'dataframe' useful for visualising tabular data in python notebook
# 'csv' and 'json' are useful textual interchange formats
# 'doc' returns the spaCy document object for further processing
output_format = "html"  # options: html|csv|json|dataframe|doc

# if not specified, default ISO639-1 two character language code is "en"
# if not specified, default periodo id is "p0kh9ds" (Historic England periods list)
annotator = TemporalAnnotator(language="en", periodo_authority_id="p0kh9ds")

# process example text in the required output format
results = annotator.annotateText(input_text=test_text, format=output_format)

# 'html' results are markup: wrap them in HTML() so the notebook renders the
# annotations in context instead of echoing the raw markup string; every other
# format is displayed as returned
display(HTML(results) if output_format == "html" else results)


The following example tests the TemporalAnnotator on a range of multilingual example texts. It lets you choose which language-specific example to run and the output format in which to display the results.

In [1]:
# UI to test TemporalAnnotator on a range of example texts
import ipywidgets as widgets
from IPython.display import display, HTML
from rematch2.TemporalAnnotator import TemporalAnnotator
from test_examples_multilingual import test_examples_multilingual

# TODO - choose these as checkboxes in UI

def run(btn):
    """Annotate the example text for the selected language and show the result.

    Click handler for the "Go" button. Reads the current values of the
    language and format dropdowns, looks up the first entry of
    test_examples_multilingual for that language, annotates its text with a
    TemporalAnnotator and displays the result in the output widget.

    Args:
        btn: the button that was clicked (supplied by ipywidgets; not used).
    """
    # clear any previous output (wait=True defers the clearing until new
    # output arrives, which avoids flicker)
    output.clear_output(wait=True)

    # do all the work inside the output widget, so that messages - and errors
    # raised while building or running the annotator - are shown in the UI
    # instead of being lost in the button callback
    with output:
        language_code = dropdown_language.value

        # get the first test example for the chosen language
        # (examples without a "language" key are treated as "en")
        selected_test = next(
            (test for test in test_examples_multilingual
             if test.get("language", "en") == language_code),
            None)

        if not selected_test:
            # always display something, otherwise the deferred clear never
            # happens and the previous (now stale) result stays on screen
            display(HTML(
                f"<em>No example text is available for language '{language_code}'.</em>"))
            return

        periodo_authority_id = selected_test.get("periodo_authority_id", "p0kh9ds")
        annotator = TemporalAnnotator(
            language=language_code, periodo_authority_id=periodo_authority_id)

        # get annotation results
        output_format = dropdown_format.value
        results = annotator.annotateText(
            input_text=selected_test.get("text", ""), format=output_format)

        # display annotation results ('html' is markup and needs rendering)
        if output_format == "html":
            display(HTML(results))
        else:
            display(results)

# define language selector dropdown UI component
# (each option pairs the label shown in the UI with the two character
# language code that is passed to the annotator)
dropdown_language = widgets.Dropdown(
    options=[
        ["German", "de"],
        ["English", "en"],
        ["Spanish", "es"],
        ["French", "fr"],
        ["Italian", "it"],
        ["Dutch", "nl"],
        ["Norwegian", "no"],
        ["Swedish", "sv"]
    ],
    value="en",
    description='Language:',
    disabled=False
)

# define output format dropdown UI component
# (each option pairs the label shown in the UI with the format name that is
# passed to annotateText)
dropdown_format = widgets.Dropdown(
    options=[
        ["HTML", "html"],
        ["Tabular", "dataframe"]
    ],
    value="html",
    description="Format:",
    disabled=False
)

# define and display other UI components
button_go = widgets.Button(description="Go")
# the control row is deliberately not called `input`, so that Python's builtin
# input() is not shadowed for the rest of the kernel session
controls = widgets.HBox([dropdown_language, dropdown_format, button_go])
# fixed-height, scrollable, bordered area that run() writes its results into
output = widgets.Output(layout=widgets.Layout(
    overflow='scroll', border='1px solid black', height='500px'))
display(controls, output)

# what to do when the button is clicked
button_go.on_click(run)


HBox(children=(Dropdown(description='Language:', index=1, options=(['German', 'de'], ['English', 'en'], ['Span…

Output(layout=Layout(border_bottom='1px solid black', border_left='1px solid black', border_right='1px solid b…