In [1]:
# install required libraries
import warnings
%pip install --upgrade pip
%pip install spacy
%pip install ipywidgets
#%pip install -U jupyter

# suppress user warnings during execution
warnings.filterwarnings(action='ignore', category=UserWarning)


Collecting pip
  Downloading pip-22.3-py3-none-any.whl (2.1 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m2.0/2.1 MB[0m [31m64.8 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m46.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 22.1.2
    Uninstalling pip-22.1.2:
      Successfully uninstalled pip-22.1.2
Successfully installed pip-22.3
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


# Using Vocabulary-Based Pipeline Components
## Introduction
A series of experimental pipeline components has been developed to perform vocabulary-based lookup matching on archaeological terms. Note that these pipeline components are based on existing monolingual (English) thesauri, so they can currently only be used to identify and tag English language terms. The following components are demonstrated here:

| Component Name     | Entity Type   | Description   | Examples  |
|--------------------|---------------|---------------| ----------|
| archobject_ruler   | OBJECT        | Matching on terms from the [MDA Archaeological Objects Thesaurus](http://purl.org/heritagedata/schemes/mda_obj) | *axe, sherds, ring, forks* |
| archscience_ruler  | ARCHSCIENCE   | Matching on terms from the [FISH Archaeological Sciences Thesaurus](http://purl.org/heritagedata/schemes/560) | *lead isotope dating, palynology* |
| material_ruler     | MATERIAL      | Matching on terms from the [FISH Building Materials Thesaurus](http://purl.org/heritagedata/schemes/eh_tbm) | *brass, quartz, pine, bone, leather* |
| monument_ruler     | MONUMENT      | Matching on terms from the [FISH Thesaurus of Monument Types](http://purl.org/heritagedata/schemes/eh_tmt2) | *midden, weighbridge, kiln* |

In [4]:
# Using a rematch2 vocabulary-based component
import spacy
from spacy import displacy
# NOTE(review): `components` is not referenced by name below; presumably the
# import registers the rematch2 pipeline factories with spaCy - confirm.
from rematch2 import components
import ipywidgets as widgets
# import display/HTML explicitly instead of relying on whatever the name
# `display` happens to be bound to in the running kernel
from IPython.display import display, HTML

# use a predefined spaCy pipeline, disabling the default NER component
nlp = spacy.load("en_core_web_sm", disable = ['ner'])
# add rematch2 component(s) to the end of the pipeline
nlp.add_pipe("archobject_ruler", last=True)
# process example text using the modified pipeline
txt = '''
Aside from three residual flints, none closely datable, the earliest remains comprised a small assemblage of Roman pottery and ceramic building material, also residual and most likely derived from a Roman farmstead found immediately to the north within the Phase II excavation area. A single sherd of Anglo-Saxon grass-tempered pottery was also residual.
The earliest features, which accounted for the majority of the remains on site, relate to medieval agricultural activity focused within a large enclosure. There was little to suggest domestic occupation within the site: the pottery assemblage was modest and well abraded, whilst charred plant remains were sparse, and, as with some metallurgical residues, point to waste disposal rather than the locations of processing or consumption. A focus of occupation within the Rodley Manor site, on higher ground 160m to the north-west, seems likely, with the currently site having lain beyond this and providing agricultural facilities, most likely corrals and pens for livestock. Animal bone was absent, but the damp, low-lying ground would have been best suited to cattle. An assemblage of medieval coins recovered from the subsoil during a metal detector survey may represent a dispersed hoard.
'''
doc = nlp(txt)

# define output component (fixed height, scrollable, bordered)
output = widgets.Output(layout=widgets.Layout(overflow='scroll', border='1px solid gray', height='250px'))
output.clear_output()
with output:
    # jupyter=False makes displacy return the markup as a string; in its
    # auto-detected Jupyter mode it displays the markup itself and returns
    # None, which must not be passed on to display()
    display(HTML(displacy.render(doc, style="ent", jupyter=False)))
display(output)

TypeError: 'module' object is not callable

In [None]:
# Testing each of the vocabulary-based components on user-entered text 
import spacy
from spacy import displacy
from rematch2 import components
import ipywidgets as widgets
from IPython.display import display, HTML

def run(btn):
    """Tag the entered text with the selected component and show the result.

    Used as the click handler for the "Go" button. Loads the spaCy pipeline,
    appends the pipeline component currently selected in
    ``dropdown_entitytype``, processes the text in ``textarea_input`` and
    renders the highlighted entities into the ``output`` widget, replacing
    whatever was shown there before.

    Args:
        btn: The button widget that triggered the callback (not used).
    """
    # load a fresh pipeline on every click so that only the currently
    # selected component is present; the default NER component is excluded
    nlp = spacy.load("en_core_web_sm", exclude=['ner'])

    # add the required pipeline component(s) to the end of the pipeline
    nlp.add_pipe(dropdown_entitytype.value, last=True)

    # process the user-entered text using the modified pipeline
    doc = nlp(textarea_input.value)

    # show highlighted entities in the processed text
    with output:
        output.clear_output(wait=True)
        # jupyter=False makes displacy return the markup as a string; in its
        # auto-detected Jupyter mode it displays the markup itself and returns
        # None, so HTML() would be handed None instead of markup
        display(HTML(displacy.render(doc, style="ent", jupyter=False)))
    
# default text used as the initial value of the input textarea;
# example input taken from https://doi.org/10.5284/1100086
example_input = '''
This collection comprises site data (images and CAD) from an archaeological evaluation which was undertaken by Cotswold Archaeology in October 2015 at Knotwood Fields Farm, Northamptonshire. The evaluation was undertaken to inform a planning application to South Northamptonshire Council (SNC; the local planning authority) for the development of a solar farm.
The fieldwork comprised the excavation of fourteen trenches. A previous geophysical survey identified a number of anomalies representing potential archaeological features; these comprised sub-circular anomalies, linear anomalies and back-filled pits, indicative of former settlement activity of probable late prehistoric to Roman date. The evaluation recorded a number of curvilinear ditches, which most likely represent small enclosures and a roundhouse. Pottery dating from the Iron Age was recovered from the silted fills of these ditches. Broadly contemporaneous boundary ditches, containing pottery dating to the Iron Age, were also identified. These features probably relate to settlement activity and land division, focused at the north-eastern end of the site. Medieval plough furrows were indicated across the entire site by the geophysical survey; variations in their alignment indicates that the site covers parts two or more former open fields.
A number of undated, but probably post-medieval/modern, ditches corresponding to a north-west/south-east oriented field system were identified within the south-eastern part of the site. There was a good correlation between the evaluation and the geophysical survey results, although there were a small number of archaeological features which had not been detected by the survey, as well as limited geophysical anomalies which were not found to correspond to below-ground archaeological remains.'''

# Text area pre-filled with the example text; it stretches to fill a
# 250px-high container box.
textarea_input = widgets.Textarea(
    layout=dict(height='100%', width='auto'),
    description='Text to process:',
    placeholder='Type something',
    value=example_input,
    disabled=False,
)
textarea_container = widgets.VBox(
    children=[textarea_input],
    layout=dict(height="250px", width="auto"),
)


# Drop-down used to pick the pipeline component to run. Each option is a
# (label shown to the user, pipeline component name) pair.
dropdown_entitytype = widgets.Dropdown(
    description='Entity type:',
    options=(
        ("Objects", "archobject_ruler"),
        ("Monuments", "monument_ruler"),
        ("Archaeological Sciences", "archscience_ruler"),
        ("Materials", "material_ruler"),
    ),
    value="monument_ruler",
    disabled=False,
)

# One entry per vocabulary component: "key" is the pipeline component name,
# "lbl" is the human-readable label shown next to the checkbox.
# (Three of the entries previously had "key" and "lbl" swapped, so the
# checkboxes were labelled with component names instead of readable labels.)
items = [
    {"key": "archobject_ruler", "lbl": "Objects"},
    {"key": "monument_ruler", "lbl": "Monuments"},
    {"key": "archscience_ruler", "lbl": "Archaeological Sciences"},
    {"key": "material_ruler", "lbl": "Materials"}
]

# one unchecked checkbox per item, stacked vertically
chk = widgets.VBox([
    widgets.Checkbox(
        value=False,
        description=item["lbl"],
        disabled=False,
        indent=False
    ) for item in items
])
display(chk)

# NOTE(review): single placeholder checkbox ("fred") in its own box - this
# looks like leftover layout debugging; confirm whether it is still needed.
chk2 = widgets.Checkbox(
    value=False,
    description="fred",
    disabled=False,
    indent=False
)
box = widgets.VBox([chk2])
display(box)

# submit button; clicking it invokes run()
button_go = widgets.Button(button_style='primary', description="Go")
button_go.on_click(run)

# place the entity-type dropdown and the button side by side
entity_type = widgets.HBox(children=[dropdown_entitytype, button_go])

# fixed-height, scrollable, bordered area that receives the rendered result
output = widgets.Output(
    layout=widgets.Layout(
        height='250px',
        border='1px solid gray',
        overflow='scroll',
    )
)

# show the UI from top to bottom: text input, controls, results
display(textarea_container, entity_type, output)


In [1]:
# isolate the display code to see what's wrong...
#%pip install ipywidgets
import ipywidgets as widgets
from IPython.display import display

# A single checkbox displayed on its own, without a container box or an
# Output widget around it.
chk = widgets.Checkbox(
    description="fred",
    indent=False,
    value=False,
    disabled=False,
)
display(chk)


Checkbox(value=False, description='fred', indent=False)