diff --git a/changelog.md b/changelog.md index e8ef6dafa6..7db6414ad3 100644 --- a/changelog.md +++ b/changelog.md @@ -7,6 +7,8 @@ - Handling intra-word linebreak as pollution : adds a pollution pattern that detects intra-word linebreak, which can then be removed in the `get_text` method - Qualifiers can process `Span` or `Doc` : this feature especially makes it easier to nest qualifiers components in other components - New label_weights parameter in eds.span_classifier`, which allows the user to set per label-value loss weights during training +- New `edsnlp.data.converters.MarkupToDocConverter` to convert Markdown or XML-like markup to documents, which is particularly useful to create annotated documents from scratch (e.g., for testing purposes). +- New [Metrics](https://aphp.github.io/edsnlp/master/metrics/) documentation page to document the available metrics and how to use them. ### Fixed diff --git a/docs/assets/images/ner_metrics_example.png b/docs/assets/images/ner_metrics_example.png new file mode 100644 index 0000000000..09d50bd458 Binary files /dev/null and b/docs/assets/images/ner_metrics_example.png differ diff --git a/docs/assets/stylesheets/extra.css b/docs/assets/stylesheets/extra.css index da8fe9706e..90719615e9 100644 --- a/docs/assets/stylesheets/extra.css +++ b/docs/assets/stylesheets/extra.css @@ -189,6 +189,7 @@ a.discrete-link { .sourced-heading > a { font-size: 1rem; align-content: center; + white-space: nowrap; } .doc-param-details .subdoc { @@ -207,3 +208,101 @@ a.discrete-link { margin: 0; font-weight: normal; } + +/*.chip { + position: relative; + box-sizing: content-box; + display: inline-block; + padding: 2px 1px; + margin: 1px 0px 14px; + border-radius: 6px; + font-style: normal; + background: #dae8fc; + border: 1px solid #6c8ebf; + --border-color: #6c8ebf; + white-space: nowrap; +} + +.chip::after { + content: attr(data-chip); + position: absolute; + right: -1px; + background: white; + border: 1px solid var(--border-color); + border-radius: 3px; + line-height: 1; + top: calc(100% - 6px); + box-sizing: border-box; +}*/ + +.chip { + position: relative; + box-sizing: content-box; + display: inline-block; + padding: 0 0 0 2px; + margin: 1px 0px; + border-radius: 4px; + font-style: normal; + background: #dae8fc; + border: 1px solid #6c8ebf; + --border-color: #6c8ebf; + white-space: nowrap; +} + +.chip::after { + content: attr(data-chip); + display: inline-block; + right: -1px; + background: white; + border: 1px solid var(--border-color); + border-radius: 0px 3px 3px 0px; + padding: 0 1px; + margin: -2px -2px -2px 2px; + box-sizing: border-box; +} + +.chip.tp { + background-color: #cef8ce; + border-color: #50b950; + --border-color: #50b950; +} + +.chip-green { + display: inline-block; + padding: 2px 2px; + margin: 1px 1px; + border-radius: 6px; + font-style: normal; + background: #cef8ce; + border: 1px solid #50b950; + white-space: nowrap; +} + +.chip-red { + display: inline-block; + padding: 2px 2px; + margin: 1px 1px; + border-radius: 6px; + font-style: normal; + background: #f8cecc; + border: 1px solid #b95450; + white-space: nowrap +} + +.chip.fp, .chip.fn { + background-color: #f8cecc; + border-color: #b95450; + --border-color: #b95450; +} + +.chip.na { + display: inline-block; + padding: 2px 2px; + margin: 1px 1px; + border-radius: 6px; + font-style: normal; + background: #efefef; + border: 1px solid #bababa; + color: #bababa; + white-space: nowrap; +} diff --git a/docs/assets/termynal/termynal.css b/docs/assets/termynal/termynal.css deleted file mode 100644 
index affc90e34f..0000000000 --- a/docs/assets/termynal/termynal.css +++ /dev/null @@ -1,132 +0,0 @@ -/** - * termynal.js - * - * @author Ines Montani - * @version 0.0.1 - * @license MIT - * - * Modified version from https://github.com/tiangolo/typer - */ - -:root { - --color-bg: #252a33; - --color-text: #eee; - --color-text-subtle: #a2a2a2; -} - -[data-termynal] { - width: auto; - max-width: 100%; - background: var(--color-bg); - color: var(--color-text); - font-size: 18px; - /* font-family: 'Fira Mono', Consolas, Menlo, Monaco, 'Courier New', Courier, monospace; */ - font-family: 'Roboto Mono', 'Fira Mono', Consolas, Menlo, Monaco, 'Courier New', Courier, monospace; - border-radius: 4px; - padding: 75px 45px 35px; - position: relative; - -webkit-box-sizing: border-box; - box-sizing: border-box; -} - -[data-termynal]:before { - content: ''; - position: absolute; - top: 15px; - left: 15px; - display: inline-block; - width: 15px; - height: 15px; - border-radius: 50%; - /* A little hack to display the window buttons in one pseudo element. */ - background: #d9515d; - -webkit-box-shadow: 25px 0 0 #f4c025, 50px 0 0 #3ec930; - box-shadow: 25px 0 0 #f4c025, 50px 0 0 #3ec930; -} - -[data-termynal]:after { - content: 'bash'; - position: absolute; - color: var(--color-text-subtle); - top: 5px; - left: 0; - width: 100%; - text-align: center; -} - -a[data-terminal-control] { - text-align: right; - display: block; - color: #aebbff; -} - -[data-terminal-copy] { - text-align: right; - position: absolute; - top: 5px; - right: 5px; -} - -[data-terminal-copy].md-icon { - color: #aebbff; -} - -[data-ty] { - display: block; - line-height: 2; -} - -[data-ty]:before { - /* Set up defaults and ensure empty lines are displayed. */ - content: ''; - display: inline-block; - vertical-align: middle; -} - -[data-ty="input"]:before, -[data-ty-prompt]:before { - margin-right: 0.75em; - color: var(--color-text-subtle); -} - -[data-ty="input"]:before { - content: '$'; -} - -[data-ty][data-ty-prompt]:before { - content: attr(data-ty-prompt); -} - -[data-ty-cursor]:after { - content: attr(data-ty-cursor); - font-family: monospace; - margin-left: 0.5em; - -webkit-animation: blink 1s infinite; - animation: blink 1s infinite; -} - - -/* Cursor animation */ - -@-webkit-keyframes blink { - 50% { - opacity: 0; - } -} - -@keyframes blink { - 50% { - opacity: 0; - } -} - -/* tooltip */ - -[data-md-state="open"] { - transform: translateY(0); - opacity: 1; - transition: - transform 400ms cubic-bezier(0.075, 0.85, 0.175, 1), - opacity 400ms; - pointer-events: initial; -} diff --git a/docs/assets/termynal/termynal.js b/docs/assets/termynal/termynal.js deleted file mode 100644 index 8a572449ae..0000000000 --- a/docs/assets/termynal/termynal.js +++ /dev/null @@ -1,411 +0,0 @@ -/** - * termynal.js - * A lightweight, modern and extensible animated terminal window, using - * async/await. - * - * @author Ines Montani - * @version 0.0.1 - * @license MIT - * - * Modified version from https://github.com/tiangolo/typer - * - */ - -'use strict'; - -/** Generate a terminal widget. */ -class Termynal { - /** - * Construct the widget's settings. - * @param {(string|Node)=} container - Query selector or container element. - * @param {Object=} options - Custom settings. - * @param {string} options.prefix - Prefix to use for data attributes. - * @param {number} options.startDelay - Delay before animation, in ms. - * @param {number} options.typeDelay - Delay between each typed character, in ms. 
- * @param {number} options.lineDelay - Delay between each line, in ms. - * @param {number} options.progressLength - Number of characters displayed as progress bar. - * @param {string} options.progressChar – Character to use for progress bar, defaults to █. - * @param {number} options.progressPercent - Max percent of progress. - * @param {string} options.cursor – Character to use for cursor, defaults to ▋. - * @param {Object[]} lineData - Dynamically loaded line data objects. - * @param {boolean} options.noInit - Don't initialise the animation. - */ - constructor(container = '#termynal', options = {}) { - this.container = (typeof container === 'string') ? document.querySelector(container) : container; - this.pfx = `data-${options.prefix || 'ty'}`; - this.originalStartDelay = this.startDelay = options.startDelay - || parseFloat(this.container.getAttribute(`${this.pfx}-startDelay`)) || 600; - this.originalTypeDelay = this.typeDelay = options.typeDelay - || parseFloat(this.container.getAttribute(`${this.pfx}-typeDelay`)) || 50; - this.originalLineDelay = this.lineDelay = options.lineDelay - || parseFloat(this.container.getAttribute(`${this.pfx}-lineDelay`)) || 500; - this.progressLength = options.progressLength - || parseFloat(this.container.getAttribute(`${this.pfx}-progressLength`)) || 40; - this.progressChar = options.progressChar - || this.container.getAttribute(`${this.pfx}-progressChar`) || '█'; - this.progressPercent = options.progressPercent - || parseFloat(this.container.getAttribute(`${this.pfx}-progressPercent`)) || 100; - this.cursor = options.cursor - || this.container.getAttribute(`${this.pfx}-cursor`) || '▋'; - this.lineData = this.lineDataToElements(options.lineData || []); - this.loadLines() - if (!options.noInit) this.init() - } - - loadLines() { - // Load all the lines and create the container so that the size is fixed - // Otherwise it would be changing and the user viewport would be constantly - // moving as she/he scrolls - const finish = this.generateFinish() - finish.style.visibility = 'hidden' - this.container.appendChild(finish) - // Appends dynamically loaded lines to existing line elements. - this.lines = [...this.container.querySelectorAll(`[${this.pfx}]`)].concat(this.lineData); - for (let line of this.lines) { - line.style.visibility = 'hidden' - this.container.appendChild(line) - } - const restart = this.generateRestart() - restart.style.visibility = 'hidden' - this.container.appendChild(restart) - this.container.setAttribute('data-termynal', ''); - } - - /** - * Initialise the widget, get lines, clear container and start animation. - */ - init() { - /** - * Calculates width and height of Termynal container. - * If container is empty and lines are dynamically loaded, defaults to browser `auto` or CSS. - */ - const containerStyle = getComputedStyle(this.container); - this.container.style.width = containerStyle.width !== '0px' ? - containerStyle.width : undefined; - this.container.style.minHeight = containerStyle.height !== '0px' ? - containerStyle.height : undefined; - - this.container.setAttribute('data-termynal', ''); - this.container.innerHTML = ''; - for (let line of this.lines) { - line.style.visibility = 'visible' - } - this.start(); - } - - - /** - * Start the animation and rener the lines depending on their data attributes. 
- */ - async start() { - this.addCopy() - this.addFinish() - await this._wait(this.startDelay); - - for (let line of this.lines) { - const type = line.getAttribute(this.pfx); - const delay = line.getAttribute(`${this.pfx}-delay`) || this.lineDelay; - - if (type == 'input') { - line.setAttribute(`${this.pfx}-cursor`, this.cursor); - await this.type(line); - await this._wait(delay); - } - - else if (type == 'progress') { - await this.progress(line); - await this._wait(delay); - } - - else { - this.container.appendChild(line); - await this._wait(delay); - } - - line.removeAttribute(`${this.pfx}-cursor`); - } - this.addRestart() - this.finishElement.style.visibility = 'hidden' - this.lineDelay = this.originalLineDelay - this.typeDelay = this.originalTypeDelay - this.startDelay = this.originalStartDelay - } - - generateRestart() { - const restart = document.createElement('a') - restart.onclick = (e) => { - e.preventDefault() - this.container.innerHTML = '' - this.init() - } - restart.href = '#' - restart.setAttribute('data-terminal-control', '') - restart.innerHTML = "restart ↻" - return restart - } - - generateCopy() { - var dialog = document.getElementsByClassName('md-dialog')[0] - var dialog_text = document.getElementsByClassName('md-dialog__inner md-typeset')[0] - const copy = document.createElement('a') - copy.classList.add("md-clipboard") - copy.classList.add("md-icon") - copy.onclick = (e) => { - e.preventDefault() - var command = '' - for (let line of this.lines) { - if (line.getAttribute("data-ty") == 'input') { - command = command + line.innerHTML + '\n' - } - } - navigator.clipboard.writeText(command) - dialog.setAttribute('data-md-state', 'open'); - dialog_text.innerText = 'Copied to clipboard'; - - setTimeout(function () { - dialog.removeAttribute('data-md-state'); - }, 2000); - } - copy.setAttribute('data-terminal-copy', '') - return copy - } - - generateFinish() { - const finish = document.createElement('a') - finish.onclick = (e) => { - e.preventDefault() - this.lineDelay = 0 - this.typeDelay = 0 - this.startDelay = 0 - } - finish.href = '#' - finish.setAttribute('data-terminal-control', '') - finish.innerHTML = "fast →" - this.finishElement = finish - return finish - } - - addRestart() { - const restart = this.generateRestart() - this.container.appendChild(restart) - } - - addFinish() { - const finish = this.generateFinish() - this.container.appendChild(finish) - } - - addCopy() { - let copy = this.generateCopy() - this.container.appendChild(copy) - } - - /** - * Animate a typed line. - * @param {Node} line - The line element to render. - */ - async type(line) { - const chars = [...line.textContent]; - line.textContent = ''; - this.container.appendChild(line); - - for (let char of chars) { - const delay = line.getAttribute(`${this.pfx}-typeDelay`) || this.typeDelay; - await this._wait(delay); - line.textContent += char; - } - } - - /** - * Animate a progress bar. - * @param {Node} line - The line element to render. 
- */ - async progress(line) { - const progressLength = line.getAttribute(`${this.pfx}-progressLength`) - || this.progressLength; - const progressChar = line.getAttribute(`${this.pfx}-progressChar`) - || this.progressChar; - const chars = progressChar.repeat(progressLength); - const progressPercent = line.getAttribute(`${this.pfx}-progressPercent`) - || this.progressPercent; - line.textContent = ''; - this.container.appendChild(line); - - for (let i = 1; i < chars.length + 1; i++) { - await this._wait(this.typeDelay) / 4; - const percent = Math.round(i / chars.length * 100); - line.textContent = `${chars.slice(0, i)} ${percent}%`; - if (percent > progressPercent) { - break; - } - } - } - - /** - * Helper function for animation delays, called with `await`. - * @param {number} time - Timeout, in ms. - */ - _wait(time) { - return new Promise(resolve => setTimeout(resolve, time)); - } - - /** - * Converts line data objects into line elements. - * - * @param {Object[]} lineData - Dynamically loaded lines. - * @param {Object} line - Line data object. - * @returns {Element[]} - Array of line elements. - */ - lineDataToElements(lineData) { - return lineData.map(line => { - let div = document.createElement('div'); - div.innerHTML = `${line.value || ''}`; - - return div.firstElementChild; - }); - } - - /** - * Helper function for generating attributes string. - * - * @param {Object} line - Line data object. - * @returns {string} - String of attributes. - */ - _attributes(line) { - let attrs = ''; - for (let prop in line) { - // Custom add class - if (prop === 'class') { - attrs += ` class=${line[prop]} ` - continue - } - if (prop === 'type') { - attrs += `${this.pfx}="${line[prop]}" ` - } else if (prop !== 'value') { - attrs += `${this.pfx}-${prop}="${line[prop]}" ` - } - } - - return attrs; - } -} - -/** -* HTML API: If current script has container(s) specified, initialise Termynal. -*/ -if (document.currentScript.hasAttribute('data-termynal-container')) { - const containers = document.currentScript.getAttribute('data-termynal-container'); - containers.split('|') - .forEach(container => new Termynal(container)) -} - -document.querySelectorAll(".use-termynal").forEach(node => { - node.style.display = "block"; - new Termynal(node, { - lineDelay: 500 - }); -}); -const progressLiteralStart = "---> 100%"; -const promptLiteralStart = "$ "; -const customPromptLiteralStart = "$* "; -const commentPromptLiteralStart = "# "; -const colorOutputLiteralStart = "color:"; -const termynalActivateClass = "termy"; -let termynals = []; - -function createTermynals() { - document - .querySelectorAll(`.${termynalActivateClass} .highlight`) - .forEach(node => { - const text = node.textContent; - const lines = text.split("\n"); - const useLines = []; - let buffer = []; - function saveBuffer() { - if (buffer.length) { - let isBlankSpace = true; - buffer.forEach(line => { - if (line) { - isBlankSpace = false; - } - }); - var dataValue = {}; - if (isBlankSpace) { - dataValue["delay"] = 0; - } - if (buffer[buffer.length - 1] === "") { - // A last single
<br> won't have effect - // so put an additional one - buffer.push(""); - } - - const bufferValue = buffer.join("<br>
"); - dataValue["value"] = bufferValue; - useLines.push(dataValue); - buffer = []; - } - } - for (let line of lines) { - if (line === progressLiteralStart) { - saveBuffer(); - useLines.push({ - type: "progress" - }); - } else if (line.startsWith(promptLiteralStart)) { - saveBuffer(); - const value = line.replace(promptLiteralStart, "").trimEnd(); - useLines.push({ - type: "input", - value: value - }); - } else if (line.startsWith(commentPromptLiteralStart)) { - saveBuffer(); - const value = "💬 " + line.replace(commentPromptLiteralStart, "").trimEnd(); - const color_value = "" + value + "" - useLines.push({ - value: color_value, - class: "termynal-comment", - delay: 0 - }); - } else if (line.startsWith(customPromptLiteralStart)) { - saveBuffer(); - const prompt = line.slice(3, line.indexOf(' ', 3)) - let value = line.slice(line.indexOf(' ', 3)).trimEnd(); - useLines.push({ - type: "input", - value: value, - prompt: prompt - }); - } else if (line.startsWith(colorOutputLiteralStart)) { - let color = line.substring(0, line.indexOf(' ')); - let line_value = line.substring(line.indexOf(' ') + 1); - var color_line = "" + line_value + "" - buffer.push(color_line); - } else { - buffer.push(line); - } - } - saveBuffer(); - const div = document.createElement("div"); - node.replaceWith(div); - const termynal = new Termynal(div, { - lineData: useLines, - noInit: true, - lineDelay: 500 - }); - termynals.push(termynal); - }); -} - -function loadVisibleTermynals() { - termynals = termynals.filter(termynal => { - if (termynal.container.getBoundingClientRect().top - innerHeight <= 0) { - termynal.init(); - return false; - } - return true; - }); -} -window.addEventListener("scroll", loadVisibleTermynals); -createTermynals(); -loadVisibleTermynals(); diff --git a/docs/data/converters.md b/docs/data/converters.md index 6914dfa998..1897eabcf9 100644 --- a/docs/data/converters.md +++ b/docs/data/converters.md @@ -218,3 +218,13 @@ one per entity, that can be used to write to a dataframe. The schema of each pro options: heading_level: 4 show_source: false + +## Markup (`converter="markup"`) {: #edsnlp.data.converters.MarkupToDocConverter } + +This converter is used to convert markup data, such as Markdown or XML into documents. +This can be particularly useful when you want to create annotated documents from scratch (e.g., for testing purposes). 
+ +::: edsnlp.data.converters.MarkupToDocConverter + options: + heading_level: 4 + show_source: false diff --git a/docs/data/index.md b/docs/data/index.md index e1198590c7..4bbd9899fe 100644 --- a/docs/data/index.md +++ b/docs/data/index.md @@ -46,9 +46,10 @@ At the moment, we support the following data sources: and the following schemas: -| Schema | Snippet | -|:---------------------------------------------------------------------------|------------------------| -| [Custom](./converters/#custom) | `converter=custom_fn` | -| [OMOP](./converters/#omop) | `converter="omop"` | -| [Standoff](./converters/#standoff) | `converter="standoff"` | -| [Ents](./converters/#edsnlp.data.converters.EntsDoc2DictConverter) | `converter="ents"` | +| Schema | Snippet | +|:--------------------------------------------------------------------|------------------------| +| [Custom](./converters/#custom) | `converter=custom_fn` | +| [OMOP](./converters/#omop) | `converter="omop"` | +| [Standoff](./converters/#standoff) | `converter="standoff"` | +| [Ents](./converters/#edsnlp.data.converters.EntsDoc2DictConverter) | `converter="ents"` | +| [Markup](./converters/#edsnlp.data.converters.MarkupToDocConverter) | `converter="markup"` | diff --git a/docs/metrics/index.md b/docs/metrics/index.md new file mode 100644 index 0000000000..25201285ad --- /dev/null +++ b/docs/metrics/index.md @@ -0,0 +1,12 @@ +# Metrics + +EDS-NLP provides several metrics to evaluate the performance of its components. These metrics can be used to assess the quality of entity recognition, negation detection, and other tasks. + +At the moment, we support the following metrics: + +| Metric | Description | +|:---------------------|:---------------------------------------------------| +| `eds.ner_exact` | NER metric with exact match at the span level | +| `eds.ner_token` | NER metric with token-level match | +| `eds.ner_overlap` | NER metric with overlap match at the span level | +| `eds.span_attribute` | Span multi-label multi-class classification metric | diff --git a/docs/metrics/ner.md b/docs/metrics/ner.md new file mode 100644 index 0000000000..52855565d6 --- /dev/null +++ b/docs/metrics/ner.md @@ -0,0 +1,63 @@ +# NER Metrics + +We provide several metrics to evaluate the performance of Named Entity Recognition (NER) components. +Let's look at an example and see how they differ. We'll use the following two documents: a reference +document (ref) and a document with predicted entities (pred). + +### Shared example + ++-------------------------------------------------------------+------------------------------------------+ +| pred | ref | ++=============================================================+==========================================+ +| *La*{.chip data-chip=PER} *patiente*{.chip data-chip=PER} a | La *patiente*{.chip data-chip=PER} a | +| une *fièvre aigüe*{.chip data-chip=DIS} | *une fièvre*{.chip data-chip=DIS} aigüe. | ++-------------------------------------------------------------+------------------------------------------+ + +Let's create matching documents in EDS-NLP using the following code snippet: + +```python +from edsnlp.data.converters import MarkupToDocConverter + +conv = MarkupToDocConverter(preset="md", span_setter="entities") + +pred = conv("[La](PER) [patiente](PER) a une [fièvre aiguë](DIS).") +ref = conv("La [patiente](PER) a [une fièvre](DIS) aiguë.") +``` + +### Summary of metrics + +The table below shows the different scores depending on the metric used. 
+ +| Metric | Precision | Recall | F1 | +|--------------------|-----------|--------|------| +| Span-level exact | 0.33 | 0.5 | 0.40 | +| Token-level | 0.50 | 0.67 | 0.57 | +| Span-level overlap | 0.67 | 1.0 | 0.80 | + +## Span-level NER metric with exact match {: #edsnlp.metrics.ner.NerExactMetric } + +::: edsnlp.metrics.ner.NerExactMetric + options: + heading_level: 2 + show_bases: false + show_source: false + only_class_level: true + +## Span-level NER metric with approximate match {: #edsnlp.metrics.ner.NerOverlapMetric } + +::: edsnlp.metrics.ner.NerOverlapMetric + options: + heading_level: 2 + show_bases: false + show_source: false + only_class_level: true + + +## Token-level NER metric {: #edsnlp.metrics.ner.NerTokenMetric } + +::: edsnlp.metrics.ner.NerTokenMetric + options: + heading_level: 2 + show_bases: false + show_source: false + only_class_level: true diff --git a/docs/metrics/span-attribute.md b/docs/metrics/span-attribute.md new file mode 100644 index 0000000000..07db17157b --- /dev/null +++ b/docs/metrics/span-attribute.md @@ -0,0 +1,43 @@ +# Span Attribute Classification Metrics {: #edsnlp.metrics.span_attribute.SpanAttributeMetric } + +Several NLP tasks consist in classifying existing spans of text into multiple classes, +such as the detection of negation, hypothesis or span linking. We provide a metric +to evaluate the performance of such tasks. + +Let's look at an example. We'll use the following two documents: a reference +document (ref) and a document with predicted entities (pred). + ++-------------------------------------------------------------------+-------------------------------------------------------------------+ +| pred | ref | ++===================================================================+===================================================================+ +| Le patient n'est pas *fièvreux*{.chip data-chip="SYMP neg=true"}, | Le patient n'est pas *fièvreux*{.chip data-chip="SYMP neg=true"}, | +| son père a *du diabète*{.chip data-chip="DIS carrier=PATIENT"}. | son père a *du diabète*{.chip data-chip="DIS carrier=FATHER"}. | +| Pas d'évolution du | Pas d'évolution du | +| *cancer*{.chip data-chip="DIS neg=true carrier=PATIENT"}. | *cancer*{.chip data-chip="DIS carrier=PATIENT"}. | ++-------------------------------------------------------------------+-------------------------------------------------------------------+ + +We can quickly create matching documents in EDS-NLP using the following code snippet: + +```python +from edsnlp.data.converters import MarkupToDocConverter + +conv = MarkupToDocConverter(preset="md", span_setter="entities") +# Create a document with predicted attributes and a reference document +pred = conv( + "Le patient n'est pas [fièvreux](SYMP neg=true), " + "son père a [du diabète](DIS neg=false carrier=PATIENT). " + "Pas d'évolution du [cancer](DIS neg=true carrier=PATIENT)." +) +ref = conv( + "Le patient n'est pas [fièvreux](SYMP neg=true), " + "son père a [du diabète](DIS neg=false carrier=FATHER). " + "Pas d'évolution du [cancer](DIS neg=false carrier=PATIENT)." 
+) +``` + +::: edsnlp.metrics.span_attribute.SpanAttributeMetric + options: + heading_level: 2 + show_bases: false + show_source: false + only_class_level: true diff --git a/docs/scripts/clickable_snippets.py b/docs/scripts/clickable_snippets.py index 2b901448a1..affd0d4f90 100644 --- a/docs/scripts/clickable_snippets.py +++ b/docs/scripts/clickable_snippets.py @@ -99,6 +99,7 @@ def on_post_page( for ep in ( *self.get_ep_namespace(ep, "spacy_factories"), *self.get_ep_namespace(ep, "edsnlp_factories"), + *self.get_ep_namespace(ep, "spacy_scorers"), ) } diff --git a/edsnlp/__init__.py b/edsnlp/__init__.py index 965d8db6b2..59bc1a46d7 100644 --- a/edsnlp/__init__.py +++ b/edsnlp/__init__.py @@ -45,7 +45,11 @@ def find_spec(self, fullname, path, target=None): # pragma: no cover spec = importlib.util.spec_from_loader(fullname, AliasLoader(new_name)) return spec if fullname.startswith("edsnlp.metrics.span_classification"): - new_name = "edsnlp.metrics.span_attributes" + fullname[34:] + new_name = "edsnlp.metrics.span_attribute" + fullname[34:] + spec = importlib.util.spec_from_loader(fullname, AliasLoader(new_name)) + return spec + if fullname.startswith("edsnlp.metrics.span_attributes"): + new_name = "edsnlp.metrics.span_attribute" + fullname[30:] spec = importlib.util.spec_from_loader(fullname, AliasLoader(new_name)) return spec if "span_qualifier" in fullname.split("."): diff --git a/edsnlp/data/converters.py b/edsnlp/data/converters.py index c1247a14d0..c601b4d3f3 100644 --- a/edsnlp/data/converters.py +++ b/edsnlp/data/converters.py @@ -24,6 +24,7 @@ from confit.registry import ValidatedFunction from spacy.tokenizer import Tokenizer from spacy.tokens import Doc, Span +from typing_extensions import Literal import edsnlp from edsnlp import registry @@ -707,6 +708,225 @@ def __call__(self, doc): ] +# ex: `[The [cat](ANIMAL) is [black](COLOR hex="#000000")]. + + +@registry.factory.register("eds.markup_to_doc", spacy_compatible=False) +class MarkupToDocConverter: + """ + Examples + -------- + ```python + import edsnlp + + # Any kind of reader (`edsnlp.data.read/from_...`) can be used here + # If input items are dicts, the converter expects a "text" key/column. + docs = list( + edsnlp.data.from_iterable( + [ + "This [is](VERB negation=True) not a [test](NOUN).", + "This is another [test](NOUN).", + ], + converter="markup", + span_setter="entities", + ), + ) + print(docs[0].spans["entities"]) + # Out: [is, test] + ``` + + You can also use it directly on a string: + + ```python + from edsnlp.data.converters import MarkupToDocConverter + + converter = MarkupToDocConverter( + span_setter={"verb": "VERB", "noun": "NOUN"}, + preset="xml", + ) + doc = converter("This is not a test.") + print(doc.spans["verb"]) + # Out: [is] + print(doc.spans["verb"][0]._.negation) + # Out: True + ``` + + Parameters + ---------- + preset: Literal["md", "xml"] + The preset to use for the markup format. Defaults to "md" (Markdown-like + syntax). Use "xml" for XML-like syntax. + opener: Optional[str] + The regex pattern to match the opening tag of the markup. Defaults to the + preset's opener. + closer: Optional[str] + The regex pattern to match the closing tag of the markup. Defaults to the + preset's closer. + tokenizer: Optional[Tokenizer] + The tokenizer instance used to tokenize the documents. Likely not needed since + by default it uses the current context tokenizer : + + - the tokenizer of the next pipeline run by `.map_pipeline` in a + [Stream][edsnlp.core.stream.Stream]. + - or the `eds` tokenizer by default. 
+ span_setter: SpanSetterArg + The span setter to use when setting the spans in the documents. Defaults to + setting the spans in the `ents` attribute and creates a new span group for + each JSON entity label. + span_attributes: Optional[AttributesMappingArg] + Mapping from markup attributes to Span extensions (can be a list too). + By default, all attributes are imported as Span extensions with the same name. + keep_raw_attribute_values: bool + Whether to keep the raw attribute values (as strings) or to convert them to + Python objects (e.g. booleans). + default_attributes: AttributesMappingArg + How to set attributes on spans for which no attribute value was found in the + input format. This is especially useful for negation, or frequent attributes + values (e.g. "negated" is often False, "temporal" is often "present"), that + annotators may not want to annotate every time. + bool_attributes: AsList[str] + List of boolean attributes to set to False by default. This is useful for + attributes that are often not annotated, but you want to have a default value + for them. + """ + + PRESETS = { + "md": { + "opener": r"(?P\[)", + "closer": r"(?P\]\(\s*(?P[a-zA-Z0-9]+)\s*(?P.*?)\))", # noqa: E501 + }, + "xml": { + "opener": r"(?P<(?P[a-zA-Z0-9]+)(?P.*?)>)", # noqa: E501 + "closer": r"(?P[a-zA-Z0-9]+)>)", + }, + } + + def __init__( + self, + *, + tokenizer: Optional[Tokenizer] = None, + span_setter: SpanSetterArg = {"ents": True, "*": True}, + span_attributes: Optional[AttributesMappingArg] = None, + keep_raw_attribute_values: bool = False, + default_attributes: AttributesMappingArg = {}, + bool_attributes: AsList[str] = [], + preset: Literal["md", "xml"] = "md", + opener: Optional[str] = None, + closer: Optional[str] = None, + ): + self.tokenizer = tokenizer + self.span_setter = span_setter + self.span_attributes = span_attributes + self.keep_raw_attribute_values = keep_raw_attribute_values + self.default_attributes = dict(default_attributes) + for attr in bool_attributes: + self.default_attributes[attr] = False + self.opener = opener or self.PRESETS[preset]["opener"] + self.closer = closer or self.PRESETS[preset]["closer"] + + def _as_python(self, value: str): + import ast + + if self.keep_raw_attribute_values: + return value + try: + return ast.literal_eval(value) + except Exception: + if value.lower() == "true": + return True + elif value.lower() == "false": + return False + return value + + def _parse(self, inline_text: str): + import re + + last_inline_offset = 0 + starts = [] + text = "" + seps = list(re.finditer(self.opener + "|" + self.closer, inline_text)) + entities = [] + for i, sep in enumerate(seps): + is_opener = bool(sep["opener"]) + groups = sep.groupdict() + inline_start = sep.start("opener") if is_opener else sep.start("closer") + inline_end = sep.end("opener") if is_opener else sep.end("closer") + label = groups.get("closer_label", groups.get("opener_label")) + attrs = groups.get("closer_attrs", groups.get("opener_attrs")) or "" + attrs = { + k: self._as_python(v) + for k, v in (kv.split("=") for kv in attrs.split()) + } + text += inline_text[last_inline_offset:inline_start] + if is_opener: + starts.append((len(text), label, attrs)) + else: + try: + idx = next( + i + for i in range(len(starts) - 1, -1, -1) + if starts[i][1] == label or not label or not starts[i][1] + ) + except StopIteration: + warnings.warn(f"Unmatched closing tag for '{sep.group()}'") + continue + start, start_label, start_attrs = starts.pop(idx) + entities.append( + (start, len(text), start_label or 
label, {**attrs, **start_attrs}) + ) + last_inline_offset = inline_end + if last_inline_offset < len(inline_text): + text += inline_text[last_inline_offset:] + if starts: + warnings.warn( + f"Unmatched opening tags at indices {', '.join(s[1] for s in starts)}" + ) + entities = sorted(entities) + return text, entities + + def __call__(self, obj, tokenizer=None): + tok = tokenizer or self.tokenizer or get_current_tokenizer() + if isinstance(obj, str): + obj = {"text": obj} + annotated = obj["text"] + plain, raw_ents = self._parse(annotated) + + doc = tok(plain) + doc._.note_id = obj.get("doc_id", obj.get(FILENAME)) + + for dst in ( + *(() if self.span_attributes is None else self.span_attributes.values()), + *self.default_attributes, + ): + if not Span.has_extension(dst): + Span.set_extension(dst, default=None) + + spans = [] + for start, end, label, attrs in raw_ents: + span = doc.char_span(start, end, label=label, alignment_mode="expand") + if span is None: + continue + for k, v in attrs.items(): + new_k = ( + self.span_attributes.get(k) + if self.span_attributes is not None + else k + ) + if self.span_attributes is None and not Span.has_extension(new_k): + Span.set_extension(new_k, default=None) + if new_k: + span._.set(new_k, v) + spans.append(span) + + set_spans(doc, spans, span_setter=self.span_setter) + for attr, value in self.default_attributes.items(): + for span in spans: + if span._.get(attr) is None: + span._.set(attr, value) + + return doc + + def get_dict2doc_converter( converter: Union[str, Callable], kwargs ) -> Tuple[Callable, Dict]: @@ -716,7 +936,11 @@ def get_dict2doc_converter( filtered = [ name for name in available - if converter == name or (converter in name and "dict2doc" in name) + if converter == name + or ( + converter in name + and (name.endswith("2doc") or name.endswith("to_doc")) + ) ] converter = edsnlp.registry.factory.get(filtered[0]) nlp = kwargs.pop("nlp", None) @@ -726,7 +950,9 @@ def get_dict2doc_converter( kwargs = {} return converter, kwargs except (KeyError, IndexError): - available = [v for v in available if "dict2doc" in v] + available = [ + v for v in available if (v.endswith("2doc") or v.endswith("to_doc")) + ] raise ValueError( f"Cannot find converter for format {converter}. " f"Available converters are {', '.join(available)}" @@ -745,14 +971,20 @@ def get_doc2dict_converter( filtered = [ name for name in available - if converter == name or (converter in name and "doc2dict" in name) + if converter == name + or ( + converter in name + and (name.endswith("2dict") or name.endswith("to_dict")) + ) ] converter = edsnlp.registry.factory.get(filtered[0]) converter = converter(**kwargs) kwargs = {} return converter, kwargs except (KeyError, IndexError): - available = [v for v in available if "doc2dict" in v] + available = [ + v for v in available if (v.endswith("2dict") or v.endswith("to_dict")) + ] raise ValueError( f"Cannot find converter for format {converter}. 
" f"Available converters are {', '.join(available)}" diff --git a/edsnlp/metrics/__init__.py b/edsnlp/metrics/__init__.py index e96f74017a..df17ab40a3 100644 --- a/edsnlp/metrics/__init__.py +++ b/edsnlp/metrics/__init__.py @@ -25,7 +25,7 @@ def average_precision(pred: Dict[Any, float], gold: Iterable[Any]): for i in range(1, len(precisions)): if recalls[i] > recalls[i - 1]: ap += (recalls[i] - recalls[i - 1]) * precisions[i] - return ap + return float(ap) def prf(pred: Collection, gold: Collection): diff --git a/edsnlp/metrics/ner.py b/edsnlp/metrics/ner.py index d39306445d..17bc9f5f71 100644 --- a/edsnlp/metrics/ner.py +++ b/edsnlp/metrics/ner.py @@ -1,3 +1,27 @@ +""" +We provide several metrics to evaluate the performance of Named Entity Recognition (NER) components. +Let's look at an example and see how they differ. We'll use the following two documents: a reference +document (ref) and a document with predicted entities (pred). + ++-------------------------------------------------------------+------------------------------------------+ +| pred | ref | ++=============================================================+==========================================+ +| *La*{.chip data-chip=PER} *patiente*{.chip data-chip=PER} a | La *patiente*{.chip data-chip=PER} a | +| une *fièvre aigüe*{.chip data-chip=DIS} | *une fièvre*{.chip data-chip=DIS} aigüe. | ++-------------------------------------------------------------+------------------------------------------+ + +Let's create matching documents in EDS-NLP using the following code snippet: + +```python +from edsnlp.data.converters import MarkupToDocConverter + +conv = MarkupToDocConverter(preset="md", span_setter="entities") + +pred = conv("[La](PER) [patiente](PER) a une [fièvre aiguë](DIS).") +ref = conv("La [patiente](PER) a [une fièvre](DIS) aiguë.") +``` +""" # noqa: E501 + import abc from collections import defaultdict from typing import Any, Dict, Optional @@ -13,26 +37,6 @@ def ner_exact_metric( micro_key: str = "micro", filter_expr: Optional[str] = None, ) -> Dict[str, Any]: - """ - Scores the extracted entities that may be overlapping or nested - by looking in the spans returned by a given SpanGetter object. - - Parameters - ---------- - examples: Examples - The examples to score, either a tuple of (golds, preds) or a list of - spacy.training.Example objects - span_getter: SpanGetter - The span getter to use to extract the spans from the document - micro_key: str - The key to use to store the micro-averaged results for spans of all types - filter_expr: str - The filter expression to use to filter the documents - - Returns - ------- - Dict[str, Any] - """ examples = make_examples(examples) if filter_expr is not None: filter_fn = eval(f"lambda doc: {filter_expr}") @@ -65,27 +69,6 @@ def ner_token_metric( micro_key: str = "micro", filter_expr: Optional[str] = None, ) -> Dict[str, Any]: - """ - Scores the extracted entities that may be overlapping or nested - by looking in `doc.ents`, and `doc.spans`, and comparing the predicted - and gold entities at the TOKEN level. 
- - Parameters - ---------- - examples: Examples - The examples to score, either a tuple of (golds, preds) or a list of - spacy.training.Example objects - span_getter: SpanGetter - The span getter to use to extract the spans from the document - micro_key: str - The key to use to store the micro-averaged results for spans of all types - filter_expr: str - The filter expression to use to filter the documents - - Returns - ------- - Dict[str, Any] - """ examples = make_examples(examples) if filter_expr is not None: filter_fn = eval(f"lambda doc: {filter_expr}") @@ -130,30 +113,6 @@ def ner_overlap_metric( filter_expr: Optional[str] = None, threshold: float = 0.5, ) -> Dict[str, Any]: - """ - Scores the extracted entities that may be overlapping or nested - by looking in `doc.ents`, and `doc.spans`, and comparing the predicted - and gold entities and counting true when a predicted entity overlaps - with a gold entity of the same label - - Parameters - ---------- - examples: Examples - The examples to score, either a tuple of (golds, preds) or a list of - spacy.training.Example objects - span_getter: SpanGetter - The span getter to use to extract the spans from the document - micro_key: str - The key to use to store the micro-averaged results for spans of all types - filter_expr: str - The filter expression to use to filter the documents - threshold: float - The threshold to use to consider that two spans overlap - - Returns - ------- - Dict[str, Any] - """ examples = make_examples(*examples) if filter_expr is not None: filter_fn = eval(f"lambda doc: {filter_expr}") @@ -239,6 +198,54 @@ def __call__(self, *examples) -> Dict[str, Any]: deprecated=["eds.ner_exact_metric"], ) class NerExactMetric(NerMetric): + r""" + The `eds.ner_exact` metric + scores the extracted entities (that may be overlapping or nested) + by looking in the spans returned by a given SpanGetter object and + comparing predicted spans to gold spans for **exact** boundary and label matches. + + Let's view these elements as collections of (span → label) and count how + many of the predicted spans match the gold spans exactly (and vice versa): + + +----------------------------------------------+--------------------------------------------+ + | pred | ref | + +==============================================+============================================+ + | *La*{.chip .fp data-chip=PER}
| *patiente*{.chip .tp data-chip=PER}<br>
| + | *patiente*{.chip .tp data-chip=PER}<br>
| *une fièvre*{.chip .fp data-chip=DIS}<br>
| + | *fièvre aiguë*{.chip .fp data-chip=DIS}<br>
| | + +----------------------------------------------+--------------------------------------------+ + + Precision, Recall and F1 (micro-average and per‐label) are computed as follows: + + - Precision: `p = |matched items of pred| / |pred|` + - Recall: `r = |matched items of ref| / |ref|` + - F1: `f = 2 / (1/p + 1/f)` + + Examples + -------- + + ```python + from edsnlp.metrics.ner import NerExactMetric + + metric = NerExactMetric(span_getter=conv.span_setter, micro_key="micro") + metric([ref], [pred]) + # Out: { + # 'micro': {'f': 0.4, 'p': 0.33, 'r': 0.5, 'tp': 1, 'support': 2, 'positives': 3}, + # 'PER': {'f': 0.67, 'p': 0.5, 'r': 1, 'tp': 1, 'support': 1, 'positives': 2}, + # 'DIS': {'f': 0.0, 'p': 0.0, 'r': 0.0, 'tp': 0, 'support': 1, 'positives': 1}, + # } + ``` + + Parameters + ---------- + span_getter: SpanGetter + The span getter to use to extract the spans from the document + micro_key: str + The key to use to store the micro-averaged results for spans of all types + filter_expr: str + The filter expression to use to filter the documents. Evaluated with `doc` as the variable. + """ # noqa: E501 + def __init__( self, span_getter: SpanGetterArg, @@ -265,6 +272,58 @@ def __call__(self, *examples): deprecated=["eds.ner_token_metric"], ) class NerTokenMetric(NerMetric): + r""" + The `eds.ner_token` metric + scores the extracted entities that may be overlapping or nested by looking in + `doc.ents`, and `doc.spans`, and comparing the predicted and gold entities at the + **token** level. + + Assuming we use the `eds` (or `fr` or `en`) tokenizer, in the above example, there + are 3 annotated tokens in the reference, and 4 annotated tokens in the prediction. + Let's view these elements as sets of (token, label) and count how many of the + predicted tokens match the gold tokens exactly (and vice versa): + + +------------------------------------------+------------------------------------------+ + | pred | ref | + +==========================================+==========================================+ + | *La*{.chip .fp data-chip=PER}
| *patiente*{.chip .tp data-chip=PER}<br>
| + | *patiente*{.chip .tp data-chip=PER}<br>
| *une*{.chip .fp data-chip=DIS}<br>
| + | *fièvre*{.chip .tp data-chip=DIS}<br>
| *fièvre*{.chip .tp data-chip=DIS} | + | *aiguë*{.chip .fp data-chip=DIS} | | + +------------------------------------------+------------------------------------------+ + + Precision, Recall and F1 (micro-average and per‐label) are computed as follows: + + - Precision: `p = |matched items of pred| / |pred|` + - Recall: `r = |matched items of ref| / |ref|` + - F1: `f = 2 / (1/p + 1/f)` + + Examples + -------- + + ```python + from edsnlp.metrics.ner import NerTokenMetric + + metric = NerTokenMetric(span_getter=conv.span_setter, micro_key="micro") + metric([ref], [pred]) + # Out: { + # 'micro': {'f': 0.57, 'p': 0.5, 'r': 0.67, 'tp': 2, 'support': 3, 'positives': 4}, + # 'PER': {'f': 0.67, 'p': 0.5, 'r': 1, 'tp': 1, 'support': 1, 'positives': 2}, + # 'DIS': {'f': 0.5, 'p': 0.5, 'r': 0.5, 'tp': 1, 'support': 2, 'positives': 2} + # } + ``` + + Parameters + ---------- + span_getter: SpanGetter + The span getter to use to extract the spans from the document + micro_key: str + The key to use to store the micro-averaged results for spans of all types + filter_expr: str + The filter expression to use to filter the documents. Will be evaluated + with `doc` as the variable name, so you can use `doc.ents`, `doc.spans`, etc. + """ # noqa: E501 + def __init__( self, span_getter: SpanGetterArg, @@ -291,6 +350,71 @@ def __call__(self, *examples): deprecated=["eds.ner_overlap_metric"], ) class NerOverlapMetric(NerMetric): + r""" + The `eds.ner_overlap` metric + scores the extracted entities that may be overlapping or nested + by looking in the spans returned by a given SpanGetter object and + counting a prediction as correct if it overlaps by at least the given + Dice‐coefficient threshold with a gold span of the same label. + + This metric is useful for evaluating NER systems where the exact boundaries + do not matter too much, but the presence of the entity at the same spot is important. + For instance, you may not want to penalize a system that forgets determiners if + the rest of the entity is correctly identified. + + Let's view these elements as sets of (span → label) and count how many of the + predicted spans match the gold spans by at least the given Dice coefficient + (and vice versa): + + +---------------------------------------------+------------------------------------------+ + | pred | ref | + +=============================================+==========================================+ + | *La*{.chip .fp data-chip=PER}
| *patiente*{.chip .tp data-chip=PER}<br>
| + | *patiente*{.chip .tp data-chip=PER}<br>
| *une fièvre*{.chip .tp data-chip=DIS} | + | *fièvre aiguë*{.chip .tp data-chip=DIS}<br>
| | + +---------------------------------------------+------------------------------------------+ + + Precision, Recall and F1 (micro-average and per‐label) are computed as follows: + + - Precision: `p = |matched items of pred| / |pred|` + - Recall: `r = |matched items of ref| / |ref|` + - F1: `f = 2 / (1/p + 1/f)` + + !!! note "Overlap threshold" + + The threshold is the minimum Dice coefficient to consider two spans as overlapping. Setting + it to 1.0 will yield the same results as the `eds.ner_exact` metric, while setting it to a + near-zero value (e.g., like 1e-14) will match any two spans that share at least one token. + + Examples + -------- + + ```python + from edsnlp.metrics.ner import NerOverlapMetric + + metric = NerOverlapMetric( + span_getter=conv.span_setter, micro_key="micro", threshold=0.5 + ) + metric([ref], [pred]) + # Out: { + # 'micro': {'f': 0.8, 'p': 0.67, 'r': 1.0, 'tp': 2, 'support': 2, 'positives': 3}, + # 'PER': {'f': 0.67, 'p': 0.5, 'r': 1.0, 'tp': 1, 'support': 1, 'positives': 2}, + # 'DIS': {'f': 1.0, 'p': 1.0, 'r': 1.0, 'tp': 1, 'support': 1, 'positives': 1} + # } + ``` + + Parameters + ---------- + span_getter: SpanGetter + The span getter to use to extract the spans from the document + micro_key: str + The key to use to store the micro-averaged results for spans of all types + filter_expr: str + The filter expression to use to filter the documents + threshold: float + The threshold on the Dice coefficient to consider two spans as overlapping + """ # noqa: E501 + def __init__( self, span_getter: SpanGetterArg, diff --git a/edsnlp/metrics/span_attribute.py b/edsnlp/metrics/span_attribute.py new file mode 100644 index 0000000000..d1c7b6dac4 --- /dev/null +++ b/edsnlp/metrics/span_attribute.py @@ -0,0 +1,311 @@ +""" +Metrics for Span Attribute Classification + +# Span Attribute Classification Metrics {: #edsnlp.metrics.span_attribute.SpanAttributeMetric } + +Several NLP tasks consist in classifying existing spans of text into multiple classes, +such as the detection of negation, hypothesis or span linking. + +We provide a metric to evaluate the performance of such tasks, + +Let's look at an example: + ++-------------------------------------------------------------------+-------------------------------------------------------------------+ +| pred | ref | ++===================================================================+===================================================================+ +| Le patient n'est pas *fièvreux*{.chip data-chip="SYMP neg=true"}, | Le patient n'est pas *fièvreux*{.chip data-chip="SYMP neg=true"}, | +| son père a *du diabète*{.chip data-chip="DIS carrier=PATIENT"}. | son père a *du diabète*{.chip data-chip="DIS carrier=FATHER"}. | +| Pas d'évolution du | Pas d'évolution du | +| *cancer*{.chip data-chip="DIS neg=true carrier=PATIENT"}. | *cancer*{.chip data-chip="DIS carrier=PATIENT"}. | ++-------------------------------------------------------------------+-------------------------------------------------------------------+ + +We can quickly create matching documents in EDS-NLP using the following code snippet: + +```python +from edsnlp.data.converters import MarkupToDocConverter + +conv = MarkupToDocConverter(preset="md", span_setter="entities") +# Create a document with predicted attributes and a reference document +pred = conv( + "Le patient n'est pas [fièvreux](SYMP neg=true), " + "son père a [du diabète](DIS neg=false carrier=PATIENT). " + "Pas d'évolution du [cancer](DIS neg=true carrier=PATIENT)." 
+) +ref = conv( + "Le patient n'est pas [fièvreux](SYMP neg=true), " + "son père a [du diabète](DIS neg=false carrier=FATHER). " + "Pas d'évolution du [cancer](DIS neg=false carrier=PATIENT)." +) +``` +""" # noqa: E501 + +import warnings +from collections import defaultdict +from typing import Any, Dict, Optional + +from edsnlp import registry +from edsnlp.metrics import Examples, average_precision, make_examples, prf +from edsnlp.utils.bindings import BINDING_GETTERS, Attributes, AttributesArg +from edsnlp.utils.span_getters import SpanGetterArg, get_spans + + +def span_attribute_metric( + examples: Examples, + span_getter: SpanGetterArg, + attributes: Attributes = None, + include_falsy: bool = False, + default_values: Dict = {}, + micro_key: str = "micro", + filter_expr: Optional[str] = None, + **kwargs: Any, +): + if "qualifiers" in kwargs: + warnings.warn( + "The `qualifiers` argument of span_attribute_metric() is " + "deprecated. Use `attributes` instead.", + DeprecationWarning, + ) + assert attributes is None + attributes = kwargs.pop("qualifiers") + if attributes is None: + raise TypeError( + "span_attribute_metric() missing 1 required argument: 'attributes'" + ) + if kwargs: + raise TypeError( + f"span_attribute_metric() got unexpected keyword arguments: " + f"{', '.join(kwargs.keys())}" + ) + examples = make_examples(examples) + if filter_expr is not None: + filter_fn = eval(f"lambda doc: {filter_expr}") + examples = [eg for eg in examples if filter_fn(eg.reference)] + labels = defaultdict(lambda: (set(), set(), dict())) + labels["micro"] = (set(), set(), dict()) + total_pred_count = 0 + total_gold_count = 0 + + if not include_falsy: + default_values_ = defaultdict(lambda: False) + default_values_.update(default_values) + default_values = default_values_ + del default_values_ + for eg_idx, eg in enumerate(examples): + doc_spans = get_spans(eg.predicted, span_getter) + for span in doc_spans: + total_pred_count += 1 + beg, end = span.start, span.end + for attr, span_filter in attributes.items(): + if not (span_filter is True or span.label_ in span_filter): + continue + getter_key = attr if attr.startswith("_.") else f"_.{attr}" + value = BINDING_GETTERS[getter_key](span) + top_val, top_p = max( + getattr(span._, "prob", {}).get(attr, {}).items(), + key=lambda x: x[1], + default=(value, 1.0), + ) + if (top_val or include_falsy) and default_values[attr] != top_val: + labels[attr][2][(eg_idx, beg, end, attr, top_val)] = top_p + labels[micro_key][2][(eg_idx, beg, end, attr, top_val)] = top_p + if (value or include_falsy) and default_values[attr] != value: + labels[micro_key][0].add((eg_idx, beg, end, attr, value)) + labels[attr][0].add((eg_idx, beg, end, attr, value)) + + doc_spans = get_spans(eg.reference, span_getter) + for span in doc_spans: + total_gold_count += 1 + beg, end = span.start, span.end + for attr, span_filter in attributes.items(): + if not (span_filter is True or span.label_ in span_filter): + continue + getter_key = attr if attr.startswith("_.") else f"_.{attr}" + value = BINDING_GETTERS[getter_key](span) + if (value or include_falsy) and default_values[attr] != value: + labels[micro_key][1].add((eg_idx, beg, end, attr, value)) + labels[attr][1].add((eg_idx, beg, end, attr, value)) + + if total_pred_count != total_gold_count: + raise ValueError( + f"Number of predicted and gold spans differ: {total_pred_count} != " + f"{total_gold_count}. 
Make sure that you are running your span " + "attribute classification pipe on the gold annotations, and not spans " + "predicted by another NER pipe in your model." + ) + + for name, (pred, gold, pred_with_prob) in labels.items(): + print("-", name, "pred/gold", pred, gold, "=>", prf(pred, gold)) + return { + name: { + **prf(pred, gold), + "ap": average_precision(pred_with_prob, gold), + } + for name, (pred, gold, pred_with_prob) in labels.items() + } + + +@registry.metrics.register( + "eds.span_attribute", + deprecated=["eds.span_classification_scorer", "eds.span_attribute_scorer"], +) +class SpanAttributeMetric: + """ + The `eds.span_attribute` metric + evaluates span‐level attribute classification by comparing predicted and gold + attribute values on the same set of spans. For each attribute you specify, it + computes Precision, Recall, F1, number of true positives (tp), number of + gold instances (support), number of predicted instances (positives), and + the Average Precision (ap). A micro‐average over all attributes is also + provided under `micro_key`. + + ```python + from edsnlp.metrics.span_attribute import SpanAttributeMetric + + metric = SpanAttributeMetric( + span_getter=conv.span_setter, + # Evaluated attributes + attributes={ + "neg": True, # 'neg' on every entity + "carrier": ["DIS"], # 'carrier' only on 'DIS' entities + }, + # Ignore these default values when counting matches + default_values={ + "neg": False, + }, + micro_key="micro", + ) + ``` + + Let's enumerate (span -> attr = value) items in our documents. Only the items with + matching span boundaries, attribute name, and value are counted as a true positives. + For instance, with the predicted and reference spans of the example above: + + +--------------------------------------------------+-------------------------------------------------+ + | pred | ref | + +==================================================+=================================================+ + | *fièvreux → neg = True*{.chip .tp}
| *fièvreux → neg = True*{.chip .tp}<br>
| + | *du diabète → neg = False*{.chip .na}<br>
| *du diabète → neg = False*{.chip .na}<br>
| + | *du diabète → carrier = PATIENT*{.chip .fp}<br>
| *du diabète → carrier = FATHER*{.chip .fn}<br>
| + | *cancer → neg = True*{.chip .fp}<br>
| *cancer → neg = False*{.chip .na}<br>
| + | *cancer → carrier = PATIENT*{.chip .tp} | *cancer → carrier = PATIENT*{.chip .tp} | + +--------------------------------------------------+-------------------------------------------------+ + + !!! note "Default values" + + Note that there we don't count "neg=False" items, shown in grey in the table. In EDS-NLP, + this is done by setting `defaults_values={"neg": False}` when creating the metric. This + is quite common in classification tasks, where one of the values is both the most common + and the "default" (hence the name of the parameter). Counting these values would likely + skew the micro-average metrics towards the default value. + + Precision, Recall and F1 (micro-average and per‐label) are computed as follows: + + - Precision: `p = |matched items of pred| / |pred|` + - Recall: `r = |matched items of ref| / |ref|` + - F1: `f = 2 / (1/p + 1/f)` + + This yields the following metrics: + + ```python + metric([ref], [pred]) + # Out: { + # 'micro': {'f': 0.57, 'p': 0.5, 'r': 0.67, 'tp': 2, 'support': 3, 'positives': 4, 'ap': 0.17}, + # 'neg': {'f': 0.67, 'p': 0.5, 'r': 1, 'tp': 1, 'support': 1, 'positives': 2, 'ap': 0.0}, + # 'carrier': {'f': 0.5, 'p': 0.5, 'r': 0.5, 'tp': 1, 'support': 2, 'positives': 2, 'ap': 0.25}, + # } + ``` + + Parameters + ---------- + span_getter : SpanGetterArg + The span getter to extract spans from each `Doc`. + attributes : Mapping[str, Union[bool, Sequence[str]]] + Map each attribute name to `True` (evaluate on all spans) or a sequence of + labels restricting which spans to test. + default_values : Dict[str, Any] + Attribute values to omit from micro‐average counts (e.g., common negative or + default labels). + include_falsy : bool + If `False`, ignore falsy values (e.g., `False`, `None`, `''`) in predictions + or gold when computing metrics; if `True`, count them. + micro_key : str + Key under which to store the micro‐averaged results across all attributes. + filter_expr : Optional[str] + A Python expression (using `doc`) to filter which examples are scored. + + Returns + ------- + Dict[str, Dict[str, float]] + A dictionary mapping each attribute name (and the `micro_key`) to its metrics: + + - `label` or micro_key : + + - `p` : precision + - `r` : recall + - `f` : F1 score + - `tp` : true positive count + - `support` : number of gold instances + - `positives` : number of predicted instances + - `ap` : [average precision](https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Mean_average_precision) + """ # noqa: E501 + + attributes: Attributes + + def __init__( + self, + span_getter: SpanGetterArg, + attributes: AttributesArg = None, + qualifiers: AttributesArg = None, + default_values: Dict = {}, + include_falsy: bool = False, + micro_key: str = "micro", + filter_expr: Optional[str] = None, + ): + if qualifiers is not None: + warnings.warn( + "The `qualifiers` argument is deprecated. Use `attributes` instead.", + DeprecationWarning, + ) + self.span_getter = span_getter + self.attributes = attributes or qualifiers + self.default_values = default_values + self.include_falsy = include_falsy + self.micro_key = micro_key + self.filter_expr = filter_expr + + __init__.__doc__ = span_attribute_metric.__doc__ + + def __call__(self, *examples: Any): + """ + Compute the span attribute metrics for the given examples. 
+ + Parameters + ---------- + examples : Examples + The examples to score, either a tuple of (golds, preds) or a list of + spacy.training.Example objects + + Returns + ------- + Dict[str, Dict[str, float]] + The scores for the attributes + """ + return span_attribute_metric( + examples, + span_getter=self.span_getter, + attributes=self.attributes, + default_values=self.default_values, + include_falsy=self.include_falsy, + micro_key=self.micro_key, + filter_expr=self.filter_expr, + ) + + +# For backward compatibility +span_classification_scorer = span_attribute_scorer = span_attribute_metric +create_span_attributes_scorer = SpanAttributeScorer = SpanAttributeMetric + +__all__ = [ + "span_attribute_metric", + "SpanAttributeMetric", +] diff --git a/edsnlp/metrics/span_attributes.py b/edsnlp/metrics/span_attributes.py deleted file mode 100644 index be1b2a948e..0000000000 --- a/edsnlp/metrics/span_attributes.py +++ /dev/null @@ -1,182 +0,0 @@ -import warnings -from collections import defaultdict -from typing import Any, Dict, Optional - -from edsnlp import registry -from edsnlp.metrics import Examples, average_precision, make_examples, prf -from edsnlp.utils.bindings import BINDING_GETTERS, Attributes, AttributesArg -from edsnlp.utils.span_getters import SpanGetterArg, get_spans - - -def span_attribute_metric( - examples: Examples, - span_getter: SpanGetterArg, - attributes: Attributes = None, - include_falsy: bool = False, - default_values: Dict = {}, - micro_key: str = "micro", - filter_expr: Optional[str] = None, - **kwargs: Any, -): - """ - Scores the attributes predictions between a list of gold and predicted spans. - - Parameters - ---------- - examples : Examples - The examples to score, either a tuple of (golds, preds) or a list of - spacy.training.Example objects - span_getter : SpanGetterArg - The span getter to use to extract the spans from the document - attributes : Sequence[str] - The attributes to use to score the spans - default_values: Dict - Values to dismiss when computing the micro-average per label. This is - useful to compute precision and recall for certain attributes that have - imbalanced value repartitions, such as "negation", "family related" - or "certainty" attributes. - include_falsy : bool - Whether to count predicted or gold occurrences of falsy values when computing - the metrics. If `False`, only the non-falsy values will be counted and matched - together. - micro_key : str - The key to use to store the micro-averaged results for spans of all types - filter_expr : Optional[str] - The filter expression to use to filter the documents - - Returns - ------- - Dict[str, float] - """ - if "qualifiers" in kwargs: - warnings.warn( - "The `qualifiers` argument of span_attribute_metric() is " - "deprecated. 
Use `attributes` instead.", - DeprecationWarning, - ) - assert attributes is None - attributes = kwargs.pop("qualifiers") - if attributes is None: - raise TypeError( - "span_attribute_metric() missing 1 required argument: 'attributes'" - ) - if kwargs: - raise TypeError( - f"span_attribute_metric() got unexpected keyword arguments: " - f"{', '.join(kwargs.keys())}" - ) - examples = make_examples(examples) - if filter_expr is not None: - filter_fn = eval(f"lambda doc: {filter_expr}") - examples = [eg for eg in examples if filter_fn(eg.reference)] - labels = defaultdict(lambda: (set(), set(), dict())) - labels["micro"] = (set(), set(), dict()) - total_pred_count = 0 - total_gold_count = 0 - - if not include_falsy: - default_values_ = defaultdict(lambda: False) - default_values_.update(default_values) - default_values = default_values_ - del default_values_ - for eg_idx, eg in enumerate(examples): - doc_spans = get_spans(eg.predicted, span_getter) - for span_idx, span in enumerate(doc_spans): - total_pred_count += 1 - for attr, span_filter in attributes.items(): - if not (span_filter is True or span.label_ in span_filter): - continue - getter_key = attr if attr.startswith("_.") else f"_.{attr}" - value = BINDING_GETTERS[getter_key](span) - top_val, top_p = max( - getattr(span._, "prob", {}).get(attr, {}).items(), - key=lambda x: x[1], - default=(value, 1.0), - ) - if (top_val or include_falsy) and default_values[attr] != top_val: - labels[attr][2][(eg_idx, span_idx, attr, top_val)] = top_p - labels[micro_key][2][(eg_idx, span_idx, attr, top_val)] = top_p - if (value or include_falsy) and default_values[attr] != value: - labels[micro_key][0].add((eg_idx, span_idx, attr, value)) - labels[attr][0].add((eg_idx, span_idx, attr, value)) - - doc_spans = get_spans(eg.reference, span_getter) - for span_idx, span in enumerate(doc_spans): - total_gold_count += 1 - for attr, span_filter in attributes.items(): - if not (span_filter is True or span.label_ in span_filter): - continue - getter_key = attr if attr.startswith("_.") else f"_.{attr}" - value = BINDING_GETTERS[getter_key](span) - if (value or include_falsy) and default_values[attr] != value: - labels[micro_key][1].add((eg_idx, span_idx, attr, value)) - labels[attr][1].add((eg_idx, span_idx, attr, value)) - - if total_pred_count != total_gold_count: - raise ValueError( - f"Number of predicted and gold spans differ: {total_pred_count} != " - f"{total_gold_count}. Make sure that you are running your span " - "attribute classification pipe on the gold annotations, and not spans " - "predicted by another NER pipe in your model." - ) - - return { - name: { - **prf(pred, gold), - "ap": average_precision(pred_with_prob, gold), - } - for name, (pred, gold, pred_with_prob) in labels.items() - } - - -@registry.metrics.register( - "eds.span_attribute", - deprecated=["eds.span_classification_scorer", "eds.span_attribute_scorer"], -) -class SpanAttributeMetric: - attributes: Attributes - - def __init__( - self, - span_getter: SpanGetterArg, - attributes: AttributesArg = None, - qualifiers: AttributesArg = None, - default_values: Dict = {}, - include_falsy: bool = False, - micro_key: str = "micro", - filter_expr: Optional[str] = None, - ): - if qualifiers is not None: - warnings.warn( - "The `qualifiers` argument is deprecated. 
Use `attributes` instead.", - DeprecationWarning, - ) - self.span_getter = span_getter - self.attributes = attributes or qualifiers - self.default_values = default_values - self.include_falsy = include_falsy - self.micro_key = micro_key - self.filter_expr = filter_expr - - __init__.__doc__ = span_attribute_metric.__doc__ - - def __call__(self, *examples: Any): - return span_attribute_metric( - examples, - span_getter=self.span_getter, - attributes=self.attributes, - default_values=self.default_values, - include_falsy=self.include_falsy, - micro_key=self.micro_key, - filter_expr=self.filter_expr, - ) - - -# For backward compatibility -span_classification_scorer = span_attribute_scorer = span_attribute_metric -create_span_attributes_scorer = SpanAttributeScorer = SpanAttributeMetric - -__all__ = [ - "span_attribute_metric", - "SpanAttributeMetric", -] diff --git a/mkdocs.yml b/mkdocs.yml index 35d3c30484..272d50ec99 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -146,6 +146,10 @@ nav: - concepts/pipeline.md - concepts/torch-component.md - concepts/inference.md + - Metrics: + - metrics/index.md + - metrics/ner.md + - metrics/span-attribute.md - Utilities: - utilities/index.md - utilities/tests/blocs.md @@ -171,15 +175,14 @@ extra: extra_css: - assets/stylesheets/extra.css - assets/stylesheets/cards.css - - assets/termynal/termynal.css + #- assets/termynal/termynal.css extra_javascript: - - https://cdn.jsdelivr.net/npm/vega@5 - - https://cdn.jsdelivr.net/npm/vega-lite@5 - - https://cdn.jsdelivr.net/npm/vega-embed@6 - - assets/termynal/termynal.js + #- https://cdn.jsdelivr.net/npm/vega@5 + #- https://cdn.jsdelivr.net/npm/vega-lite@5 + #- https://cdn.jsdelivr.net/npm/vega-embed@6 - https://polyfill.io/v3/polyfill.min.js?features=es6 - - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js + # - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js watch: - contributing.md @@ -245,8 +248,9 @@ markdown_extensions: slugify: !!python/object/apply:pymdownx.slugs.slugify kwds: case: lower - - pymdownx.arithmatex: - generic: true + #- pymdownx.arithmatex: + # generic: true + - markdown_grid_tables - footnotes - md_in_html - attr_list diff --git a/pyproject.toml b/pyproject.toml index c391302ab6..47d1e83f92 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,6 +74,7 @@ docs-no-ml = [ "mkdocstrings-python~=1.1", "mkdocs-minify-plugin", "mkdocs-redirects>=1.2.1;python_version>='3.8'", + "markdown-grid-tables==0.4.0", "pybtex~=0.24.0", "pathspec>=0.11.1", # required by vendored mkdocs-autorefs PR "astunparse", @@ -212,6 +213,7 @@ where = ["."] "eds.omop_dict2doc" = "edsnlp.data.converters:OmopDict2DocConverter" "eds.omop_doc2dict" = "edsnlp.data.converters:OmopDoc2DictConverter" "eds.ents_doc2dict" = "edsnlp.data.converters:EntsDoc2DictConverter" +"eds.markup_to_doc" = "edsnlp.data.converters:MarkupToDocConverter" # Deprecated (links to the same factories as above) "SOFA" = "edsnlp.pipes.ner.scores.sofa.factory:create_component" @@ -278,21 +280,22 @@ where = ["."] "linear" = "edsnlp.training.optimizer:LinearSchedule" [project.entry-points."spacy_scorers"] -"eds.ner_exact" = "edsnlp.metrics.ner:NerExactMetric" -"eds.ner_token" = "edsnlp.metrics.ner:NerTokenMetric" -"eds.ner_overlap" = "edsnlp.metrics.ner:NerOverlapMetric" -"eds.span_attributes" = "edsnlp.metrics.span_attributes:SpanAttributeMetric" -"eds.dep_parsing" = "edsnlp.metrics.dep_parsing:DependencyParsingMetric" +"eds.ner_exact" = "edsnlp.metrics.ner:NerExactMetric" +"eds.ner_token" = "edsnlp.metrics.ner:NerTokenMetric" 
+"eds.ner_overlap" = "edsnlp.metrics.ner:NerOverlapMetric" +"eds.span_attribute" = "edsnlp.metrics.span_attribute:SpanAttributeMetric" +"eds.dep_parsing" = "edsnlp.metrics.dep_parsing:DependencyParsingMetric" # Deprecated -"eds.ner_exact_metric" = "edsnlp.metrics.ner:NerExactMetric" -"eds.ner_token_metric" = "edsnlp.metrics.ner:NerTokenMetric" -"eds.ner_overlap_metric" = "edsnlp.metrics.ner:NerOverlapMetric" -"eds.span_attributes_metric" = "edsnlp.metrics.span_attributes:SpanAttributeMetric" -"eds.ner_exact_scorer" = "edsnlp.metrics.ner:NerExactMetric" -"eds.ner_token_scorer" = "edsnlp.metrics.ner:NerTokenMetric" -"eds.ner_overlap_scorer" = "edsnlp.metrics.ner:NerOverlapMetric" -"eds.span_attributes_scorer" = "edsnlp.metrics.span_attributes:SpanAttributeMetric" +"eds.ner_exact_metric" = "edsnlp.metrics.ner:NerExactMetric" +"eds.ner_token_metric" = "edsnlp.metrics.ner:NerTokenMetric" +"eds.ner_overlap_metric" = "edsnlp.metrics.ner:NerOverlapMetric" +"eds.span_attributes_metric" = "edsnlp.metrics.span_attributes:SpanAttributeMetric" +"eds.span_attributes" = "edsnlp.metrics.span_attribute:SpanAttributeMetric" +"eds.ner_exact_scorer" = "edsnlp.metrics.ner:NerExactMetric" +"eds.ner_token_scorer" = "edsnlp.metrics.ner:NerTokenMetric" +"eds.ner_overlap_scorer" = "edsnlp.metrics.ner:NerOverlapMetric" +"eds.span_attributes_scorer" = "edsnlp.metrics.span_attributes:SpanAttributeMetric" [project.entry-points."edsnlp_readers"] "spark" = "edsnlp.data:from_spark" diff --git a/tests/test_docs.py b/tests/test_docs.py index b6975e7b74..58de9a3cfa 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -4,6 +4,7 @@ import sys import textwrap import warnings +from math import isclose import catalogue import pytest @@ -30,6 +31,42 @@ assert len(url_to_code) > 50 +class nested_approx: + def __init__(self, value, rel=1e-12, abs=1e-12): + self._value, self._rel, self._abs = value, rel, abs + + def __eq__(self, other): + return self._match(self._value, other) + + def __req__(self, other): + return self._match(other, self._value) + + __hash__ = None # keep it un-hashable + + def _match(self, a, b): + if isinstance(a, (int, float)) and isinstance(b, (int, float)): + return isclose(a, b, rel_tol=self._rel, abs_tol=self._abs) + if isinstance(a, (list, tuple)): + return ( + isinstance(b, (list, tuple)) + and len(a) == len(b) + and all(self._match(x, y) for x, y in zip(a, b)) + ) + if isinstance(a, dict): + return ( + isinstance(b, dict) + and a.keys() == b.keys() + and all(self._match(a[k], b[k]) for k in a) + ) + return a == b + + def __repr__(self): + return f"nested_approx({self._value!r}, rel={self._rel}, abs={self._abs})" + + +pytest.nested_approx = nested_approx + + def printer(code: str) -> None: """ Prints a code bloc with lines for easier debugging. 
@@ -62,16 +99,22 @@ def insert_assert_statements(code): if stmt.end_lineno == lineno: if isinstance(stmt, ast.Expr): expected = textwrap.dedent(match.group(1)).replace("\n# ", "\n") + expected_s = expected begin = line_table[stmt.lineno - 1] if not (expected.startswith("'") or expected.startswith('"')): - expected = repr(expected) + expected_s = repr(expected) end = match.end() stmt_str = ast.unparse(stmt) if stmt_str.startswith("print("): stmt_str = stmt_str[len("print") :] repl = f"""\ -value = {stmt_str} -assert {expected} == str(value) +val = {stmt_str} +try: + import ast + expected = ast.literal_eval({expected_s}) +except (ValueError, SyntaxError): + expected = None +assert str(val) == {expected_s} or val == pytest.nested_approx(expected, 0.01, 0.01) """ replacements.append((begin, end, repl)) if isinstance(stmt, ast.For): @@ -83,7 +126,7 @@ def insert_assert_statements(code): repl = f"""\ printed = [] {stmt_str} -assert {expected} == printed +assert printed == {expected} """ replacements.append((begin, end, repl)) @@ -123,6 +166,8 @@ def reset_imports(): def test_code_blocks(url, tmpdir, reset_imports): code = url_to_code[url] code_with_asserts = """ +import pytest + def assert_print(*args, sep=" ", end="\\n", file=None, flush=False): printed.append((sep.join(map(str, args)) + end).rstrip('\\n'))
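As a side note on the test-harness change above: the `nested_approx` helper compares numeric leaves with `math.isclose`, recurses into lists, tuples, and dicts, and falls back to plain equality for anything else. The snippet below is a minimal illustrative sketch, not part of the patch; it assumes the `nested_approx` class defined above is in scope, and the values are made up.

```python
# Hypothetical values, chosen only to illustrate the comparison behaviour
# (they are not taken from the test suite).
computed = {"micro": {"f": 0.5714, "p": 0.5, "r": 0.6667}, "ap": [0.17, 0.25]}
expected = {"micro": {"f": 0.57, "p": 0.5, "r": 0.67}, "ap": [0.17, 0.25]}

# With rel/abs tolerances of 0.01 (the values used in the rewritten asserts),
# the nested structures compare equal despite the rounded floats.
assert computed == nested_approx(expected, rel=0.01, abs=0.01)

# A structural mismatch (e.g., a missing key) still fails the comparison.
assert not (computed == nested_approx({"micro": {"f": 0.57}}, rel=0.01, abs=0.01))
```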