Skip to content

Commit

Permalink
Link HTMLized copies of invalid TEI files
Browse files Browse the repository at this point in the history
  • Loading branch information
thvitt committed Sep 3, 2018
1 parent 1a8a217 commit ffb210f
Show file tree
Hide file tree
Showing 5 changed files with 169 additions and 9 deletions.
126 changes: 126 additions & 0 deletions src/main/tools/highlight-errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#!/usr/bin/env python3
import sys
from collections import defaultdict
from html import escape
from urllib.parse import urlparse

from lxml import etree
from pathlib import Path

import logging
# Module-level logger; configured by the script entry point.
logger = logging.getLogger(__name__)

# Prefix -> namespace URI map handed to every lxml XPath call in this module.
NS = {
    'f': "http://www.faustedition.net/ns",
    'c': "http://www.w3.org/ns/xproc-step",
    'err': "http://www.w3.org/ns/xproc-error",
    'svrl': "http://purl.oclc.org/dsdl/svrl",
}

# HTML written at the top of every annotated file: document head with the
# stylesheet for line numbers (.l), column markers (.markers marked) and the
# error boxes (div.errors), followed by the opening <pre> that the source
# lines are written into.
PREFIX="""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Errors</title>
<style>
pre { margin-left: 2em; margin-bottom: 0; }
.markers marked { background: #fdd; color: red; font-weight: bold; border: 2px solid #fdd; }
.l { float: left; position: relative; left: -2em; width: 1.5em; text-align: right; color: gray; }
.l:target { font-weight: bold; color: black; }
div.errors { border: 1px solid red; border-top: none; background: #fdd;}
div.errors p { margin-top: 0; margin-bottom: 0.5ex; }
div.errors .message { font-weight: bold; }
.pos { color: gray; font-family: monospace; }
</style>
</head>
<body>
<pre>
"""
# HTML written at the end of every annotated file. It closes the <pre> that is
# still open after the last source line, then <body> and <html>. No </div> is
# emitted here: PREFIX opens none, and each error <div> is closed right where
# annotate_file writes it, so a trailing </div> would be unbalanced markup.
SUFFIX = """
</pre>
</body>
</html>
"""

def strip_common_prefix(source, reference, additional_components=0):
    """Return *source* with the leading components it shares with *reference* removed.

    Both paths are resolved to absolute paths first, then compared component
    by component.

    Args:
        source: path (str or Path) to shorten.
        reference: path (str or Path) to compare against.
        additional_components: number of further leading components to drop
            from *source* after the common prefix.

    Returns:
        A relative ``Path`` built from the remaining components of *source*.
    """
    source = Path(source).resolve()
    reference = Path(reference).resolve()
    # Count matching leading components explicitly. Relying on the loop index
    # after the loop is off by one when no mismatch is found (i.e. one path is
    # a prefix of the other), which would keep the last shared component.
    common = 0
    for source_part, reference_part in zip(source.parts, reference.parts):
        if source_part != reference_part:
            break
        common += 1
    return Path(*source.parts[common + additional_components:])


def marker_string(columns):
    """Build the marker line shown below a source line that has errors.

    Each column number is wrapped in a ``<marked>`` element and padded with
    spaces so that the number's last digit sits in that column, as long as
    there is room after the previous marker.

    Args:
        columns: iterable of integer column numbers, in any order; duplicates
            are collapsed to a single marker.

    Returns:
        The HTML fragment as a string; empty if *columns* is empty.
    """
    pieces = []
    position = 0  # visible characters emitted so far (markup not counted)
    # Markers can only be laid out left to right, so process columns in
    # ascending order and only once each.
    for column in sorted(set(columns)):
        label = str(column)
        # When the label does not fit before its column, append it directly
        # after the previous marker instead of computing negative padding.
        padding = max(column - len(label) - position, 0)
        pieces.append(' ' * padding + f'<marked>{label}</marked>')
        # Track the real width so later markers stay aligned after a collision.
        position += padding + len(label)
    return ''.join(pieces)


def annotate_file(invalid_file, errors, out_path=None, strip_components=0):
    """Write an HTML copy of *invalid_file* with its validation errors inlined.

    Every source line is HTML-escaped and prefixed with a numbered anchor
    (``id="l<lineno>"``). After each line that has errors, a marker line and a
    ``<div class="errors">`` box with the messages are inserted.

    Args:
        invalid_file: ``Path`` of the file to annotate; read as UTF-8.
        errors: mapping from 1-based line number to a list of dicts with the
            keys ``line``, ``column``, ``message`` and ``resolution``.
        out_path: output directory. ``None`` means the current directory; a
            value that is not a ``Path`` is run through ``urlparse`` and only
            its path component is used.
        strip_components: extra leading path components to drop from the
            source path when forming the output file name.
    """
    if out_path is None:
        out_path = Path()
    if not isinstance(out_path, Path):
        out_path = Path(urlparse(out_path).path)
    out_file = out_path / strip_common_prefix(invalid_file.with_suffix('.html'), out_path, strip_components)
    out_file.parent.mkdir(parents=True, exist_ok=True)

    logger.debug('Annotating %s to %s', invalid_file, out_file)

    with invalid_file.open(encoding='utf-8') as xml, \
            out_file.open('wt', encoding='utf-8') as out:
        out.write(PREFIX)
        for lineno, raw_line in enumerate(xml, start=1):
            # Strip only the line terminator; a final line without a trailing
            # newline must keep its last character.
            line = escape(raw_line.rstrip('\n'))
            out.write(f'<span id="l{lineno}" class="l">{lineno}</span>{line}\n')
            if lineno in errors:
                current_errors = errors[lineno]
                markers = marker_string(error['column'] for error in current_errors)
                # Leave the <pre> for the error box and reopen it afterwards.
                out.write(f'<span class="l"> </span><span class="markers">{markers}</span></pre>\n<div class="errors">')
                for error in current_errors:
                    # Messages are plain text taken from the report; escape
                    # them so characters like '<' cannot break the markup.
                    fields = {**error,
                              'message': escape(error['message']),
                              'resolution': escape(error['resolution'])}
                    out.write(('<p id="l{line}c{column}" class="message"><span class="pos">{line}:{column}</span> {message}</p>\n' +
                               '<p class="resolution">{resolution}</p>\n').format_map(fields))
                out.write('</div><pre>')
        out.write(SUFFIX)


def parse_errors(fn):
    """Yield ``(invalid_file, errors)`` for each validation error in a report.

    Args:
        fn: path of the XML report, or ``-`` to read the report from stdin.

    Yields:
        A tuple of the ``Path`` taken from the ``filename`` attribute of an
        ``f:validation-error`` element (URL path component only) and a
        ``defaultdict`` mapping line numbers to lists of error dicts with the
        keys ``line``, ``column``, ``message`` and ``resolution``. Errors
        whose ``line`` or ``column`` attribute is missing or not an integer
        are skipped with a warning, since they cannot be placed in the file.
    """
    logger.debug('Parsing errors from %s ...', fn)
    if str(fn) == '-':
        # Hand lxml the underlying binary stream so the XML parser does the
        # decoding itself; fall back to the object as-is when stdin has been
        # replaced by something without a .buffer attribute.
        et = etree.parse(getattr(sys.stdin, 'buffer', sys.stdin))
    else:
        et = etree.parse(str(fn))
    for validation_error in et.xpath('//f:validation-error', namespaces=NS):
        url = validation_error.get('filename')
        invalid_file = Path(urlparse(url).path)
        errors = defaultdict(list)
        for error_xml in validation_error.xpath('.//c:error', namespaces=NS):
            try:
                line = int(error_xml.get('line'))
                column = int(error_xml.get('column'))
            except (TypeError, ValueError):
                # TypeError: attribute absent (None); ValueError: not a number.
                logger.warning('Skipping error without usable line/column in %s', url)
                continue
            error = dict(line=line,
                         column=column,
                         message=' '.join(error_xml.xpath('c:message/text()', namespaces=NS)),
                         resolution=' '.join(error_xml.xpath('c:resolution/text()', namespaces=NS)))
            errors[line].append(error)
        yield invalid_file, errors


def _main():
    """Command-line entry point: parse the arguments and annotate every file named in the report."""
    import argparse

    cli = argparse.ArgumentParser(
        description='Write annotated XML files for faust-schema validation output')
    cli.add_argument('report', help='XML report containing validation results')
    cli.add_argument('-o', '--output-dir', metavar='DIRECTORY', default='.')
    cli.add_argument(
        '-s', '--strip-components', default=0, type=int,
        help='number of additional path components to remove from source paths to form the output file name')
    args = cli.parse_args()
    logger.debug(args)

    # One annotated HTML file is written per invalid file listed in the report.
    report_path = Path(args.report)
    for invalid_file, file_errors in parse_errors(report_path):
        annotate_file(invalid_file, file_errors, args.output_dir, args.strip_components)


if __name__ == '__main__':
    # When run as a script, send INFO-and-above log records to a file
    # (path is relative to the working directory), then run the CLI.
    log_settings = dict(filename='target/highlight-errors.log', level=logging.INFO)
    logging.basicConfig(**log_settings)
    _main()
4 changes: 2 additions & 2 deletions src/main/xproc/convert-transcripts.xpl
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
<p:for-each>
<p:iteration-source select="//c:file"/>
<p:variable name="filename" select="p:resolve-uri(/c:file/@name)"/>
<p:variable name="out" select="p:resolve-uri(replace($filename, $_xmlroot, 'converted'), $_target)"/>
<p:variable name="out" select="p:resolve-uri(replace($filename, $_xmlroot, 'converted/transcript'), $_target)"/>

<!-- <cx:message>
<p:with-option name="message" select="concat($filename, ' → ', $out)"></p:with-option>
Expand All @@ -46,4 +46,4 @@

</p:for-each>

</p:declare-step>
</p:declare-step>
28 changes: 28 additions & 0 deletions src/main/xproc/validate-all.xpl
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,34 @@
<p:store method="xhtml" indent="true">
<p:with-option name="href" select="resolve-uri('report/index.html', $_target)"/>
</p:store>

<p:directory-list include-filter=".*\.xml$">
<p:with-option name="path" select="resolve-uri('report', $_target)"/>
</p:directory-list>
<p:for-each>
<p:iteration-source select="//c:file"/>
<p:variable name="report-uri" select="resolve-uri(/c:file/@name, resolve-uri('report/', $_target))"/>
<cx:message>
<p:with-option name="message" select="concat('Annotating XMLs for ', $report-uri)"/>
</cx:message>
<p:try>
<p:group>
<p:exec command="src/main/tools/highlight-errors.py" source-is-xml="true" errors-is-xml="false" result-is-xml="false">
<p:with-option name="args" select="string-join(('-s1', '-o', resolve-uri('report/', $_target), $report-uri), ' ')"/>
<p:input port="source"><p:empty/></p:input>
</p:exec>
</p:group>
<p:catch name="catch-ann-xml">
<cx:message>
<p:with-option name="message" select="concat('Failed to load ', $report-uri, ': ', .)"/>
<p:input port="source"><p:pipe port="error" step="catch-ann-xml"/></p:input>
</cx:message>
<p:identity><p:input port="source"><p:empty/></p:input></p:identity>
</p:catch>
</p:try>

</p:for-each>

<p:sink/>

</p:declare-step>
5 changes: 2 additions & 3 deletions src/main/xproc/validate-xml.xpl
Original file line number Diff line number Diff line change
Expand Up @@ -230,15 +230,14 @@
<p:input port="source"><p:pipe port="result" step="wrap-errors"/></p:input>
<p:input port="stylesheet"><p:document href="validation-report.xsl"/></p:input>
<p:input port="parameters"><p:pipe port="result" step="in-scope-names"/></p:input>
<p:with-param name="linkroot" select="resolve-uri('report/xml', $_target)"/>
</p:xslt>

<cx:message>
<p:with-option name="message" select="concat('Storing report to ', $report)"/>
</cx:message>
<p:store method="xhtml" indent="true">
<p:with-option name="href" select="$report"/>
</p:store>


</p:store>

</p:declare-step>
15 changes: 11 additions & 4 deletions src/main/xproc/validation-report.xsl
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
then substring($uri, string-length($_xmlroot) +
(if (ends-with('/', $_xmlroot)) then 1 else 2))
else $uri"/>
<xsl:variable name="link" select="if ($linkroot != '') then resolve-uri($relpath, $linkroot) else $uri"/>
<xsl:variable name="link" select="replace(if ($linkroot != '') then resolve-uri($relpath, $linkroot) else $uri, '\.xml$', '.html')"/>
<a href="{$link}"><xsl:value-of select="$relpath"/></a>
</xsl:function>

Expand Down Expand Up @@ -137,12 +137,16 @@
<xsl:for-each-group select="current-group()" group-by="ancestor::f:validation-error/@filename">
<li>
<xsl:sequence select="f:linkxml(current-grouping-key())"/>:
<xsl:variable name="href" select="f:linkxml(current-grouping-key())/@href"/>
<span class="locations">
<xsl:value-of select="concat(count(current-group()), '×: ')"/>
<xsl:for-each select="current-group()">
<xsl:sort select="number(@line)"/>
<xsl:sort select="number(@column)"/>
<xsl:value-of select="concat(@line, ':', @column, ' ')"/>
<a href="{$href}#l{@line}">
<xsl:value-of select="concat(@line, ':', @column, ' ')"/>
</a>

</xsl:for-each>
</span>
</li>
Expand Down Expand Up @@ -173,9 +177,10 @@
</xsl:template>

<xsl:template match="f:validation-error[c:errors]">
<xsl:variable name="href" select="f:linkxml(@filename)/@href"/>
<dt><a href="{@filename}"><xsl:value-of select="@filename"/></a></dt>
<dd>
<dl class="individual-errors">
<dl class="individual-errors">
<xsl:for-each-group select=".//c:error" group-by="string-join((c:message, c:resolution), '|')">
<xsl:sort select="number(current-group()[1]/@line)"/>
<dt><xsl:value-of select="c:message"/></dt>
Expand All @@ -186,7 +191,9 @@
<xsl:for-each select="current-group()">
<xsl:sort select="number(@line)"/>
<xsl:sort select="number(@column)"/>
<xsl:value-of select="concat(@line, ':', @column, ' ')"/>
<a href="{$href}#l{@line}">
<xsl:value-of select="concat(@line, ':', @column, ' ')"/>
</a>
</xsl:for-each>
</p>
</dd>
Expand Down

0 comments on commit ffb210f

Please sign in to comment.