Permalink
Browse files

BibCheck: new BibCheck module

* Creates a new BibCheck module in python with configurable plugins

* Adds an example set of rules in rules.cfg

* Creates doi, dates, enum, files, texkey, regexp, mandatory, utf-8,
  url, trailing_space, regexp_replace, rename_tag and rename_subfield plugins

* Creates unit tests for BibCheck and the plugins

* Removes old BibCheck web interface and documentation

* Adds API so other modules can call bibcheck

Tested-by: Javier Martin Montull <javier.martin.montull@cern.ch>
Reviewed-by: Javier Martin Montull <javier.martin.montull@cern.ch>
  • Loading branch information...
David Bengoa authored and kaplun committed Jun 24, 2013
1 parent 5138bed commit 218d7fa71d7a3738944a7c262ba2f02e52c87210
Showing with 2,157 additions and 393 deletions.
  1. +1 −0 .gitignore
  2. +3 −0 config/invenio.conf
  3. +4 −3 configure.ac
  4. +2 −2 modules/bibcheck/Makefile.am
  5. +3 −6 modules/bibcheck/{doc/admin → bin}/Makefile.am
  6. +41 −0 modules/bibcheck/bin/bibcheck.in
  7. +1 −1 modules/bibcheck/doc/Makefile.am
  8. +0 −38 modules/bibcheck/doc/admin/bibcheck-admin-guide.webdoc
  9. +1 −1 modules/bibcheck/etc/Makefile.am
  10. +20 −0 modules/bibcheck/etc/rules.cfg
  11. +0 −1 modules/bibcheck/etc/sample.cfg
  12. +11 −6 modules/bibcheck/{web/admin → lib}/Makefile.am
  13. +56 −0 modules/bibcheck/lib/bibcheck.py
  14. +171 −0 modules/bibcheck/lib/bibcheck_plugins_unit_tests.py
  15. +751 −0 modules/bibcheck/lib/bibcheck_task.py
  16. +194 −0 modules/bibcheck/lib/bibcheck_unit_tests.py
  17. +38 −0 modules/bibcheck/lib/plugins/Makefile.am
  18. 0 modules/bibcheck/lib/plugins/__init__.py
  19. +146 −0 modules/bibcheck/lib/plugins/crossref_checker.py
  20. +80 −0 modules/bibcheck/lib/plugins/dates.py
  21. +45 −0 modules/bibcheck/lib/plugins/doi.py
  22. +31 −0 modules/bibcheck/lib/plugins/enum.py
  23. +65 −0 modules/bibcheck/lib/plugins/files.py
  24. +13 −4 modules/bibcheck/{web/Makefile.am → lib/plugins/mandatory.py}
  25. +34 −0 modules/bibcheck/lib/plugins/regexp.py
  26. +36 −0 modules/bibcheck/lib/plugins/regexp_replace.py
  27. +28 −0 modules/bibcheck/lib/plugins/rename_subfield.py
  28. +29 −0 modules/bibcheck/lib/plugins/rename_tag.py
  29. +48 −0 modules/bibcheck/lib/plugins/texkey.py
  30. +36 −0 modules/bibcheck/lib/plugins/trailing_space.py
  31. +82 −0 modules/bibcheck/lib/plugins/url.py
  32. +31 −0 modules/bibcheck/lib/plugins/utf8.py
  33. +0 −325 modules/bibcheck/web/admin/bibcheckadmin.py
  34. +1 −1 modules/bibsched/lib/bibtask_config.py
  35. +103 −2 modules/miscutil/lib/crossrefutils.py
  36. +2 −3 modules/miscutil/lib/sequtils_texkey.py
  37. +39 −0 modules/miscutil/lib/upgrades/invenio_2013_06_20_new_bibcheck_rules_table.py
  38. +1 −0 modules/miscutil/sql/tabbibclean.sql
  39. +9 −0 modules/miscutil/sql/tabcreate.sql
  40. +1 −0 modules/miscutil/sql/tabdrop.sql
View
@@ -92,6 +92,7 @@ modules/websubmit/bin/websubmitadmin
modules/bibcirculation/bin/bibcircd
modules/bibcatalog/bin/bibcatalog
modules/pdfchecker/bin/arxiv-pdf-checker
modules/bibcheck/bin/bibcheck
tags
config.status.lineno
configure.lineno
View
@@ -2315,6 +2315,9 @@ CFG_CROSSREF_USERNAME =
## to the Crossref site.
CFG_CROSSREF_PASSWORD =
## CFG_CROSSREF_EMAIL -- crossref query services email
CFG_CROSSREF_EMAIL =
#####################################
## Part 31: WebLinkback parameters ##
#####################################
View
@@ -588,11 +588,12 @@ AC_CONFIG_FILES([config.nice \
modules/bibcatalog/lib/ticket_templates/Makefile \
modules/bibcheck/Makefile \
modules/bibcheck/doc/Makefile \
modules/bibcheck/doc/admin/Makefile \
modules/bibcheck/doc/hacking/Makefile \
modules/bibcheck/etc/Makefile \
modules/bibcheck/web/Makefile \
modules/bibcheck/web/admin/Makefile \
modules/bibcheck/bin/Makefile \
modules/bibcheck/lib/Makefile \
modules/bibcheck/lib/plugins/Makefile \
modules/bibcheck/bin/bibcheck \
modules/bibcirculation/Makefile \
modules/bibcirculation/bin/Makefile \
modules/bibcirculation/bin/bibcircd \
@@ -1,5 +1,5 @@
## This file is part of Invenio.
## Copyright (C) 2009, 2010, 2011 CERN.
## Copyright (C) 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
@@ -15,6 +15,6 @@
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
SUBDIRS = doc etc web
SUBDIRS = doc etc lib bin
CLEANFILES = *~
@@ -1,5 +1,5 @@
## This file is part of Invenio.
## Copyright (C) 2009, 2010, 2011 CERN.
## Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
@@ -15,10 +15,7 @@
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
webdoclibdir = $(libdir)/webdoc/invenio/admin
webdoclib_DATA = bibcheck-admin-guide.webdoc
EXTRA_DIST = $(webdoclib_DATA)
bin_SCRIPTS = bibcheck
EXTRA_DIST = bibcheck.in
CLEANFILES = *~ *.tmp
@@ -0,0 +1,41 @@
#!@PYTHON@
## -*- mode: python; coding: utf-8; -*-
##
## This file is part of Invenio.
## Copyright (C) 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
"bibcheck" is used to check a set of records against a
configurable set of rules. A rule consists of a query, a
checker and an amender. The set of records that
match the query are checked with the checker and the records
that don't pass the test will be passed to the amender. An
amender can try to fix the record automatically or request a
human to fix the record.
The checkers and amenders are loaded via a plug-in system, so
it's easy to add new checkers or amenders.
"""
from invenio.bibcheck_task import main as cli_main
if __name__ == '__main__':
    # Script entry point: hand control to the BibCheck task CLI.
    try:
        cli_main()
    except KeyboardInterrupt:
        # Exit cleanly on Ctrl-C instead of dumping a traceback.
        # The parenthesised form prints the same text whether `print` is
        # parsed as a statement (Python 2) or called as a function (Python 3).
        print('Interrupted')
@@ -15,4 +15,4 @@
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
SUBDIRS = admin hacking
SUBDIRS = hacking

This file was deleted.

Oops, something went wrong.
@@ -17,7 +17,7 @@
etcdir = $(sysconfdir)/bibcheck
etc_DATA = sample.cfg
etc_DATA = rules.cfg
EXTRA_DIST = $(etc_DATA)
@@ -0,0 +1,20 @@
[check_mandatory_fields]
check=mandatory
check.fields = ["001%%_"]
[check_utf8]
check=utf8
[trailing_space]
check=trailing_space
check.strip = true
check.normalize_spaces = true
check.fields = ["100%%a"]
[check_doi]
check=doi
[check_xref]
check=crossref_checker

This file was deleted.

Oops, something went wrong.
@@ -1,5 +1,5 @@
## This file is part of Invenio.
## Copyright (C) 2009, 2010, 2011 CERN.
## Copyright (C) 2009, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
@@ -9,16 +9,21 @@
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
webappdir = $(localstatedir)/www/admin/bibcheck
SUBDIRS = plugins
webapp_DATA = bibcheckadmin.py
pylibdir = $(libdir)/python/invenio
EXTRA_DIST = $(webapp_DATA)
pylib_DATA = bibcheck_task.py \
bibcheck.py \
bibcheck_unit_tests.py \
bibcheck_plugins_unit_tests.py
CLEANFILES = *~ *.tmp
EXTRA_DIST = $(pylib_DATA)
CLEANFILES = *~ *.tmp *.pyc
@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
BibCheck API
This API lets other modules interact with bibcheck.
"""
from invenio import bibcheck_task
def check_record(record, enabled_rules=None):
    """
    Run bibcheck rules against a single record.

    Plugins and rules are loaded through bibcheck_task on every call, the
    record is wrapped in an AmendableRecord and each selected rule's checker
    is invoked on it.

    @param record: Record to check
    @type record: recstruct
    @param enabled_rules: Names of the rules to run. Default None (run all
        loaded rules). Names that do not match a loaded rule are ignored.
    @type enabled_rules: list
    @returns: AmendableRecord with the list of errors/amendments
    """
    available_plugins = bibcheck_task.load_plugins()
    all_rules = bibcheck_task.load_rules(available_plugins)
    amendable = bibcheck_task.AmendableRecord(record)

    # Start from every loaded rule and narrow down only when the caller
    # explicitly asked for a subset.
    selected = set(all_rules.keys())
    if enabled_rules is not None:
        selected.intersection_update(enabled_rules)

    for name in selected:
        current_rule = all_rules[name]
        # Tell the record which rule is about to run before invoking it.
        amendable.set_rule(current_rule)
        checker = available_plugins[current_rule["check"]]
        if not checker["batch"]:
            checker["check_record"](amendable, **current_rule["checker_params"])
        else:
            # Batch plugins take a list of records, so wrap the single one.
            checker["check_records"]([amendable],
                                     **current_rule["checker_params"])
    return amendable
Oops, something went wrong.

0 comments on commit 218d7fa

Please sign in to comment.