Skip to content

Commit

Permalink
installation: Invenio dependency update
Browse files Browse the repository at this point in the history
* FIX Updates minimum dependencies of Invenio packages to
  newer versions.

* Fixes issues related to application context.

* Updates Travis CI and PyTest setup to latest Invenio package
  standards.

* Adds missing files to MANIFEST.in.

Signed-off-by: Jan Aage Lavik <jan.age.lavik@cern.ch>
  • Loading branch information
jalavik committed Feb 1, 2016
1 parent eca6684 commit f249653
Show file tree
Hide file tree
Showing 11 changed files with 95 additions and 56 deletions.
35 changes: 35 additions & 0 deletions .travis.invenio.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2016 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

"""Travis-CI configuration."""

import getpass

# Packages listed for this configuration: the classifier itself plus
# ``invenio_base``.
PACKAGES = ['invenio_classifier', 'invenio_base']

# Automatic asset building is switched off.
ASSETS_AUTO_BUILD = False

# NOTE(review): placeholder secret; presumably acceptable only because this
# configuration is meant for Travis-CI -- confirm it is never reused elsewhere.
SECRET_KEY = 'MY_SECRET'
DEBUG = False

# Login name of the user evaluating this file, as reported by ``getpass``.
CFG_BIBSCHED_PROCESS_USER = getpass.getuser()
8 changes: 5 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio
# Copyright (C) 2015 CERN
# Copyright (C) 2015, 2016 CERN
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
Expand Down Expand Up @@ -47,13 +47,14 @@ python:

before_install:
- "travis_retry pip install --upgrade pip"
- "travis_retry pip install mock twine wheel"
- "travis_retry pip install check-manifest mock twine wheel coveralls"
- "python requirements.py --extras=$REXTRAS --level=min > .travis-lowest-requirements.txt"
- "python requirements.py --extras=$REXTRAS --level=pypi > .travis-release-requirements.txt"
- "python requirements.py --extras=$REXTRAS --level=dev > .travis-devel-requirements.txt"
- "mkdir -p ${VIRTUAL_ENV}/var/invenio.base-instance/"
- "cp .travis.invenio.cfg ${VIRTUAL_ENV}/var/invenio.base-instance/invenio.cfg"

install:
- "travis_retry pip install Invenio"
- "travis_retry pip install -r .travis-$REQUIREMENTS-requirements.txt --allow-all-external"
- "travis_retry pip install -e .[$REXTRAS]"

Expand All @@ -62,6 +63,7 @@ before_script:
- "inveniomanage database create --quiet || echo ':('"

script:
- "check-manifest --ignore .travis-\\*-requirements.txt"
- "sphinx-build -qnN docs docs/_build/html"
- "python setup.py test"

Expand Down
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
include *.py
include *.rst
include *.txt
include .dockerignore .editorconfig
include .dockerignore .editorconfig .travis.invenio.cfg
include LICENSE
include babel.ini
include pytest.ini
Expand Down
20 changes: 14 additions & 6 deletions invenio_classifier/api.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2014, 2015 CERN.
# Copyright (C) 2014, 2015, 2016 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
Expand All @@ -27,7 +27,6 @@
from flask import current_app

from invenio_base.globals import cfg
from invenio_utils.filedownload import download_url

from .engine import (
clean_before_output,
Expand All @@ -50,11 +49,14 @@

def output_keywords_for_sources(
input_sources, taxonomy_name, output_mode="text",
output_limit=cfg['CLASSIFIER_DEFAULT_OUTPUT_NUMBER'], spires=False,
output_limit=None, spires=False,
match_mode="full", no_cache=False, with_author_keywords=False,
rebuild_cache=False, only_core_tags=False, extract_acronyms=False,
**kwargs):
"""Output the keywords for each source in sources."""
if output_limit is None:
output_limit = cfg['CLASSIFIER_DEFAULT_OUTPUT_NUMBER']

# Inner function that does the actual work. Refactoring the call would be
# too much work, and it must be defined outside the loop (previously,
# multiple files were not processed).
Expand Down Expand Up @@ -108,6 +110,7 @@ def process_lines():
process_lines()
else:
# Treat as a URL.
from invenio_utils.filedownload import download_url
local_file = download_url(entry)
text_lines, dummy = get_plaintext_document_body(local_file)
if text_lines:
Expand All @@ -117,13 +120,16 @@ def process_lines():

def get_keywords_from_local_file(
local_file, taxonomy_name, output_mode="text",
output_limit=cfg["CLASSIFIER_DEFAULT_OUTPUT_NUMBER"], spires=False,
output_limit=None, spires=False,
match_mode="full", no_cache=False, with_author_keywords=False,
rebuild_cache=False, only_core_tags=False, extract_acronyms=False):
"""Output keywords reading a local file.
Arguments and output are the same as for :see: get_keywords_from_text().
"""
if output_limit is None:
output_limit = cfg['CLASSIFIER_DEFAULT_OUTPUT_NUMBER']

current_app.logger.info(
"Analyzing keywords for local file %s." % local_file)
text_lines = text_lines_from_local_file(local_file)
Expand All @@ -142,8 +148,7 @@ def get_keywords_from_local_file(


def get_keywords_from_text(text_lines, taxonomy_name, output_mode="text",
output_limit=cfg[
"CLASSIFIER_DEFAULT_OUTPUT_NUMBER"],
output_limit=None,
spires=False, match_mode="full", no_cache=False,
with_author_keywords=False, rebuild_cache=False,
only_core_tags=False, extract_acronyms=False):
Expand All @@ -165,6 +170,9 @@ def get_keywords_from_text(text_lines, taxonomy_name, output_mode="text",
(single_keywords, composite_keywords, author_keywords, acronyms)
for other output modes it returns formatted string
"""
if output_limit is None:
output_limit = cfg['CLASSIFIER_DEFAULT_OUTPUT_NUMBER']

cache = get_cache(taxonomy_name)
if not cache:
set_cache(taxonomy_name,
Expand Down
21 changes: 16 additions & 5 deletions invenio_classifier/engine.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2007, 2008, 2009, 2010, 2011, 2013, 2014, 2015 CERN.
# Copyright (C) 2007, 2008, 2009, 2010, 2011, 2013, 2014, 2015, 2016 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
Expand Down Expand Up @@ -197,9 +197,9 @@ def build_marc(recid, single_keywords, composite_keywords,


def _output_marc(output_complete, categories,
kw_field=cfg["CLASSIFIER_RECORD_KEYWORD_FIELD"],
auth_field=cfg["CLASSIFIER_RECORD_KEYWORD_AUTHOR_FIELD"],
acro_field=cfg["CLASSIFIER_RECORD_KEYWORD_ACRONYM_FIELD"],
kw_field=None,
auth_field=None,
acro_field=None,
provenience='Classifier'):
"""Output the keywords in the MARCXML format.
Expand All @@ -215,6 +215,15 @@ def _output_marc(output_complete, categories,
assigned the contents of the field
:return: string, formatted MARC
"""
if kw_field is None:
kw_field = cfg["CLASSIFIER_RECORD_KEYWORD_FIELD"]

if auth_field is None:
auth_field = cfg["CLASSIFIER_RECORD_KEYWORD_AUTHOR_FIELD"]

if acro_field is None:
acro_field = cfg["CLASSIFIER_RECORD_KEYWORD_ACRONYM_FIELD"]

kw_template = ('<datafield tag="%s" ind1="%s" ind2="%s">\n'
' <subfield code="2">%s</subfield>\n'
' <subfield code="a">%s</subfield>\n'
Expand Down Expand Up @@ -249,7 +258,9 @@ def _output_marc(output_complete, categories,

def _output_complete(skw_matches=None, ckw_matches=None, author_keywords=None,
acronyms=None, spires=False, only_core_tags=False,
limit=cfg["CLASSIFIER_DEFAULT_OUTPUT_NUMBER"]):
limit=None):
if limit is None:
limit = cfg["CLASSIFIER_DEFAULT_OUTPUT_NUMBER"]

if limit:
resized_skw = skw_matches[0:limit]
Expand Down
10 changes: 4 additions & 6 deletions invenio_classifier/keyworder.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 CERN.
# Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
Expand Down Expand Up @@ -36,11 +36,6 @@
from .errors import OntologyError


_MAXIMUM_SEPARATOR_LENGTH = max([len(_separator)
for _separator in
cfg["CLASSIFIER_VALID_SEPARATORS"]])


def get_single_keywords(skw_db, fulltext):
"""Find single keywords in the fulltext.
Expand Down Expand Up @@ -306,6 +301,9 @@ def get_author_keywords(skw_db, ckw_db, fulltext):

def _get_ckw_span(fulltext, spans):
"""Return the span of the composite keyword if it is valid."""
_MAXIMUM_SEPARATOR_LENGTH = max([len(_separator)
for _separator in
cfg["CLASSIFIER_VALID_SEPARATORS"]])
if spans[0] < spans[1]:
words = (spans[0], spans[1])
dist = spans[1][0] - spans[0][1]
Expand Down
2 changes: 1 addition & 1 deletion invenio_classifier/manage.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2015 CERN.
# Copyright (C) 2015, 2016 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
Expand Down
6 changes: 4 additions & 2 deletions invenio_classifier/reader.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 CERN.
# Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
Expand Down Expand Up @@ -46,6 +46,8 @@
from invenio_base.globals import cfg
from invenio_utils.url import make_invenio_opener

from werkzeug.local import LocalProxy

import rdflib

from six import iteritems
Expand All @@ -54,7 +56,7 @@
from .errors import TaxonomyError
from .registry import taxonomies

urlopen = make_invenio_opener('classifier').open
urlopen = LocalProxy(lambda: make_invenio_opener('classifier').open)

_contains_digit = re.compile("\d")
_starts_with_non = re.compile("(?i)^non[a-z]")
Expand Down
4 changes: 2 additions & 2 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2015 CERN.
# Copyright (C) 2015, 2016 CERN.
#
# Invenio is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
Expand All @@ -23,6 +23,6 @@
# as an Intergovernmental Organization or submit itself to any jurisdiction.

[pytest]
addopts = --clearcache --pep8 --ignore=docs --cov=invenio_classifier --cov-report=term-missing
addopts = --pep8 --ignore=docs --cov=invenio_classifier --cov-report=term-missing
pep8ignore =
tests/* ALL
23 changes: 11 additions & 12 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2015 CERN.
# Copyright (C) 2015, 2016 CERN.
#
# Invenio is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
Expand Down Expand Up @@ -36,18 +36,20 @@
requirements = [
'Flask>=0.10.1',
'six>=1.7.2',
'invenio-base>=0.2.1',
'invenio-ext>=0.2.1',
'invenio-utils>=0.1.1',
'invenio-base>=0.3.1,<1.0.0',
'invenio-ext>=0.3.1,<1.0.0',
'invenio-utils>=0.2.0,<1.0.0',
'rdflib>=4.2.1',
]

test_requirements = [
'unittest2>=1.1.0',
'Flask-Testing>=0.4.2',
'pytest>=2.7.0',
'pytest-cov>=1.8.0',
'pytest-pep8>=1.0.6',
'coverage>=3.7.1',
'Flask_Testing>=0.4.2',
'pytest>=2.8.0',
'pytest_cov>=2.1.0',
'pytest_pep8>=1.0.6',
'coverage>=4.0.0',
'invenio-testing>=0.1.0',
]


Expand Down Expand Up @@ -79,9 +81,6 @@ def run_tests(self):
"""Run tests."""
# Import here, because the eggs aren't loaded outside this method.
import pytest
import _pytest.config
pm = _pytest.config.get_plugin_manager()
pm.consider_setuptools_entrypoints()
errno = pytest.main(self.pytest_args)
sys.exit(errno)

Expand Down
20 changes: 2 additions & 18 deletions tests/test_classifier.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2010, 2011, 2013, 2014, 2015 CERN.
# Copyright (C) 2010, 2011, 2013, 2014, 2015, 2016 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
Expand Down Expand Up @@ -29,10 +29,8 @@
PkgResourcesDirDiscoveryRegistry,
RegistryProxy,
)
from invenio.testsuite import (
from invenio_testing import (
InvenioTestCase,
make_test_suite,
run_test_suite,
)


Expand All @@ -52,13 +50,6 @@ class ClassifierTestCase(InvenioTestCase):

"""Basic test class used for classifier tests."""

@property
def config(self):
from invenio_base.config import PACKAGES
default_config = super(ClassifierTestCase, self).config
default_config["PACKAGES"] = PACKAGES + ["invenio_classifier"]
return default_config

def setUp(self):
"""Initialize stuff."""
self.taxonomy_name = "test"
Expand Down Expand Up @@ -251,10 +242,3 @@ def test_cache_accessibility(self):
cache = reader._get_cache_path(name)
os.remove(taxonomy_path)
os.remove(cache)


TEST_SUITE = make_test_suite(ClassifierTest)


if __name__ == '__main__':
run_test_suite(TEST_SUITE)

0 comments on commit f249653

Please sign in to comment.