Skip to content

Commit

Permalink
tests: 100% coverage
Browse files Browse the repository at this point in the history
* Improves overall test coverage to 100%. (closes #21)

Signed-off-by: Jan Aage Lavik <jan.age.lavik@cern.ch>
  • Loading branch information
jalavik committed Jun 3, 2016
1 parent 23379e7 commit a9bc75d
Show file tree
Hide file tree
Showing 9 changed files with 326 additions and 20 deletions.
6 changes: 4 additions & 2 deletions invenio_oaiharvester/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,9 @@ def identifier_extraction_from_string(
def get_identifier_names(identifiers):
"""Return list of identifiers from a comma-separated string."""
if identifiers is not None:
return [s.strip() for s in identifiers.split(',')]
if not isinstance(identifiers, (list, tuple)):
identifiers = identifiers.split(',')
return [s.strip() for s in identifiers]
return []


Expand Down Expand Up @@ -196,7 +198,7 @@ def write_to_dir(records, output_dir, max_records=1000, encoding='utf-8'):
f = codecs.open(files_created[0], 'w+', encoding=encoding)
for record in records:
total += 1
if total % max_records == 0:
if total > 1 and total % max_records == 0:
# we need a new file to write to
f.close()
files_created.append(create_file_name(output_path))
Expand Down
2 changes: 0 additions & 2 deletions requirements.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,7 @@
import sys

import mock

import pkg_resources

import setuptools


Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@
tests_require = [
'check-manifest>=0.25',
'coverage>=4.0',
'isort>=4.2.2',
'isort==4.2.2',
'mock>=1.0.0',
'pydocstyle>=1.0.0',
'pytest-cache>=1.0',
'pytest-cov>=1.8.0',
Expand Down
23 changes: 19 additions & 4 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
import pytest
from flask import Flask
from flask_celeryext import FlaskCeleryExt
from flask_cli import FlaskCLI
from flask_cli import FlaskCLI, ScriptInfo
from invenio_db import InvenioDB, db

from invenio_oaiharvester import InvenioOAIHarvester
Expand Down Expand Up @@ -73,7 +73,13 @@ def teardown():
return app


@pytest.fixture
@pytest.fixture()
def script_info(app):
    """Provide a ``ScriptInfo`` whose app factory returns the test app."""
    def _create_app(info):
        # The factory ignores ``info`` and always hands back the fixture app.
        return app

    return ScriptInfo(create_app=_create_app)


@pytest.fixture()
def sample_config(app):
source_name = "arXiv"
with app.app_context():
Expand All @@ -88,7 +94,7 @@ def sample_config(app):
return source_name


@pytest.fixture
@pytest.fixture()
def sample_record_xml():
raw_xml = open(os.path.join(
os.path.dirname(__file__),
Expand All @@ -97,7 +103,7 @@ def sample_record_xml():
return raw_xml


@pytest.fixture
@pytest.fixture()
def sample_record_xml_oai_dc():
raw_xml = open(os.path.join(
os.path.dirname(__file__),
Expand All @@ -106,6 +112,15 @@ def sample_record_xml_oai_dc():
return raw_xml


@pytest.fixture()
def sample_empty_set():
    """Return the raw XML of the sample OAI-PMH response with no records.

    The content is read from ``data/sample_empty_response.xml`` located
    next to this module.
    """
    path = os.path.join(
        os.path.dirname(__file__),
        "data/sample_empty_response.xml"
    )
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(path) as xml_file:
        return xml_file.read()


@pytest.fixture
def sample_list_xml():
raw_physics_xml = open(os.path.join(
Expand Down
7 changes: 7 additions & 0 deletions tests/data/sample_empty_response.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
<responseDate>2016-06-02T19:13:02Z</responseDate>
<request verb="ListRecords" until="2015-01-17" from="2015-01-17" metadataPrefix="arXiv" set="physics:hep-lat">http://export.arxiv.org/oai2</request>
<error code="noRecordsMatch">
No records match the paramaters specified for this selective harvesting request.
</error>
</OAI-PMH>
128 changes: 128 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2014, 2015, 2016 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

"""Tests for the OAI harvester command-line interface."""

from __future__ import absolute_import, print_function

import re

import responses
from click.testing import CliRunner

from invenio_oaiharvester.cli import harvest


@responses.activate
def test_cli_harvest_idents(script_info, sample_record_xml, tmpdir):
    """Test the ``harvest`` CLI command when harvesting by identifier."""
    responses.add(
        responses.GET,
        'http://export.arxiv.org/oai2',
        body=sample_record_xml,
        content_type='text/xml'
    )

    runner = CliRunner()

    # Plain harvest of a single identifier.
    result = runner.invoke(
        harvest,
        ['-u', 'http://export.arxiv.org/oai2',
         '-m', 'arXiv',
         '-i', 'oai:arXiv.org:1507.03011'],
        obj=script_info
    )
    assert result.exit_code == 0

    # Dates and identifiers cannot be combined.
    result = runner.invoke(
        harvest,
        ['-u', 'http://export.arxiv.org/oai2',
         '-m', 'arXiv',
         '-f', '2015-01-17',
         '-i', 'oai:arXiv.org:1507.03011'],
        obj=script_info
    )
    assert result.exit_code != 0

    # Queue it instead of running it directly.
    result = runner.invoke(
        harvest,
        ['-u', 'http://export.arxiv.org/oai2',
         '-m', 'arXiv',
         '-i', 'oai:arXiv.org:1507.03011',
         '--enqueue'],
        obj=script_info
    )
    assert result.exit_code == 0

    # Save it to a directory. Pass the per-test temporary directory itself:
    # ``tmpdir.dirname`` is its parent directory, not the directory pytest
    # created for this test.
    result = runner.invoke(
        harvest,
        ['-u', 'http://export.arxiv.org/oai2',
         '-m', 'arXiv',
         '-i', 'oai:arXiv.org:1507.03011',
         '-d', str(tmpdir)],
        obj=script_info
    )
    assert result.exit_code == 0

    # Missing URL (and no source name) must fail.
    result = runner.invoke(
        harvest,
        ['-m', 'arXiv',
         '-i', 'oai:arXiv.org:1507.03011'],
        obj=script_info
    )
    assert result.exit_code != 0


@responses.activate
def test_cli_harvest_list(script_info, sample_empty_set):
    """Test the ``harvest`` CLI command when listing by set and dates.

    The mocked endpoint answers with the empty-set sample response; the
    command is expected to exit cleanly both when run directly and when
    enqueued.
    """
    responses.add(
        responses.GET,
        # ``https?`` accepts both schemes (the former ``http?`` made the
        # final "p" optional instead); dots are escaped to match literally.
        re.compile(r'https?://export\.arxiv\.org/oai2.*set=physics.*'),
        body=sample_empty_set,
        content_type='text/xml'
    )

    runner = CliRunner()
    result = runner.invoke(
        harvest,
        ['-u', 'http://export.arxiv.org/oai2',
         '-m', 'arXiv',
         '-s', 'physics',
         '-f', '2015-01-17',
         '-t', '2015-01-17'],
        obj=script_info
    )
    assert result.exit_code == 0

    # Queue it instead of running it directly.
    result = runner.invoke(
        harvest,
        ['-u', 'http://export.arxiv.org/oai2',
         '-m', 'arXiv',
         '-s', 'physics',
         '-f', '2015-01-17',
         '-t', '2015-01-17',
         '--enqueue'],
        obj=script_info
    )
    assert result.exit_code == 0
39 changes: 39 additions & 0 deletions tests/test_harvesting.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import responses

from invenio_oaiharvester import get_records, list_records
from invenio_oaiharvester.errors import WrongDateCombination


@responses.activate
Expand Down Expand Up @@ -70,6 +71,22 @@ def test_raise_missing_info(app):
with app.app_context():
with pytest.raises(NameOrUrlMissing):
list_records()
with pytest.raises(NameOrUrlMissing):
get_records([])


def test_raise_wrong_date(app):
    """Check that a from-date later than the until-date is rejected."""
    harvest_args = dict(
        metadata_prefix='arXiv',
        from_date='2015-01-18',
        until_date='2015-01-17',
        url='http://export.arxiv.org/oai2',
        name=None,
        setspecs='physics:hep-lat',
    )
    with app.app_context(), pytest.raises(WrongDateCombination):
        list_records(**harvest_args)


@responses.activate
Expand Down Expand Up @@ -100,6 +117,28 @@ def test_list_records(app, sample_list_xml, sample_list_xml_cs):
assert len(records) == 190


@responses.activate
def test_list_no_records(app, sample_empty_set):
    """Check that an empty-set response yields no records."""
    responses.add(
        responses.GET,
        # ``https?`` accepts both schemes (the former ``http?`` made the
        # final "p" optional instead); dots are escaped to match literally.
        re.compile(r'https?://export\.arxiv\.org/oai2.*set=physics.*'),
        body=sample_empty_set,
        content_type='text/xml'
    )

    with app.app_context():
        _, records = list_records(
            metadata_prefix='arXiv',
            from_date='2015-01-17',
            until_date='2015-01-17',
            url='http://export.arxiv.org/oai2',
            name=None,
            setspecs='physics:hep-lat'
        )
        assert not records


@responses.activate
def test_get_from_identifiers(app, sample_record_xml_oai_dc):
"""Test that getting records via identifiers work."""
Expand Down
85 changes: 85 additions & 0 deletions tests/test_tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2016 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

import re

import pytest
import responses

from invenio_oaiharvester.errors import InvenioOAIHarvesterError
from invenio_oaiharvester.signals import oaiharvest_finished
from invenio_oaiharvester.tasks import get_specific_records, \
list_records_from_dates


@responses.activate
def test_get_specific_records(app, sample_record_xml):
    """Check the identifier-based harvesting task.

    The same identifier is submitted once as a plain string and once as a
    single-element list; each run must report exactly one record through
    the ``oaiharvest_finished`` signal.
    """
    def check_single_record(request, records, name):
        assert len(records) == 1

    identifier = 'oai:arXiv.org:1507.03011'

    responses.add(
        responses.GET,
        'http://export.arxiv.org/oai2',
        body=sample_record_xml,
        content_type='text/xml'
    )
    oaiharvest_finished.connect(check_single_record)
    try:
        with app.app_context():
            # First as a bare string, then as a list of identifiers.
            for identifiers in (identifier, [identifier]):
                get_specific_records(
                    identifiers,
                    metadata_prefix="arXiv",
                    url='http://export.arxiv.org/oai2'
                )
    finally:
        # Always detach the receiver so it cannot leak into other tests.
        oaiharvest_finished.disconnect(check_single_record)


@responses.activate
def test_list_records_from_dates(app, sample_list_xml):
    """Check the date-range harvesting task for a single setspec.

    The receiver connected to ``oaiharvest_finished`` asserts that the
    task reports 150 records for the mocked response.
    """
    def bar(request, records, name):
        assert len(records) == 150

    responses.add(
        responses.GET,
        # ``https?`` accepts both schemes (the former ``http?`` made the
        # final "p" optional instead); dots are escaped to match literally.
        re.compile(r'https?://export\.arxiv\.org/oai2.*set=physics.*'),
        body=sample_list_xml,
        content_type='text/xml'
    )
    oaiharvest_finished.connect(bar)
    try:
        with app.app_context():
            list_records_from_dates(
                metadata_prefix='arXiv',
                from_date='2015-01-15',
                until_date='2015-01-20',
                url='http://export.arxiv.org/oai2',
                name=None,
                setspecs='physics'
            )
    finally:
        # Always detach the receiver so it cannot leak into other tests.
        oaiharvest_finished.disconnect(bar)
Loading

0 comments on commit a9bc75d

Please sign in to comment.