Skip to content
This repository has been archived by the owner on Jan 12, 2023. It is now read-only.

Prevent phantom metadata from crashing queries #75

Merged
merged 3 commits into from Feb 19, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 5 additions & 1 deletion gutenberg/query/api.py
Expand Up @@ -9,6 +9,7 @@
from six import with_metaclass
from rdflib.term import URIRef

+from gutenberg._domain_model.exceptions import InvalidEtextIdException
from gutenberg._domain_model.exceptions import UnsupportedFeatureException
from gutenberg._domain_model.types import validate_etextno
from gutenberg._util.abc import abstractclassmethod
Expand Down Expand Up @@ -114,7 +115,10 @@ def _uri_to_etext(cls, uri_ref):
meta-data RDF graph to a human-friendly integer text identifier.

"""
-        return validate_etextno(int(os.path.basename(uri_ref.toPython())))
+        try:
+            return validate_etextno(int(os.path.basename(uri_ref.toPython())))
+        except InvalidEtextIdException:
+            return None

@staticmethod
def __find_implementations():
Expand Down
3 changes: 2 additions & 1 deletion gutenberg/query/extractors.py
Expand Up @@ -40,7 +40,8 @@ def get_metadata(cls, etextno):
@classmethod
def get_etexts(cls, requested_value):
query = cls._metadata()[:cls.predicate():cls.contains(requested_value)]
-        return frozenset(cls._uri_to_etext(result) for result in query)
+        results = (cls._uri_to_etext(result) for result in query)
+        return frozenset(result for result in results if result is not None)


class AuthorExtractor(_SimplePredicateRelationshipExtractor):
Expand Down
7 changes: 5 additions & 2 deletions tests/_sample_metadata.py
Expand Up @@ -12,14 +12,17 @@
class SampleMetaData(object):
__uids = {}

-    def __init__(self, etextno, authors=None, titles=None, formaturi=None, rights=None, subject=None, language=None):
+    def __init__(self, etextno, authors=None, titles=None, formaturi=None, rights=None, subject=None, language=None, is_phantom=False):
self.author = frozenset(authors or [])
self.title = frozenset(titles or [])
self.formaturi = frozenset(formaturi or [])
-        self.etextno = etextno or self.__create_uid(self.author | self.title)
+        self.etextno = (etextno
+                        if etextno is not None
+                        else self.__create_uid(self.author | self.title))
self.rights = frozenset(rights or [])
self.subject = frozenset(subject or [])
self.language = frozenset(language or [])
+        self.is_phantom = is_phantom

@classmethod
def __create_uid(cls, hashable):
Expand Down
5 changes: 5 additions & 0 deletions tests/data/sample-metadata/0
@@ -0,0 +1,5 @@
+{
+"is_phantom": true,
+"language": ["en"],
+"rights": ["Public domain in the USA."]
+}
23 changes: 13 additions & 10 deletions tests/test_query.py
Expand Up @@ -58,16 +58,19 @@ def _run_get_etexts_for_feature(self, feature):
for testcase in self.sample_data():
for feature_value in getattr(testcase, feature):
actual = get_etexts(feature, feature_value)
-                self.assertIn(
-                    testcase.etextno,
-                    actual,
-                    "didn't retrieve {etextno} when querying for books that "
-                    'have {feature}="{feature_value}" (got {actual}).'
-                    .format(
-                        etextno=testcase.etextno,
-                        feature=feature,
-                        feature_value=feature_value,
-                        actual=actual))
+                if testcase.is_phantom:
+                    self.assertNotIn(testcase.etextno, actual)
+                else:
+                    self.assertIn(
+                        testcase.etextno,
+                        actual,
+                        "didn't retrieve {etextno} when querying for books "
+                        'that have {feature}="{feature_value}" (got {actual}).'
+                        .format(
+                            etextno=testcase.etextno,
+                            feature=feature,
+                            feature_value=feature_value,
+                            actual=actual))

def test_get_etexts_title(self):
self._run_get_etexts_for_feature('title')
Expand Down