Skip to content

Commit

Permalink
Merge a255788 into 69ec1a9
Browse files Browse the repository at this point in the history
  • Loading branch information
slint committed Oct 6, 2016
2 parents 69ec1a9 + a255788 commit e8ad12b
Show file tree
Hide file tree
Showing 11 changed files with 133 additions and 36 deletions.
3 changes: 3 additions & 0 deletions invenio_previewer/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@
PREVIEWER_CSV_VALIDATION_BYTES = 1024
"""Number of bytes read by CSV previewer to validate the file."""

PREVIEWER_CHARDET_BYTES = 1024
"""Number of bytes to read for character encoding detection by `cchardet`."""

PREVIEWER_MAX_FILE_SIZE_BYTES = 1 * 1024 * 1024
"""Maximum file size in bytes for JSON/XML files."""

Expand Down
16 changes: 7 additions & 9 deletions invenio_previewer/extensions/csv_dthreejs.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,26 +28,24 @@

import csv

import chardet
from flask import current_app, render_template

from ..proxies import current_previewer
from ..utils import detect_encoding

previewable_extensions = ['csv', 'dsv']


def validate_csv(file):
"""Return dialect information about given csv file."""
# Read first X bytes from file.
with file.open() as fp:
sample = fp.read(
current_app.config.get('PREVIEWER_CSV_VALIDATION_BYTES', 1024)
)
try:
# Detect encoding and dialect
encoding = chardet.detect(sample).get('encoding')
delimiter = csv.Sniffer().sniff(sample.decode(encoding)).delimiter
is_valid = True
with file.open() as fp:
encoding = detect_encoding(fp, default='utf-8')
sample = fp.read(
current_app.config.get('PREVIEWER_CSV_VALIDATION_BYTES', 1024))
delimiter = csv.Sniffer().sniff(sample.decode(encoding)).delimiter
is_valid = True
except Exception as e:
current_app.logger.debug(
'File {0} is not valid CSV: {1}'.format(file.uri, e))
Expand Down
9 changes: 6 additions & 3 deletions invenio_previewer/extensions/json_prismjs.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,18 @@

from flask import current_app, render_template

from ..utils import detect_encoding

previewable_extensions = ['json']


def render(file):
"""Pretty print the JSON file for rendering."""
with file.open() as fp:
file_content = fp.read().decode('utf-8')
parsed_json = json.loads(file_content, object_pairs_hook=OrderedDict)
return json.dumps(parsed_json, indent=4, separators=(',', ': '))
encoding = detect_encoding(fp, default='utf-8')
file_content = fp.read().decode(encoding)
json_data = json.loads(file_content, object_pairs_hook=OrderedDict)
return json.dumps(json_data, indent=4, separators=(',', ': '))


def validate_json(file):
Expand Down
10 changes: 5 additions & 5 deletions invenio_previewer/extensions/mistune.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,17 @@

import mistune
from flask import render_template
from ..utils import detect_encoding

previewable_extensions = ['md']


def render(file):
"""Render HTML from Markdown file content."""
fp = file.open()
content = fp.read()
result = mistune.markdown(content.decode('utf-8'))
fp.close()
return result
with file.open() as fp:
encoding = detect_encoding(fp, default='utf-8')
result = mistune.markdown(fp.read().decode(encoding))
return result


def can_preview(file):
Expand Down
5 changes: 4 additions & 1 deletion invenio_previewer/extensions/xml_prismjs.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,16 @@

from flask import current_app, render_template

from ..utils import detect_encoding

previewable_extensions = ['xml']


def render(file):
"""Pretty print the XML file for rendering."""
with file.open() as fp:
file_content = fp.read().decode('utf-8')
encoding = detect_encoding(fp, default='utf-8')
file_content = fp.read().decode(encoding)
parsed_xml = xml.dom.minidom.parseString(file_content)
return parsed_xml.toprettyxml(indent=' ', newl='')

Expand Down
12 changes: 6 additions & 6 deletions invenio_previewer/extensions/zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@

import os
import zipfile
import chardet
from six import binary_type

import cchardet as chardet
from flask import current_app, render_template
from six import binary_type

from .._compat import text_type
from ..proxies import current_previewer
Expand All @@ -51,7 +51,7 @@ def make_tree(file):
sample = ' '.join(zf.namelist()[:max_files_count])
if not isinstance(sample, binary_type):
sample = sample.encode('utf-16be')
encoding = chardet.detect(sample).get('encoding')
encoding = chardet.detect(sample).get('encoding', 'utf-8')
for i, info in enumerate(zf.infolist()):
if i > max_files_count:
raise BufferError('Too many files inside the ZIP file.')
Expand All @@ -74,11 +74,11 @@ def make_tree(file):
node['size'] = info.file_size
except BufferError:
return tree, True, None
except (zipfile.error, zipfile.LargeZipFile) as e:
except (zipfile.LargeZipFile):
return tree, False, 'Zipfile is too large to be previewed.'
except Exception as e:
current_app.logger.warning(str(e), exc_info=True)
return tree, False, 'Zipfile is not previewable.'
finally:
fp.close()

return tree, False, None

Expand Down
49 changes: 49 additions & 0 deletions invenio_previewer/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2016 CERN.
#
# Invenio is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307, USA.
#
# In applying this license, CERN does not
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.

"""Invenio Previewer Utilities."""

import cchardet
from flask import current_app


def detect_encoding(fp, default=None):
    """Detect the character encoding of a file.

    :param fp: Open Python file pointer.
    :param default: Fallback encoding to use.
    :returns: The detected encoding, or ``default`` when detection fails or
        yields no encoding.

    .. note:: The file pointer is returned at its original read position.
    """
    init_pos = fp.tell()
    try:
        sample = fp.read(
            current_app.config.get('PREVIEWER_CHARDET_BYTES', 1024))
        # The detector can report an ``encoding`` of ``None`` (e.g. for an
        # empty or undecidable sample). The key is then present, so
        # ``.get('encoding', default)`` would never fall back; ``or`` does.
        return cchardet.detect(sample).get('encoding') or default
    except Exception:
        current_app.logger.warning('Encoding detection failed.', exc_info=True)
        return default
    finally:
        # Always restore the read position so callers can read from where
        # they left off.
        fp.seek(init_pos)
13 changes: 10 additions & 3 deletions invenio_previewer/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

from __future__ import absolute_import, print_function

from flask import Blueprint, abort, request
from flask import Blueprint, abort, current_app, request

from .api import PreviewFile
from .extensions import default
Expand Down Expand Up @@ -76,8 +76,15 @@ def preview(pid, record, template=None):
for plugin in current_previewer.iter_previewers(
previewers=[file_previewer] if file_previewer else None):
if plugin.can_preview(fileobj):
return plugin.preview(fileobj)

try:
return plugin.preview(fileobj)
except Exception:
current_app.logger.warning(
('Preview failed for {key}, in {pid_type}:{pid_value}'
.format(key=fileobj.file.key,
pid_type=fileobj.pid.pid_type,
pid_value=fileobj.pid.pid_value)),
exc_info=True)
return default.preview(fileobj)


Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
'Flask>=0.11.1',
'Flask-BabelEx>=0.9.3',
'mistune>=0.7.2',
'chardet>=2.3.0',
'cchardet>=1.0.0',
'invenio-assets>=1.0.0a3',
'invenio-pidstore>=1.0.0a6',
'invenio-records-ui>=1.0.0a7',
Expand Down
35 changes: 27 additions & 8 deletions tests/test_macros.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,13 @@

from __future__ import absolute_import, print_function

import zipfile

from flask import render_template_string, url_for
from invenio_db import db
from invenio_files_rest.models import ObjectVersion
from invenio_records_files.api import RecordsBuckets
from mock import patch
from six import BytesIO, b


Expand Down Expand Up @@ -75,6 +78,11 @@ def test_markdown_extension(app, webassets, bucket, record):
res = client.get(preview_url(record['control_number'], 'markdown.md'))
assert '<h3>Testing markdown' in res.get_data(as_text=True)

with patch('mistune.markdown', side_effect=Exception):
res = client.get(preview_url(record['control_number'],
'markdown.md'))
assert 'we are unfortunately not' in res.get_data(as_text=True)


def test_pdf_extension(app, webassets, bucket, record):
"""Test view with pdf files."""
Expand All @@ -94,6 +102,10 @@ def test_csv_dthreejs_extension(app, webassets, bucket, record):
assert 'data-csv-source="' in res.get_data(as_text=True)
assert 'data-csv-delimiter=","' in res.get_data(as_text=True)

with patch('csv.Sniffer', side_effect=Exception):
res = client.get(preview_url(record['control_number'], 'test.csv'))
assert 'we are unfortunately not' in res.get_data(as_text=True)


def test_csv_dthreejs_delimiter(app, webassets, bucket, record):
"""Test view with csv files."""
Expand All @@ -114,15 +126,13 @@ def test_zip_extension(app, webassets, bucket, record, zip_fp):
assert 'Example.txt' in res.get_data(as_text=True)
assert u'Lé UTF8 test.txt' in res.get_data(as_text=True)

with patch('zipfile.ZipFile', side_effect=zipfile.LargeZipFile):
res = client.get(preview_url(record['control_number'], 'test.zip'))
assert 'Zipfile is too large' in res.get_data(as_text=True)

def test_invalid_zip(app, webassets, bucket, record):
"""Test view with an invalid zipfile."""
create_file(
record, bucket, 'test.zip', BytesIO(b'not a zipfile'))

with app.test_client() as client:
res = client.get(preview_url(record['control_number'], 'test.zip'))
assert 'Zipfile is not previewable' in res.get_data(as_text=True)
with patch('zipfile.ZipFile', side_effect=Exception):
res = client.get(preview_url(record['control_number'], 'test.zip'))
assert 'Zipfile is not previewable' in res.get_data(as_text=True)


def test_json_extension(app, webassets, bucket, record):
Expand Down Expand Up @@ -152,6 +162,11 @@ def test_json_extension(app, webassets, bucket, record):
'}'
assert rendered_json in res.get_data(as_text=True)

with patch('json.dumps', side_effect=Exception):
res = client.get(preview_url(record['control_number'],
'test.json'))
assert 'we are unfortunately not' in res.get_data(as_text=True)


def test_max_file_size(app, webassets, bucket, record):
"""Test file size limitation."""
Expand Down Expand Up @@ -179,6 +194,10 @@ def test_xml_extension(app, webassets, bucket, record):
assert '&lt;c&gt;2&lt;/c&gt;' in res.get_data(as_text=True)
assert '&lt;/el&gt;' in res.get_data(as_text=True)

with patch('xml.dom.minidom.Node.toprettyxml', side_effect=Exception):
res = client.get(preview_url(record['control_number'], 'test.xml'))
assert 'we are unfortunately not' in res.get_data(as_text=True)


def test_ipynb_extension(app, webassets, bucket, record):
"""Test view with IPython notebooks files."""
Expand Down
15 changes: 15 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,26 @@

from __future__ import absolute_import, print_function

from mock import patch
from six import BytesIO

from invenio_previewer import current_previewer
from invenio_previewer.utils import detect_encoding


def test_default_file_reader(app, record_with_file, testfile):
    """Check the file returned by the previewer's record file factory."""
    factory = current_previewer.record_file_factory
    resolved = factory(None, record_with_file, testfile.key)
    # The factory must hand back the very version that the fixture stored.
    assert resolved.version_id == testfile.version_id


def test_detect_encoding(app):
    """Test encoding detection and its fallback on detector failure."""
    f = BytesIO(u'Γκρήκ Στρίνγκ'.encode('utf-8'))
    initial_position = f.tell()
    assert detect_encoding(f).lower() == 'utf-8'
    # Detection must not consume the stream.
    assert f.tell() == initial_position

    # ``side_effect`` makes the patched detector raise when it is called;
    # passing ``Exception`` as the replacement object would only swap in the
    # class itself, which does not raise on call.
    with patch('cchardet.detect', side_effect=Exception):
        assert detect_encoding(f) is None
        assert detect_encoding(f, default='utf-8') == 'utf-8'
        # The read position is restored on the failure path as well.
        assert f.tell() == initial_position

0 comments on commit e8ad12b

Please sign in to comment.