Skip to content

Commit

Permalink
Merge branch 'tobes-2057-fix-filename-munging'
Browse files Browse the repository at this point in the history
  • Loading branch information
amercader committed Mar 5, 2015
2 parents 3edd8dd + 715fd88 commit 8e15127
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 9 deletions.
5 changes: 2 additions & 3 deletions ckan/lib/dictization/model_dictize.py
Expand Up @@ -116,8 +116,7 @@ def resource_dictize(res, context):
## for_edit is only called at the times when the dataset is to be edited
## in the frontend. Without for_edit the whole qualified url is returned.
if resource.get('url_type') == 'upload' and not context.get('for_edit'):
last_part = url.split('/')[-1]
cleaned_name = munge.munge_filename(last_part)
cleaned_name = munge.munge_filename(url)
resource['url'] = h.url_for(controller='package',
action='resource_download',
id=resource['package_id'],
Expand Down Expand Up @@ -472,7 +471,7 @@ def get_packages_for_this_group(group_, just_the_count=False):
if image_url and not image_url.startswith('http'):
#munge here should not have an effect only doing it incase
#of potential vulnerability of dodgy api input
image_url = munge.munge_filename(image_url)
image_url = munge.munge_filename_legacy(image_url)
result_dict['image_display_url'] = h.url_for_static(
'uploads/group/%s' % result_dict.get('image_url'),
qualified=True
Expand Down
36 changes: 35 additions & 1 deletion ckan/lib/munge.py
Expand Up @@ -4,6 +4,7 @@
# improved.

import re
import os.path

from ckan import model

Expand Down Expand Up @@ -111,14 +112,47 @@ def munge_tag(tag):
return tag


def munge_filename(filename):
def munge_filename_legacy(filename):
''' Tidies a filename. NB: deprecated
Unfortunately it mangles any path or filename extension, so is deprecated.
It needs to remain unchanged for use by group_dictize() and
Upload.update_data_dict() because if this routine changes then group images
uploaded previous to the change may not be viewable.
'''
filename = substitute_ascii_equivalents(filename)
filename = filename.strip()
filename = re.sub(r'[^a-zA-Z0-9.\- ]', '', filename).replace(' ', '-')
filename = _munge_to_length(filename, 3, 100)
return filename


def munge_filename(filename):
''' Tidies a filename
Keeps the filename extension (e.g. .csv).
Strips off any path on the front.
'''

# just get the filename ignore the path
path, filename = os.path.split(filename)
# clean up
filename = substitute_ascii_equivalents(filename)
filename = filename.lower().strip()
filename = re.sub(r'[^a-zA-Z0-9. -]', '', filename).replace(' ', '-')
# resize if needed but keep extension
name, ext = os.path.splitext(filename)
# limit overly long extensions
if len(ext) > 21:
ext = ext[:21]
# max/min size
ext_length = len(ext)
name = _munge_to_length(name, max(3 - ext_length, 1), 100 - ext_length)
filename = name + ext

return filename


def _munge_to_length(string, min_length, max_length):
'''Pad/truncates a string'''
if len(string) < min_length:
Expand Down
6 changes: 3 additions & 3 deletions ckan/lib/uploader.py
Expand Up @@ -60,7 +60,7 @@ def get_max_resource_size():

class Upload(object):
def __init__(self, object_type, old_filename=None):
''' Setup upload by creating a subdirectory of the storage directory
''' Setup upload by creating a subdirectory of the storage directory
of name object_type. old_filename is the name of the file in the url
field last time'''

Expand Down Expand Up @@ -102,7 +102,7 @@ def update_data_dict(self, data_dict, url_field, file_field, clear_field):
if isinstance(self.upload_field_storage, cgi.FieldStorage):
self.filename = self.upload_field_storage.filename
self.filename = str(datetime.datetime.utcnow()) + self.filename
self.filename = munge.munge_filename(self.filename)
self.filename = munge.munge_filename_legacy(self.filename)
self.filepath = os.path.join(self.storage_path, self.filename)
data_dict[url_field] = self.filename
self.upload_file = self.upload_field_storage.file
Expand All @@ -127,7 +127,7 @@ def upload(self, max_size=2):
current_size = 0
while True:
current_size = current_size + 1
# MB chuncks
# MB chunks
data = self.upload_file.read(2 ** 20)
if not data:
break
Expand Down
44 changes: 42 additions & 2 deletions ckan/new_tests/lib/test_munge.py
@@ -1,9 +1,43 @@
from nose import tools as nose_tools

from ckan.lib.munge import (munge_filename, munge_name,
from ckan.lib.munge import (munge_filename_legacy, munge_filename, munge_name,
munge_title_to_name, munge_tag)


class TestMungeFilenameLegacy(object):

# (original, expected)
munge_list = [
('unchanged', 'unchanged'),
('bad spaces', 'bad-spaces'),
('s', 's__'), # too short
('random:other%character&', 'randomothercharacter'),
(u'u with umlaut \xfc', 'u-with-umlaut-u'),
('2014-11-10 12:24:05.340603my_image.jpeg',
'2014-11-10-122405.340603myimage.jpeg'),
('file.csv', 'file.csv'),
('f' * 100 + '.csv', 'f' * 100),
('path/to/file.csv', 'pathtofile.csv'),
('.longextension', '.longextension'),
('a.longextension', 'a.longextension'),
('.1', '.1_'),
]

def test_munge_filename(self):
'''Munge a list of filenames gives expected results.'''
for org, exp in self.munge_list:
munge = munge_filename_legacy(org)
nose_tools.assert_equal(munge, exp)

def test_munge_filename_multiple_pass(self):
'''Munging filename multiple times produces same result.'''
for org, exp in self.munge_list:
first_munge = munge_filename_legacy(org)
nose_tools.assert_equal(first_munge, exp)
second_munge = munge_filename_legacy(first_munge)
nose_tools.assert_equal(second_munge, exp)


class TestMungeFilename(object):

# (original, expected)
Expand All @@ -14,7 +48,13 @@ class TestMungeFilename(object):
('random:other%character&', 'randomothercharacter'),
(u'u with umlaut \xfc', 'u-with-umlaut-u'),
('2014-11-10 12:24:05.340603my_image.jpeg',
'2014-11-10-122405.340603myimage.jpeg')
'2014-11-10-122405.340603myimage.jpeg'),
('file.csv', 'file.csv'),
('f' * 100 + '.csv', 'f' * 96 + '.csv'),
('path/to/file.csv', 'file.csv'),
('.longextension', '.longextension'),
('a.longextension', 'a.longextension'),
('.1', '.1_'),
]

def test_munge_filename(self):
Expand Down

0 comments on commit 8e15127

Please sign in to comment.