From 7dfd08c0866a2e5cf1b765e0ec1546447380fb4e Mon Sep 17 00:00:00 2001 From: Jared Smith Date: Wed, 17 Aug 2016 15:04:01 -0700 Subject: [PATCH] Storing the size and mimetypes from uploads size is stored as bytes there's a bit of redundancy with getting the mimetypes of files, by default, mimetype is determined by the url, fallback is done by using `python-magic` module --- ckan/lib/uploader.py | 24 ++++++++++++++++++++++++ ckan/logic/action/create.py | 12 ++++++++---- ckan/logic/action/update.py | 6 +++++- requirements.in | 1 + 4 files changed, 38 insertions(+), 5 deletions(-) diff --git a/ckan/lib/uploader.py b/ckan/lib/uploader.py index d65d15a3888..7465b280f49 100644 --- a/ckan/lib/uploader.py +++ b/ckan/lib/uploader.py @@ -4,6 +4,7 @@ import cgi import datetime import logging +import magic import ckan.lib.munge as munge import ckan.logic as logic @@ -96,6 +97,8 @@ def __init__(self, object_type, old_filename=None): self.storage_path = None self.filename = None self.filepath = None + self.filesize = 0 # bytes + self.mimetype = None path = get_storage_path() if not path: return @@ -166,10 +169,18 @@ def upload(self, max_size=2): raise logic.ValidationError( {self.file_field: ['File upload too large']} ) + output_file.seek(0, os.SEEK_END) + self.filesize = output_file.tell() output_file.close() os.rename(self.tmp_filepath, self.filepath) self.clear = True + try: + self.mimetype = magic.from_file(self.filepath, mime=True) + except Exception: + # Best effort: a failed mimetype lookup must not abort the upload + self.mimetype = None + if (self.clear and self.old_filename and not self.old_filename.startswith('http')): try: @@ -192,6 +203,8 @@ def __init__(self, resource): if e.errno != 17: raise self.filename = None + self.filesize = 0 # bytes + self.mimetype = None url = resource.get('url') upload_field_storage = resource.pop('upload', None) @@ -254,6 +267,7 @@ def upload(self, id, max_size=10): current_size = current_size + 1 # MB chunks data = self.upload_file.read(2 ** 20) + if not data: break 
output_file.write(data) @@ -262,8 +276,18 @@ def upload(self, id, max_size=10): raise logic.ValidationError( {'upload': ['File upload too large']} ) + + output_file.seek(0, os.SEEK_END) + self.filesize = output_file.tell() output_file.close() os.rename(tmp_filepath, filepath) + + try: + self.mimetype = magic.from_file(filepath, mime=True) + except Exception: + # Best effort: a failed mimetype lookup must not abort the upload + self.mimetype = None + return # The resource form only sets self.clear (via the input clear_upload) diff --git a/ckan/logic/action/create.py b/ckan/logic/action/create.py index 799f1064430..23ac48a59e9 100644 --- a/ckan/logic/action/create.py +++ b/ckan/logic/action/create.py @@ -3,6 +3,7 @@ '''API functions for adding data to CKAN.''' import logging +import mimetypes import random import re from socket import error as socket_error @@ -295,6 +296,13 @@ def resource_create(context, data_dict): pkg_dict['resources'] = [] upload = uploader.get_resource_uploader(data_dict) + ## Get out resource_id resource from model as it will not appear in + ## package_show until after commit + upload.upload(context['package'].resources[-1].id, + uploader.get_max_resource_size()) + + data_dict['mimetype'] = mimetypes.guess_type(data_dict['url'])[0] or upload.mimetype + data_dict['size'] = upload.filesize pkg_dict['resources'].append(data_dict) @@ -307,10 +315,6 @@ def resource_create(context, data_dict): errors = e.error_dict['resources'][-1] raise ValidationError(errors) - ## Get out resource_id resource from model as it will not appear in - ## package_show until after commit - upload.upload(context['package'].resources[-1].id, - uploader.get_max_resource_size()) model.repo.commit() ## Run package show again to get out actual last_resource diff --git a/ckan/logic/action/update.py b/ckan/logic/action/update.py index 667021cb14e..c8193740c9f 100644 --- a/ckan/logic/action/update.py +++ b/ckan/logic/action/update.py @@ -6,6 +6,7 @@ import datetime import time import json +import mimetypes from 
ckan.common import config import paste.deploy.converters as converters @@ -93,6 +94,10 @@ def resource_update(context, data_dict): plugin.before_update(context, pkg_dict['resources'][n], data_dict) upload = uploader.get_resource_uploader(data_dict) + upload.upload(id, uploader.get_max_resource_size()) + + data_dict['mimetype'] = mimetypes.guess_type(data_dict['url'])[0] or upload.mimetype + data_dict['size'] = upload.filesize pkg_dict['resources'][n] = data_dict @@ -105,7 +110,6 @@ def resource_update(context, data_dict): errors = e.error_dict['resources'][n] raise ValidationError(errors) - upload.upload(id, uploader.get_max_resource_size()) model.repo.commit() resource = _get_action('resource_show')(context, {'id': id}) diff --git a/requirements.in b/requirements.in index fed7a94fda3..c97b96ffaed 100644 --- a/requirements.in +++ b/requirements.in @@ -13,6 +13,7 @@ Pairtree==0.7.1-T passlib==1.6.5 paste==1.7.5.1 psycopg2==2.4.5 +python-magic>=0.4.6 pysolr==3.5.0 Pylons==0.9.7 python-dateutil>=1.5.0,<2.0.0