Skip to content

Commit

Permalink
Storing the size and mimetypes from uploads
Browse files Browse the repository at this point in the history
The size is stored in bytes.

There's a bit of redundancy in how the mimetypes of files are determined:
by default, the mimetype is guessed from the URL, and the `python-magic`
module is used as a fallback when that guess fails.
  • Loading branch information
jrods committed Oct 25, 2016
1 parent c0d88b5 commit 7dfd08c
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 5 deletions.
24 changes: 24 additions & 0 deletions ckan/lib/uploader.py
Expand Up @@ -4,6 +4,7 @@
import cgi
import datetime
import logging
import magic

import ckan.lib.munge as munge
import ckan.logic as logic
Expand Down Expand Up @@ -96,6 +97,8 @@ def __init__(self, object_type, old_filename=None):
self.storage_path = None
self.filename = None
self.filepath = None
self.filesize = 0 # bytes
self.mimetype = None
path = get_storage_path()
if not path:
return
Expand Down Expand Up @@ -166,10 +169,18 @@ def upload(self, max_size=2):
raise logic.ValidationError(
{self.file_field: ['File upload too large']}
)
output_file.seek(0, os.SEEK_END)
self.filesize = output_file.tell()
output_file.close()
os.rename(self.tmp_filepath, self.filepath)
self.clear = True

try:
self.mimetype = magic.from_file(filepath, mime=True)
except:
# Not that important if call above fails
self.mimetype = None

if (self.clear and self.old_filename
and not self.old_filename.startswith('http')):
try:
Expand All @@ -192,6 +203,8 @@ def __init__(self, resource):
if e.errno != 17:
raise
self.filename = None
self.filesize = 0 # bytes
self.mimetype = None

url = resource.get('url')
upload_field_storage = resource.pop('upload', None)
Expand Down Expand Up @@ -254,6 +267,7 @@ def upload(self, id, max_size=10):
current_size = current_size + 1
# MB chunks
data = self.upload_file.read(2 ** 20)

if not data:
break
output_file.write(data)
Expand All @@ -262,8 +276,18 @@ def upload(self, id, max_size=10):
raise logic.ValidationError(
{'upload': ['File upload too large']}
)

output_file.seek(0, os.SEEK_END)
self.filesize = output_file.tell()
output_file.close()
os.rename(tmp_filepath, filepath)

try:
self.mimetype = magic.from_file(filepath, mime=True)
except:
# Not that important if call above fails
self.mimetype = None

return

# The resource form only sets self.clear (via the input clear_upload)
Expand Down
12 changes: 8 additions & 4 deletions ckan/logic/action/create.py
Expand Up @@ -3,6 +3,7 @@
'''API functions for adding data to CKAN.'''

import logging
import mimetypes
import random
import re
from socket import error as socket_error
Expand Down Expand Up @@ -295,6 +296,13 @@ def resource_create(context, data_dict):
pkg_dict['resources'] = []

upload = uploader.get_resource_uploader(data_dict)
## Get out resource_id resource from model as it will not appear in
## package_show until after commit
upload.upload(context['package'].resources[-1].id,
uploader.get_max_resource_size())

data_dict['mimetype'] = mimetypes.guess_type(data_dict['url'])[0] or upload.mimetype
data_dict['size'] = upload.filesize

pkg_dict['resources'].append(data_dict)

Expand All @@ -307,10 +315,6 @@ def resource_create(context, data_dict):
errors = e.error_dict['resources'][-1]
raise ValidationError(errors)

## Get out resource_id resource from model as it will not appear in
## package_show until after commit
upload.upload(context['package'].resources[-1].id,
uploader.get_max_resource_size())
model.repo.commit()

## Run package show again to get out actual last_resource
Expand Down
6 changes: 5 additions & 1 deletion ckan/logic/action/update.py
Expand Up @@ -6,6 +6,7 @@
import datetime
import time
import json
import mimetypes

from ckan.common import config
import paste.deploy.converters as converters
Expand Down Expand Up @@ -93,6 +94,10 @@ def resource_update(context, data_dict):
plugin.before_update(context, pkg_dict['resources'][n], data_dict)

upload = uploader.get_resource_uploader(data_dict)
upload.upload(id, uploader.get_max_resource_size())

data_dict['mimetype'] = mimetypes.guess_type(data_dict['url'])[0] or upload.mimetype
data_dict['size'] = upload.filesize

pkg_dict['resources'][n] = data_dict

Expand All @@ -105,7 +110,6 @@ def resource_update(context, data_dict):
errors = e.error_dict['resources'][n]
raise ValidationError(errors)

upload.upload(id, uploader.get_max_resource_size())
model.repo.commit()

resource = _get_action('resource_show')(context, {'id': id})
Expand Down
1 change: 1 addition & 0 deletions requirements.in
Expand Up @@ -13,6 +13,7 @@ Pairtree==0.7.1-T
passlib==1.6.5
paste==1.7.5.1
psycopg2==2.4.5
python-magic>=0.4.6
pysolr==3.5.0
Pylons==0.9.7
python-dateutil>=1.5.0,<2.0.0
Expand Down

0 comments on commit 7dfd08c

Please sign in to comment.