Skip to content

Commit

Permalink
Tighten checks around incoming resource ids
Browse files Browse the repository at this point in the history
  • Loading branch information
amercader committed May 24, 2023
1 parent 1e76740 commit d7d1801
Show file tree
Hide file tree
Showing 8 changed files with 182 additions and 4 deletions.
29 changes: 29 additions & 0 deletions CHANGELOG.rst
Expand Up @@ -9,6 +9,35 @@ Changelog

.. towncrier release notes start
v.2.9.9 2023-05-24
==================

Bugfixes
--------

- `CVE-2023-32321 <https://github.com/ckan/ckan/security/advisories/GHSA-446m-hmmm-hm8m>`_: fix
potential path traversal, remote code execution, information disclosure and
DOS vulnerabilities via crafted resource ids.

Migration notes
---------------
- The default storage backend for the session data used by the Beaker library
uses the Python ``pickle`` module, which is considered unsafe. While there is
no direct known vulnerability using this vector, a safer alternative is to
store the session data in the `client-side cookie <https://beaker.readthedocs.io/en/latest/sessions.html#cookie-based>`_.
This will probably be the default behaviour in future CKAN versions::

# ckan.ini
beaker.session.type = cookie
beaker.session.data_serializer = json
beaker.session.validate_key = CHANGE_ME

beaker.session.httponly = True
beaker.session.secure = True
beaker.session.samesite = Lax
# or Strict, depending on your setup


v.2.9.8 2023-02-15
==================

Expand Down
10 changes: 8 additions & 2 deletions ckan/lib/uploader.py
Expand Up @@ -291,13 +291,19 @@ def __init__(self, resource):
resource['url_type'] = ''

def get_directory(self, id):
directory = os.path.join(self.storage_path,
id[0:3], id[3:6])
real_storage = os.path.realpath(self.storage_path)
directory = os.path.join(real_storage, id[0:3], id[3:6])
if directory != os.path.realpath(directory):
raise logic.ValidationError({'upload': ['Invalid storage directory']})
return directory

def get_path(self, id):
directory = self.get_directory(id)
filepath = os.path.join(directory, id[6:])

if filepath != os.path.realpath(filepath):
raise logic.ValidationError({'upload': ['Invalid storage path']})

return filepath

def upload(self, id, max_size=10):
Expand Down
6 changes: 4 additions & 2 deletions ckan/logic/schema.py
Expand Up @@ -29,9 +29,11 @@ def wrapper():
def default_resource_schema(
ignore_empty, unicode_safe, ignore, ignore_missing,
remove_whitespace, if_empty_guess_format, clean_format, isodate,
int_validator, extras_valid_json, keep_extras):
int_validator, extras_valid_json, keep_extras,
resource_id_validator, resource_id_does_not_exist):
return {
'id': [ignore_empty, unicode_safe],
'id': [ignore_empty, resource_id_validator,
resource_id_does_not_exist, unicode_safe],
'package_id': [ignore],
'url': [ignore_missing, unicode_safe, remove_whitespace],
'description': [ignore_missing, unicode_safe],
Expand Down
34 changes: 34 additions & 0 deletions ckan/logic/validators.py
Expand Up @@ -12,6 +12,8 @@
from six import string_types, iteritems
from six.moves.urllib.parse import urlparse

from sqlalchemy.orm.exc import NoResultFound

import ckan.lib.navl.dictization_functions as df
import ckan.logic as logic
import ckan.lib.helpers as h
Expand Down Expand Up @@ -188,6 +190,29 @@ def package_id_does_not_exist(value, context):
raise Invalid(_('Dataset id already exists'))
return value


def resource_id_does_not_exist(key, data, errors, context):
session = context['session']
model = context['model']

if data[key] is missing:
return
resource_id = data[key]
assert key[0] == 'resources', ('validator depends on resource schema '
'validating as part of package schema')
package_id = data.get(('id',))
query = session.query(model.Resource.package_id).filter(
model.Resource.id == resource_id,
model.Resource.state != State.DELETED,
)
try:
[parent_id] = query.one()
except NoResultFound:
return
if parent_id != package_id:
errors[key].append(_('Resource id already exists.'))


def package_name_exists(value, context):

model = context['model']
Expand Down Expand Up @@ -230,6 +255,15 @@ def resource_id_exists(value, context):
return value


def resource_id_validator(value):
pattern = re.compile("[^0-9a-zA-Z _-]")
if pattern.search(value):
raise Invalid(_('Invalid characters in resource id'))
if len(value) < 7 or len(value) > 100:
raise Invalid(_('Invalid length for resource id'))
return value


def user_id_exists(user_id, context):
'''Raises Invalid if the given user_id does not exist in the model given
in the context, otherwise returns the given user_id.
Expand Down
21 changes: 21 additions & 0 deletions ckan/tests/lib/test_uploader.py
@@ -1,8 +1,10 @@
# encoding: utf-8
import pytest
import six

from werkzeug.datastructures import FileStorage

from ckan.logic import ValidationError
import ckan.lib.uploader
from ckan.lib.uploader import ResourceUpload, Upload

Expand Down Expand Up @@ -62,6 +64,25 @@ def test_resource_with_upload(
assert res_upload.filesize == 0
assert res_upload.filename == u'data.csv'

def test_resource_with_dodgy_id(
self, ckan_config, monkeypatch, tmpdir):
monkeypatch.setitem(ckan_config, u'ckan.storage_path', str(tmpdir))
monkeypatch.setattr(ckan.lib.uploader, u'_storage_path', str(tmpdir))

resource_id = u'aaabbb/../../../../nope.txt'
res = {u'clear_upload': u'',
u'format': u'PNG',
u'url': u'https://example.com/data.csv',
u'description': u'',
u'upload': FileStorage(filename=u'data.csv', content_type=u'CSV'),
u'package_id': u'dataset1',
u'id': resource_id,
u'name': u'data.csv'}
res_upload = ResourceUpload(res)

with pytest.raises(ValidationError):
res_upload.upload(resource_id)


class TestUpload(object):
def test_group_upload(self, monkeypatch, tmpdir, make_app, ckan_config):
Expand Down
36 changes: 36 additions & 0 deletions ckan/tests/logic/action/test_create.py
Expand Up @@ -395,6 +395,42 @@ def test_it_requires_package_id(self):
with pytest.raises(logic.ValidationError):
helpers.call_action("resource_create", **data_dict)

def test_invalid_characters_in_id(self):

data_dict = {
"id": "../../nope.txt",
"package_id": factories.Dataset()["id"],
"url": "http://data",
"name": "A nice resource",
}

with pytest.raises(logic.ValidationError):
helpers.call_action("resource_create", **data_dict)

def test_id_too_long(self):

data_dict = {
"id": "x" * 111,
"package_id": factories.Dataset()["id"],
"url": "http://data",
"name": "A nice resource",
}

with pytest.raises(logic.ValidationError):
helpers.call_action("resource_create", **data_dict)

def test_id_already_exists(self):
data_dict = {
'id': 'wont-be-fooled-again',
'package_id': factories.Dataset()['id'],
}
helpers.call_action('resource_create', **data_dict)

data_dict['package_id'] = factories.Dataset()['id']

with pytest.raises(logic.ValidationError):
helpers.call_action('resource_create', **data_dict)

def test_doesnt_require_url(self):
dataset = factories.Dataset()
data_dict = {"package_id": dataset["id"]}
Expand Down
18 changes: 18 additions & 0 deletions ckan/tests/logic/action/test_patch.py
Expand Up @@ -5,6 +5,7 @@

from ckan.tests import helpers, factories
from ckan.logic.action.get import package_show as core_package_show
from ckan.logic import ValidationError


@pytest.mark.usefixtures("clean_db", "with_request_context")
Expand All @@ -27,6 +28,23 @@ def test_package_patch_updating_single_field(self):
assert dataset2["name"] == "somethingnew"
assert dataset2["notes"] == "some test now"

def test_package_patch_invalid_characters_in_resource_id(self):
user = factories.User()
dataset = factories.Dataset(user=user)

with pytest.raises(ValidationError):
helpers.call_action(
"package_patch",
id=dataset["id"],
resources=[
{
"id": "../../nope.txt",
"url": "http://data",
"name": "A nice resource",
},
],
)

def test_resource_patch_updating_single_field(self):
user = factories.User()
dataset = factories.Dataset(
Expand Down
32 changes: 32 additions & 0 deletions ckan/tests/logic/action/test_update.py
Expand Up @@ -660,6 +660,23 @@ def test_resources(self):
assert resources_[1]["url"] == "http://datahub.io/index.json"
assert resources_[1]["position"] == 1

def test_invalid_characters_in_resource_id(self):
user = factories.User()
dataset = factories.Dataset(user=user)

with pytest.raises(logic.ValidationError):
helpers.call_action(
"package_update",
id=dataset["id"],
resources=[
{
"id": "../../nope.txt",
"url": "http://data",
"name": "A nice resource",
},
],
)

def test_tags(self):
user = factories.User()
dataset = factories.Dataset(user=user)
Expand Down Expand Up @@ -1925,6 +1942,21 @@ def test_revise_add_resource(self):
)
assert response['package']['resources'][0]['name'] == 'new resource'

def test_revise_invalid_resource_id(self):
dataset = factories.Dataset()
with pytest.raises(logic.ValidationError):
helpers.call_action(
'package_revise',
match={'id': dataset['id']},
update__resources__extend=[
{
'id': '../../nope.txt',
'name': 'new resource',
'url': 'http://example.com'
}
],
)

def test_revise_resource_by_index(self):
dataset = factories.Dataset(resources=[{'url': 'http://example.com'}])
response = helpers.call_action(
Expand Down

0 comments on commit d7d1801

Please sign in to comment.