diff --git a/pybossa/api/__init__.py b/pybossa/api/__init__.py index 39f5abd6aa..b1a9b7dc63 100644 --- a/pybossa/api/__init__.py +++ b/pybossa/api/__init__.py @@ -46,6 +46,7 @@ from pybossa.ratelimit import ratelimit from pybossa.cache.projects import n_tasks import pybossa.sched as sched +from pybossa.util import sign_task from pybossa.error import ErrorStatus from global_stats import GlobalStatsAPI from task import TaskAPI @@ -133,6 +134,12 @@ def register_api(view, endpoint, url, pk='id', pk_type='int'): register_api(ProjectByNameAPI, 'api_projectbyname', '/projectbyname', pk='key', pk_type='string') +def add_task_signature(tasks): + if current_app.config.get('ENABLE_ENCRYPTION'): + for task in tasks: + sign_task(task) + + @jsonpify @blueprint.route('/project//newtask') @ratelimit(limit=ratelimits.get('LIMIT'), per=ratelimits.get('PER')) @@ -160,6 +167,7 @@ def new_task(project_id): guard.extend_task_presented_timestamp_expiry(task, user_id_or_ip) data = [task.dictize() for task in tasks] + add_task_signature(data) if len(data) == 0: response = make_response(json.dumps({})) elif len(data) == 1: diff --git a/pybossa/api/api_base.py b/pybossa/api/api_base.py index 923d37d1e2..1b2d38332e 100644 --- a/pybossa/api/api_base.py +++ b/pybossa/api/api_base.py @@ -166,6 +166,7 @@ def _create_json_response(self, query_result, oid): if not items: raise Forbidden('Forbidden') ensure_authorized_to('read', query_result[0]) + self._sign_item(items[0]) items = items[0] return json.dumps(items) @@ -555,3 +556,7 @@ def _verify_auth(self, item): on the items to return """ return True + + def _sign_item(self, item): + """Apply custom signature""" + pass diff --git a/pybossa/api/task.py b/pybossa/api/task.py index f347a7d6fe..b626358c3f 100644 --- a/pybossa/api/task.py +++ b/pybossa/api/task.py @@ -28,6 +28,7 @@ from pybossa.model.task import Task from pybossa.model.project import Project from pybossa.core import result_repo +from pybossa.util import sign_task from api_base import 
APIBase from pybossa.api.pwd_manager import get_pwd_manager from pybossa.util import get_user_id_or_ip, validate_required_fields @@ -78,3 +79,9 @@ def _verify_auth(self, item): project = Project(**get_project_data(item.project_id)) pwd_manager = get_pwd_manager(project) return not pwd_manager.password_needed(project, get_user_id_or_ip()) + + def _sign_item(self, item): + project_id = item['project_id'] + if current_user.admin or \ + current_user.id in get_project_data(project_id)['owners_ids']: + sign_task(item) diff --git a/pybossa/api/task_run.py b/pybossa/api/task_run.py index d4e4c61665..a9c04e87bf 100644 --- a/pybossa/api/task_run.py +++ b/pybossa/api/task_run.py @@ -60,12 +60,13 @@ def _preprocess_post_data(self, data): user_id = current_user.id self.check_can_post(project_id, task_id, user_id) info = data.get('info') + with_encryption = app.config.get('ENABLE_ENCRYPTION') if info is None: return path = "{0}/{1}/{2}".format(project_id, task_id, user_id) - _upload_files_from_json(info, path) - _upload_files_from_request(info, request.files, path) - if app.config.get('PRIVATE_INSTANCE'): + _upload_files_from_json(info, path, with_encryption) + _upload_files_from_request(info, request.files, path, with_encryption) + if with_encryption: data['info'] = { 'pyb_answer_url': _upload_task_run(info, path) } @@ -153,7 +154,7 @@ def _validate_datetime(self, timestamp): return timestamp.isoformat() -def _upload_files_from_json(task_run_info, upload_path): +def _upload_files_from_json(task_run_info, upload_path, with_encryption): if not isinstance(task_run_info, dict): return for key, value in task_run_info.iteritems(): @@ -165,18 +166,20 @@ def _upload_files_from_json(task_run_info, upload_path): out_url = s3_upload_from_string(app.config.get("S3_BUCKET"), content, filename, - directory=upload_path, conn_name='S3_TASKRUN') + directory=upload_path, conn_name='S3_TASKRUN', + with_encryption = with_encryption) task_run_info[key] = out_url -def 
_upload_files_from_request(task_run_info, files, upload_path): +def _upload_files_from_request(task_run_info, files, upload_path, with_encryption): for key in files: if not key.endswith('__upload_url'): raise BadRequest("File upload field should end in __upload_url") file_obj = request.files[key] s3_url = s3_upload_file_storage(app.config.get("S3_BUCKET"), file_obj, - directory=upload_path, conn_name='S3_TASKRUN') + directory=upload_path, conn_name='S3_TASKRUN', + with_encryption = with_encryption) task_run_info[key] = s3_url @@ -184,4 +187,5 @@ def _upload_task_run(task_run, upload_path): content = json.dumps(task_run, ensure_ascii=False) return s3_upload_from_string(app.config.get("S3_BUCKET"), content, 'pyb_answer.json', - directory=upload_path, conn_name='S3_TASKRUN') + directory=upload_path, conn_name='S3_TASKRUN', + with_encryption = True) diff --git a/pybossa/cloud_store_api/s3.py b/pybossa/cloud_store_api/s3.py index c0112a3be6..ef02d2cd2f 100644 --- a/pybossa/cloud_store_api/s3.py +++ b/pybossa/cloud_store_api/s3.py @@ -5,11 +5,13 @@ from urlparse import urlparse import boto from boto.s3.key import Key +from six import BytesIO from flask import current_app as app from werkzeug.utils import secure_filename import magic from werkzeug.exceptions import BadRequest, InternalServerError from pybossa.cloud_store_api.connection import create_connection +from pybossa.encryption import AESWithGCM allowed_mime_types = ['application/pdf', 'text/csv', @@ -58,7 +60,7 @@ def tmp_file_from_string(string): def s3_upload_from_string(s3_bucket, string, filename, headers=None, directory='', file_type_check=True, - return_key_only=False, conn_name=DEFAULT_CONN): + return_key_only=False, conn_name=DEFAULT_CONN, with_encryption=False): """ Upload a string to s3 """ @@ -66,12 +68,12 @@ def s3_upload_from_string(s3_bucket, string, filename, headers=None, headers = headers or {} return s3_upload_tmp_file( s3_bucket, tmp_file, filename, headers, directory, file_type_check, - 
return_key_only, conn_name) + return_key_only, conn_name, with_encryption) def s3_upload_file_storage(s3_bucket, source_file, headers=None, directory='', file_type_check=True, return_key_only=False, - conn_name=DEFAULT_CONN): + conn_name=DEFAULT_CONN, with_encryption=False): """ Upload a werzkeug FileStorage content to s3 """ @@ -82,19 +84,26 @@ def s3_upload_file_storage(s3_bucket, source_file, headers=None, directory='', source_file.save(tmp_file.name) return s3_upload_tmp_file( s3_bucket, tmp_file, filename, headers, directory, file_type_check, - return_key_only, conn_name) + return_key_only, conn_name, with_encryption) def s3_upload_tmp_file(s3_bucket, tmp_file, filename, headers, directory='', file_type_check=True, - return_key_only=False, conn_name=DEFAULT_CONN): + return_key_only=False, conn_name=DEFAULT_CONN, + with_encryption=False): """ Upload the content of a temporary file to s3 and delete the file """ try: if file_type_check: check_type(tmp_file.name) - url = s3_upload_file(s3_bucket, tmp_file.name, filename, headers, + content = tmp_file.read() + if with_encryption: + secret = app.config.get('FILE_ENCRYPTION_KEY') + cipher = AESWithGCM(secret) + content = cipher.encrypt(content) + fp = BytesIO(content) + url = s3_upload_file(s3_bucket, fp, filename, headers, directory, return_key_only, conn_name) finally: os.unlink(tmp_file.name) @@ -108,7 +117,7 @@ def form_upload_directory(directory, filename): return "/".join(part for part in parts if part) -def s3_upload_file(s3_bucket, source_file_name, target_file_name, +def s3_upload_file(s3_bucket, source_file, target_file_name, headers, directory="", return_key_only=False, conn_name=DEFAULT_CONN): """ @@ -129,8 +138,8 @@ def s3_upload_file(s3_bucket, source_file_name, target_file_name, assert(len(upload_key) < 256) key = bucket.new_key(upload_key) - key.set_contents_from_filename( - source_file_name, headers=headers, + key.set_contents_from_file( + source_file, headers=headers, 
class AESWithGCM(object):
    """Encrypt/decrypt byte strings using AES256 and GCM.

    The output of ``encrypt`` and the input of ``decrypt`` are base64
    encoded byte strings which, once decoded, have the following structure:

    - the first byte is the length of the IV in bytes
    - the remainder is the concatenation of IV + ciphertext + tag
    """

    def __init__(self, key, iv_length=12, tag_length=16):
        """
        @param key: the secret key, unhashed
        @param iv_length: length in bytes of the initialization vector.
            Only needed for encryption; decryption reads the length from
            the first byte of the payload.
        @param tag_length (bytes): only needed for decryption. Encryption
            always produces 16 bytes tags.
        """
        self.iv_length = iv_length
        self.tag_length = tag_length
        self.key = self._hash_key(key)

    @staticmethod
    def _hash_key(key):
        """Return the SHA-256 digest of *key*; the digest is the AES key."""
        # Text keys (e.g. a value read from a settings file) are encoded
        # explicitly; byte strings and bytearrays are hashed untouched.
        if isinstance(key, type(u'')):
            key = key.encode('utf-8')
        return sha256(key).digest()

    def get_cipher(self, iv, tag=None):
        """
        Build the AES/GCM cipher object for the given IV.

        @param iv: initialization vector (byte string)
        @param tag: authentication tag; passed when decrypting, left as
            None when encrypting
        """
        backend = default_backend()
        mode = modes.GCM(iv, tag)
        algo = algorithms.AES(self.key)
        return Cipher(algo, mode, backend)

    def encrypt(self, string):
        """
        @param string: a byte string to encrypt
        Return the base64 encoding of: IV length byte + IV + ciphertext + tag
        """
        iv = os.urandom(self.iv_length)
        encryptor = self.get_cipher(iv).encryptor()
        ct = encryptor.update(string) + encryptor.finalize()
        # One-byte header carrying the IV length; going through bytearray
        # yields a byte string on both Python 2 and Python 3.
        header = bytes(bytearray([self.iv_length]))
        return base64.b64encode(header + iv + ct + encryptor.tag)

    def _split_ciphertext(self, string):
        """
        Split a decoded payload into its (iv, ciphertext, tag) parts.

        @param string: the base64-decoded payload
        Raise ValueError when the payload is empty or too short to hold
        the announced IV plus a tag of ``tag_length`` bytes.
        """
        if not string:
            raise ValueError('Encrypted payload is empty')
        # Indexing a byte string gives a str on Python 2 and an int on
        # Python 3; a one-byte bytearray gives an int on both.
        iv_length = bytearray(string[:1])[0]
        if len(string) < 1 + iv_length + self.tag_length:
            raise ValueError('Encrypted payload is too short')
        iv = string[1:iv_length + 1]
        ciphertext = string[iv_length + 1:-self.tag_length]
        tag = string[-self.tag_length:]
        return iv, ciphertext, tag

    def decrypt(self, string):
        """
        @param string: expected to be base64 encoded.
        Return a byte string
        """
        decoded = base64.b64decode(string)
        iv, ciphertext, tag = self._split_ciphertext(decoded)
        decryptor = self.get_cipher(iv, tag).decryptor()
        return decryptor.update(ciphertext) + decryptor.finalize()
def check_allowed(user_id, task_id, project, file_url):
    """Check that a user may read a file referenced by a task.

    :param user_id: id of the requesting user
    :param task_id: id of the task taken from the request signature
    :param project: project data dict; the keys ``id``, ``info`` and
        ``owners_ids`` are read
    :param file_url: URL path of the requested file
    :returns: True when access is granted
    :raises BadRequest: the task does not exist or belongs to another project
    :raises Forbidden: the file is not one of the task's ``info`` values, or
        the user is neither admin, nor holder of a lock on the task, nor an
        owner of the project
    """
    task = task_repo.get_task(task_id)

    if not task or task.project_id != project['id']:
        raise BadRequest('Task does not exist')

    # Only files referenced by the task itself may be served through it.
    if file_url not in task.info.values():
        raise Forbidden('Invalid task content')

    if current_user.admin:
        return True

    lock_ttl = project['info'].get('timeout', ContributionsGuard.STAMP_TTL)
    if has_lock(task_id, user_id, lock_ttl):
        return True

    if user_id in project['owners_ids']:
        return True

    raise Forbidden('FORBIDDEN')


# NOTE(review): the URL variables were missing from the rule as it appears in
# this change ('/encrypted////'), which cannot feed the four view arguments.
# The converters below are reconstructed from the signature and from the
# URLs used by the tests -- confirm against the repository.
@blueprint.route('/encrypted/<string:store>/<string:bucket>/<int:project_id>/<path:path>')
@login_required
def encrypted_file(store, bucket, project_id, path):
    """Proxy encrypted task file in a cloud storage.

    The request must carry a ``task-signature`` query argument. The task id
    it contains is used to check access (see ``check_allowed``) before the
    object is downloaded, decrypted with ``FILE_ENCRYPTION_KEY`` and returned.

    :param store: storage identifier taken from the URL (not used here)
    :param bucket: name of the bucket holding the file
    :param project_id: id of the project the file belongs to
    :param path: path of the file below the project prefix
    :raises Forbidden: no signature was sent, or access is denied
    :raises BadRequest: the signed task is not a task of this project
    :raises NotFound: the storage reports ``NoSuchKey``
    :raises InternalServerError: any other storage error
    """
    signature = request.args.get('task-signature')
    if not signature:
        raise Forbidden('FORBIDDEN')

    project = get_project_data(project_id)
    timeout = project['info'].get('timeout', ContributionsGuard.STAMP_TTL)

    # NOTE(review): presumably signer.loads raises on a tampered or expired
    # signature and nothing here turns that into a 4xx -- confirm.
    payload = signer.loads(signature, max_age=timeout)
    task_id = payload['task_id']

    check_allowed(current_user.id, task_id, project, request.path)

    ## download file
    conn_args = current_app.config.get('S3_TASK_REQUEST', {})
    try:
        key = '/{}/{}'.format(project_id, path)
        conn = create_connection(**conn_args)
        _bucket = conn.get_bucket(bucket, validate=False)
        _key = _bucket.get_key(key, validate=False)
        content = _key.get_contents_as_string()
    except S3ResponseError as e:
        if e.error_code == 'NoSuchKey':
            raise NotFound('File Does Not Exist')
        else:
            raise InternalServerError('An Error Occurred')

    ## decrypt file
    secret = current_app.config.get('FILE_ENCRYPTION_KEY')
    cipher = AESWithGCM(secret)
    decrypted = cipher.decrypt(content)

    ## respond
    response = Response(decrypted, content_type=_key.content_type)

    # Forward the optional object metadata only when it is set; adding an
    # unset value would send the literal header value "None".
    optional_headers = (
        ('Content-Encoding', _key.content_encoding),
        ('Content-Disposition', _key.content_disposition),
    )
    for header_name, header_value in optional_headers:
        if header_value:
            response.headers.add(header_name, header_value)
    return response
class TestTaskSignature(TestAPI):
    """API tests for the ``signature`` field of task responses."""

    def _get_json(self, url):
        """GET *url* following redirects and return the decoded JSON body."""
        response = self.app.get(url, follow_redirects=True)
        return json.loads(response.data)

    @with_context
    @patch('pybossa.api.task.TaskAPI._verify_auth')
    def test_task_no_sign(self, auth):
        """A single task has no signature when the test does not switch
        ENABLE_ENCRYPTION on."""
        auth.return_value = True
        admin, owner, user = UserFactory.create_batch(3)
        project = ProjectFactory.create(owner=owner)
        tasks = TaskFactory.create_batch(2, project=project)

        task_url = '/api/task/%s?api_key=%s'

        for requester in (owner, admin, user):
            body = self._get_json(task_url % (tasks[0].id, requester.api_key))
            assert 'signature' not in body

    @with_context
    @patch('pybossa.api.task.TaskAPI._verify_auth')
    def test_task_with_signature(self, auth):
        """With ENABLE_ENCRYPTION on, owner and admin get a signed task and a
        regular user does not."""
        auth.return_value = True
        admin, owner, user = UserFactory.create_batch(3)
        project = ProjectFactory.create(owner=owner)
        tasks = TaskFactory.create_batch(2, project=project)

        task_url = '/api/task/%s?api_key=%s'

        with patch.dict(self.flask_app.config, {'ENABLE_ENCRYPTION': True}):
            for requester in (owner, admin):
                body = self._get_json(
                    task_url % (tasks[0].id, requester.api_key))
                assert 'signature' in body

            body = self._get_json(task_url % (tasks[0].id, user.api_key))
            assert 'signature' not in body

    @with_context
    @patch('pybossa.api.task.TaskAPI._verify_auth')
    def test_list_tasks(self, auth):
        """Task listings are never signed, even with ENABLE_ENCRYPTION on."""
        auth.return_value = True
        users = UserFactory.create_batch(3)
        project = ProjectFactory.create(owner=users[1])
        TaskFactory.create_batch(2, project=project)

        list_url = '/api/task?api_key=%s&all=1'

        with patch.dict(self.flask_app.config, {'ENABLE_ENCRYPTION': True}):
            for requester in users:
                listed = self._get_json(list_url % requester.api_key)
                for entry in listed:
                    assert 'signature' not in entry

    @with_context
    @patch('pybossa.api.task.TaskAPI._verify_auth')
    @patch('pybossa.api.get_pwd_manager')
    def test_newtask(self, get_pwd_manager, auth):
        """With ENABLE_ENCRYPTION on, the newtask endpoint returns a signed
        task to every user."""
        auth.return_value = True
        pwd_manager = MagicMock()
        pwd_manager.password_needed.return_value = False
        get_pwd_manager.return_value = pwd_manager

        users = UserFactory.create_batch(3)
        project = ProjectFactory.create(owner=users[1])
        TaskFactory.create_batch(2, project=project)

        newtask_url = '/api/project/%s/newtask?api_key=%s'

        with patch.dict(self.flask_app.config, {'ENABLE_ENCRYPTION': True}):
            for requester in users:
                body = self._get_json(
                    newtask_url % (project.id, requester.api_key))
                assert 'signature' in body
@patch('pybossa.cloud_store_api.s3.boto.s3.key.Key.set_contents_from_filename') + @patch('pybossa.cloud_store_api.s3.boto.s3.key.Key.set_contents_from_file') def test_taskrun_with_upload(self, set_content): with patch.dict(self.flask_app.config, self.patch_config): project = ProjectFactory.create() @@ -98,7 +99,7 @@ def test_taskrun_with_upload(self, set_content): assert url == expected, url @with_context - @patch('pybossa.cloud_store_api.s3.boto.s3.key.Key.set_contents_from_filename') + @patch('pybossa.cloud_store_api.s3.boto.s3.key.Key.set_contents_from_file') def test_taskrun_with_no_upload(self, set_content): with patch.dict(self.flask_app.config, self.patch_config): project = ProjectFactory.create() @@ -124,13 +125,12 @@ def test_taskrun_with_no_upload(self, set_content): assert res['info']['test__upload_url']['test'] == 'not a file' @with_context - @patch('pybossa.cloud_store_api.s3.boto.s3.key.Key.set_contents_from_filename') + @patch('pybossa.cloud_store_api.s3.boto.s3.key.Key.set_contents_from_file') def test_taskrun_multipart(self, set_content): with patch.dict(self.flask_app.config, self.patch_config): project = ProjectFactory.create() task = TaskFactory.create(project=project) self.app.get('/api/project/%s/newtask?api_key=%s' % (project.id, project.owner.api_key)) - data = dict( project_id=project.id, task_id=task.id, @@ -163,7 +163,7 @@ def test_taskrun_multipart(self, set_content): assert url == expected, url @with_context - @patch('pybossa.cloud_store_api.s3.boto.s3.key.Key.set_contents_from_filename') + @patch('pybossa.cloud_store_api.s3.boto.s3.key.Key.set_contents_from_file') def test_taskrun_multipart_error(self, set_content): with patch.dict(self.flask_app.config, self.patch_config): project = ProjectFactory.create() @@ -200,8 +200,9 @@ class TestTaskrunWithSensitiveFile(TestAPI): 'host': host, 'auth_headers': [('a', 'b')] }, - 'PRIVATE_INSTANCE': True, - 'S3_BUCKET': 'test_bucket' + 'ENABLE_ENCRYPTION': True, + 'S3_BUCKET': 'test_bucket', + 
'FILE_ENCRYPTION_KEY': 'testkey' } def setUp(self): @@ -209,7 +210,7 @@ def setUp(self): db.session.query(TaskRun).delete() @with_context - @patch('pybossa.cloud_store_api.s3.boto.s3.key.Key.set_contents_from_filename') + @patch('pybossa.cloud_store_api.s3.boto.s3.key.Key.set_contents_from_file') @patch('pybossa.api.task_run.s3_upload_from_string', wraps=s3_upload_from_string) def test_taskrun_with_upload(self, upload_from_string, set_content): with patch.dict(self.flask_app.config, self.patch_config): @@ -248,9 +249,18 @@ def test_taskrun_with_upload(self, upload_from_string, set_content): expected = 'https://{host}/{bucket}/{project_id}/{task_id}/{user_id}/{filename}'.format(**args) assert url == expected, url + aes = AESWithGCM('testkey') + # first call + first_call = set_content.call_args_list[0] + args, kwargs = first_call + encrypted = args[0].read() + content = aes.decrypt(encrypted) + assert encrypted != content + assert content == 'abc' + upload_from_string.assert_called() - args, kwargs = upload_from_string.call_args - _, content, _ = args + args, kwargs = set_content.call_args + content = aes.decrypt(args[0].read()) actual_content = json.loads(content) args = { @@ -266,7 +276,7 @@ def test_taskrun_with_upload(self, upload_from_string, set_content): assert actual_content['another_field'] == 42 @with_context - @patch('pybossa.cloud_store_api.s3.boto.s3.key.Key.set_contents_from_filename') + @patch('pybossa.cloud_store_api.s3.boto.s3.key.Key.set_contents_from_file') def test_taskrun_multipart(self, set_content): with patch.dict(self.flask_app.config, self.patch_config): project = ProjectFactory.create() diff --git a/test/test_cloud_store_api/test_s3_uploader.py b/test/test_cloud_store_api/test_s3_uploader.py index 7cedb9fb9b..a09613dd99 100644 --- a/test/test_cloud_store_api/test_s3_uploader.py +++ b/test/test_cloud_store_api/test_s3_uploader.py @@ -53,7 +53,7 @@ def test_invalid_directory(self): assert_raises(RuntimeError, validate_directory, 
'hello$world') @with_context - @patch('pybossa.cloud_store_api.s3.boto.s3.key.Key.set_contents_from_filename') + @patch('pybossa.cloud_store_api.s3.boto.s3.key.Key.set_contents_from_file') def test_upload_from_string(self, set_contents): with patch.dict(self.flask_app.config, self.default_config): url = s3_upload_from_string('bucket', u'hello world', 'test.txt') @@ -67,7 +67,7 @@ def test_upload_from_string_exception(self, open): 'bucket', u'hellow world', 'test.txt') @with_context - @patch('pybossa.cloud_store_api.s3.boto.s3.key.Key.set_contents_from_filename') + @patch('pybossa.cloud_store_api.s3.boto.s3.key.Key.set_contents_from_file') def test_upload_from_string_return_key(self, set_contents): with patch.dict(self.flask_app.config, self.default_config): key = s3_upload_from_string('bucket', u'hello world', 'test.txt', @@ -75,7 +75,7 @@ def test_upload_from_string_return_key(self, set_contents): assert key == 'test.txt', key @with_context - @patch('pybossa.cloud_store_api.s3.boto.s3.key.Key.set_contents_from_filename') + @patch('pybossa.cloud_store_api.s3.boto.s3.key.Key.set_contents_from_file') def test_upload_from_storage(self, set_contents): with patch.dict(self.flask_app.config, self.default_config): stream = StringIO('Hello world!') @@ -86,7 +86,7 @@ def test_upload_from_storage(self, set_contents): assert url == 'https://s3.storage.com/bucket/test.txt', url @with_context - @patch('pybossa.cloud_store_api.s3.boto.s3.key.Key.set_contents_from_filename') + @patch('pybossa.cloud_store_api.s3.boto.s3.key.Key.set_contents_from_file') @patch('pybossa.cloud_store_api.s3.boto.s3.key.Key.generate_url') def test_upload_remove_query_params(self, generate_url, set_content): with patch.dict(self.flask_app.config, self.default_config): diff --git a/test/test_encryption.py b/test/test_encryption.py new file mode 100644 index 0000000000..3a5bd72042 --- /dev/null +++ b/test/test_encryption.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- +from pybossa.encryption import 
class TestAes(object):
    """Round-trip and known-answer checks for ``AESWithGCM``."""

    def setUp(self):
        # Cipher shared by the round-trip tests: 12 byte IV, 16 byte tag.
        key = bytearray('very secret', 'ascii')
        self.aes = AESWithGCM(key, 12, 16)

    def test_aes(self):
        """Encrypting changes the text and decrypting restores it."""
        plain = 'testing simple encrytion'
        token = self.aes.encrypt(plain)
        assert token != plain
        assert self.aes.decrypt(token) == plain

    def test_aes_2(self):
        """A payload recorded in advance decrypts to the expected text."""
        key = bytearray('this is my super secret key', 'ascii')
        token = 'DMj4/yC2pgzgAg76TApmk7zVZlaG0B47KASCnS/TqH6fQpA9UaHjmGLHqCfvGVVQcSivX76Oy349QivZjOJ2yfXZRb0='
        expected = 'this is a test string I plan to encrypt'
        assert AESWithGCM(key).decrypt(token) == expected

    def test_aes_unicode(self):
        """UTF-8 encoded unicode text survives an encrypt/decrypt cycle."""
        text = u'∀ z ∈ ℂ, ζ(z) = 0 ⇒ ((z ∈ -2ℕ) ∨ (Re(z) = -½))'
        token = self.aes.encrypt(text.encode('utf-8'))
        restored = self.aes.decrypt(token).decode('utf-8')
        assert text == restored
class TestFileproxy(web.Helper):
    """Tests for the ``/fileproxy/encrypted`` view.

    The storage connection is always mocked; the tests cover access
    control, decryption of the stored content and storage error mapping.
    """

    ENCRYPTION_KEY = 'testkey'
    FILE_URL = '/fileproxy/encrypted/s3/test/%s/file.pdf'

    def get_key(self, create_connection):
        """Wire a mocked connection -> bucket -> key chain and return the key."""
        key = MagicMock()
        bucket = MagicMock()
        bucket.get_key.return_value = key
        conn = MagicMock()
        conn.get_bucket.return_value = bucket
        create_connection.return_value = conn
        return key

    def _task_for(self, project, file_url):
        """Create a task of *project* whose info references *file_url*."""
        return TaskFactory.create(project=project, info={'url': file_url})

    def _signed_url(self, url, user, task_id):
        """Return *url* with the user's api key and a signature for *task_id*."""
        signature = signer.dumps({'task_id': task_id})
        return '%s?api_key=%s&task-signature=%s' % (
            url, user.api_key, signature)

    def _assert_decrypted_download(self, create_connection, req_url):
        """Store encrypted content in the mocked key and check that the
        proxy answers 200 with the decrypted content."""
        aes = AESWithGCM(self.ENCRYPTION_KEY)
        key = self.get_key(create_connection)
        key.get_contents_as_string.return_value = aes.encrypt('the content')

        with patch.dict(self.flask_app.config, {
            'FILE_ENCRYPTION_KEY': self.ENCRYPTION_KEY
        }):
            res = self.app.get(req_url, follow_redirects=True)
            assert res.status_code == 200, res.status_code
            assert res.data == 'the content', res.data

    @with_context
    def test_proxy_no_signature(self):
        """A request without task-signature is rejected with 403."""
        project = ProjectFactory.create()
        owner = project.owner

        url = '%s?api_key=%s' % (self.FILE_URL % project.id, owner.api_key)
        res = self.app.get(url, follow_redirects=True)
        assert res.status_code == 403, res.status_code

    @with_context
    def test_proxy_no_task(self):
        """A signature for a task that does not exist yields 400."""
        project = ProjectFactory.create()
        owner = project.owner

        url = self._signed_url(self.FILE_URL % project.id, owner, 100)
        res = self.app.get(url, follow_redirects=True)
        assert res.status_code == 400, res.status_code

    @with_context
    @patch('pybossa.view.fileproxy.create_connection')
    def test_proxy_owner(self, create_connection):
        """The project owner receives the decrypted file."""
        project = ProjectFactory.create()
        url = self.FILE_URL % project.id
        task = self._task_for(project, url)

        req_url = self._signed_url(url, project.owner, task.id)
        self._assert_decrypted_download(create_connection, req_url)

    @with_context
    @patch('pybossa.view.fileproxy.create_connection')
    def test_proxy_admin(self, create_connection):
        """An admin who does not own the project receives the decrypted file."""
        admin, owner = UserFactory.create_batch(2)
        project = ProjectFactory.create(owner=owner)
        url = self.FILE_URL % project.id
        task = self._task_for(project, url)

        req_url = self._signed_url(url, admin, task.id)
        self._assert_decrypted_download(create_connection, req_url)

    @with_context
    @patch('pybossa.view.fileproxy.create_connection')
    def test_file_not_in_task(self, create_connection):
        """A file that the signed task does not reference yields 403."""
        project = ProjectFactory.create()
        url = self.FILE_URL % project.id
        task = self._task_for(project, 'not/the/same')

        req_url = self._signed_url(url, project.owner, task.id)
        res = self.app.get(req_url, follow_redirects=True)
        assert res.status_code == 403, res.status_code

    # This test used to be named test_file_user as well; the later
    # definition replaced it in the class body, so it never ran.
    @with_context
    @patch('pybossa.view.fileproxy.create_connection')
    @patch('pybossa.view.fileproxy.has_lock')
    def test_file_user_forbidden(self, has_lock, create_connection):
        """A regular user without a lock on the task gets 403."""
        # Pin the lock lookup so the result does not depend on the lock store.
        has_lock.return_value = False
        admin, owner, user = UserFactory.create_batch(3)
        project = ProjectFactory.create()
        url = self.FILE_URL % project.id
        task = self._task_for(project, url)

        req_url = self._signed_url(url, user, task.id)
        res = self.app.get(req_url, follow_redirects=True)
        assert res.status_code == 403, res.status_code

    @with_context
    @patch('pybossa.view.fileproxy.create_connection')
    @patch('pybossa.view.fileproxy.has_lock')
    def test_file_user(self, has_lock, create_connection):
        """A regular user holding the task lock receives the decrypted file."""
        has_lock.return_value = True
        admin, owner, user = UserFactory.create_batch(3)
        project = ProjectFactory.create()
        url = self.FILE_URL % project.id
        task = self._task_for(project, url)

        req_url = self._signed_url(url, user, task.id)
        self._assert_decrypted_download(create_connection, req_url)

    @with_context
    @patch('pybossa.view.fileproxy.create_connection')
    def test_proxy_s3_error(self, create_connection):
        """A storage error other than NoSuchKey is reported as 500."""
        admin, owner = UserFactory.create_batch(2)
        project = ProjectFactory.create(owner=owner)
        url = self.FILE_URL % project.id
        task = self._task_for(project, url)

        req_url = self._signed_url(url, admin, task.id)

        key = self.get_key(create_connection)
        key.get_contents_as_string.side_effect = S3ResponseError(403, 'Forbidden')

        res = self.app.get(req_url, follow_redirects=True)
        assert res.status_code == 500, res.status_code

    @with_context
    @patch('pybossa.view.fileproxy.create_connection')
    def test_proxy_key_not_found(self, create_connection):
        """A NoSuchKey storage error is reported as 404."""
        admin, owner = UserFactory.create_batch(2)
        project = ProjectFactory.create(owner=owner)
        url = self.FILE_URL % project.id
        task = self._task_for(project, url)

        req_url = self._signed_url(url, admin, task.id)

        key = self.get_key(create_connection)
        exception = S3ResponseError(404, 'NoSuchKey')
        exception.error_code = 'NoSuchKey'
        key.get_contents_as_string.side_effect = exception

        res = self.app.get(req_url, follow_redirects=True)
        assert res.status_code == 404, res.status_code