Skip to content

Commit

Permalink
Merge pull request #78 from bloomberg/crowd-1221-file-encryption
Browse files Browse the repository at this point in the history
Crowd 1221 file encryption
  • Loading branch information
fiorda committed Sep 4, 2018
2 parents 3b8a309 + 7a8440a commit 445973d
Show file tree
Hide file tree
Showing 17 changed files with 601 additions and 33 deletions.
8 changes: 8 additions & 0 deletions pybossa/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
from pybossa.ratelimit import ratelimit
from pybossa.cache.projects import n_tasks
import pybossa.sched as sched
from pybossa.util import sign_task
from pybossa.error import ErrorStatus
from global_stats import GlobalStatsAPI
from task import TaskAPI
Expand Down Expand Up @@ -133,6 +134,12 @@ def register_api(view, endpoint, url, pk='id', pk_type='int'):
register_api(ProjectByNameAPI, 'api_projectbyname', '/projectbyname', pk='key', pk_type='string')


def add_task_signature(tasks):
    """Attach a signature to every task dict in ``tasks``, in place.

    Nothing happens unless the ``ENABLE_ENCRYPTION`` setting is truthy.

    @param tasks: iterable of task dictionaries, handed one by one to
        ``sign_task``.
    """
    encryption_enabled = current_app.config.get('ENABLE_ENCRYPTION')
    if not encryption_enabled:
        return
    for task_dict in tasks:
        sign_task(task_dict)


@jsonpify
@blueprint.route('/project/<project_id>/newtask')
@ratelimit(limit=ratelimits.get('LIMIT'), per=ratelimits.get('PER'))
Expand Down Expand Up @@ -160,6 +167,7 @@ def new_task(project_id):
guard.extend_task_presented_timestamp_expiry(task, user_id_or_ip)

data = [task.dictize() for task in tasks]
add_task_signature(data)
if len(data) == 0:
response = make_response(json.dumps({}))
elif len(data) == 1:
Expand Down
5 changes: 5 additions & 0 deletions pybossa/api/api_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ def _create_json_response(self, query_result, oid):
if not items:
raise Forbidden('Forbidden')
ensure_authorized_to('read', query_result[0])
self._sign_item(items[0])
items = items[0]
return json.dumps(items)

Expand Down Expand Up @@ -555,3 +556,7 @@ def _verify_auth(self, item):
on the items to return
"""
return True

def _sign_item(self, item):
"""Apply custom signature"""
pass
7 changes: 7 additions & 0 deletions pybossa/api/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from pybossa.model.task import Task
from pybossa.model.project import Project
from pybossa.core import result_repo
from pybossa.util import sign_task
from api_base import APIBase
from pybossa.api.pwd_manager import get_pwd_manager
from pybossa.util import get_user_id_or_ip, validate_required_fields
Expand Down Expand Up @@ -78,3 +79,9 @@ def _verify_auth(self, item):
project = Project(**get_project_data(item.project_id))
pwd_manager = get_pwd_manager(project)
return not pwd_manager.password_needed(project, get_user_id_or_ip())

def _sign_item(self, item):
    """Sign a task dict when the requester is an admin or a project owner.

    @param item: task dictionary; must contain ``project_id`` and is
        modified in place by ``sign_task``.
    """
    project_id = item['project_id']
    # NOTE(review): an anonymous ``current_user`` may expose neither
    # ``admin`` nor ``id`` (Flask-Login's default AnonymousUserMixin does
    # not); use getattr so such a request is simply left unsigned instead
    # of failing with AttributeError.
    if getattr(current_user, 'admin', False):
        sign_task(item)
        return
    user_id = getattr(current_user, 'id', None)
    if user_id is None:
        return
    # Project data is only looked up for non-admins.
    if user_id in get_project_data(project_id)['owners_ids']:
        sign_task(item)
20 changes: 12 additions & 8 deletions pybossa/api/task_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,13 @@ def _preprocess_post_data(self, data):
user_id = current_user.id
self.check_can_post(project_id, task_id, user_id)
info = data.get('info')
with_encryption = app.config.get('ENABLE_ENCRYPTION')
if info is None:
return
path = "{0}/{1}/{2}".format(project_id, task_id, user_id)
_upload_files_from_json(info, path)
_upload_files_from_request(info, request.files, path)
if app.config.get('PRIVATE_INSTANCE'):
_upload_files_from_json(info, path, with_encryption)
_upload_files_from_request(info, request.files, path, with_encryption)
if with_encryption:
data['info'] = {
'pyb_answer_url': _upload_task_run(info, path)
}
Expand Down Expand Up @@ -153,7 +154,7 @@ def _validate_datetime(self, timestamp):
return timestamp.isoformat()


def _upload_files_from_json(task_run_info, upload_path):
def _upload_files_from_json(task_run_info, upload_path, with_encryption):
if not isinstance(task_run_info, dict):
return
for key, value in task_run_info.iteritems():
Expand All @@ -165,23 +166,26 @@ def _upload_files_from_json(task_run_info, upload_path):
out_url = s3_upload_from_string(app.config.get("S3_BUCKET"),
content,
filename,
directory=upload_path, conn_name='S3_TASKRUN')
directory=upload_path, conn_name='S3_TASKRUN',
with_encryption = with_encryption)
task_run_info[key] = out_url


def _upload_files_from_request(task_run_info, files, upload_path):
def _upload_files_from_request(task_run_info, files, upload_path, with_encryption):
for key in files:
if not key.endswith('__upload_url'):
raise BadRequest("File upload field should end in __upload_url")
file_obj = request.files[key]
s3_url = s3_upload_file_storage(app.config.get("S3_BUCKET"),
file_obj,
directory=upload_path, conn_name='S3_TASKRUN')
directory=upload_path, conn_name='S3_TASKRUN',
with_encryption = with_encryption)
task_run_info[key] = s3_url


def _upload_task_run(task_run, upload_path):
    """Serialize a task run answer to JSON and upload it, encrypted.

    The JSON document is stored as ``pyb_answer.json`` under ``upload_path``
    through the ``S3_TASKRUN`` connection, always with encryption enabled.

    @param task_run: JSON-serializable answer (the task run ``info``).
    @param upload_path: directory prefix for the uploaded object.
    @return: whatever ``s3_upload_from_string`` returns (the caller stores
        it as ``pyb_answer_url``).
    """
    content = json.dumps(task_run, ensure_ascii=False)
    return s3_upload_from_string(app.config.get("S3_BUCKET"),
                                 content, 'pyb_answer.json',
                                 directory=upload_path,
                                 conn_name='S3_TASKRUN',
                                 with_encryption=True)
27 changes: 18 additions & 9 deletions pybossa/cloud_store_api/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
from urlparse import urlparse
import boto
from boto.s3.key import Key
from six import BytesIO
from flask import current_app as app
from werkzeug.utils import secure_filename
import magic
from werkzeug.exceptions import BadRequest, InternalServerError
from pybossa.cloud_store_api.connection import create_connection
from pybossa.encryption import AESWithGCM

allowed_mime_types = ['application/pdf',
'text/csv',
Expand Down Expand Up @@ -58,20 +60,20 @@ def tmp_file_from_string(string):

def s3_upload_from_string(s3_bucket, string, filename, headers=None,
                          directory='', file_type_check=True,
                          return_key_only=False, conn_name=DEFAULT_CONN,
                          with_encryption=False):
    """
    Upload a string to s3

    The string is first written to a temporary file with
    ``tmp_file_from_string``; every other argument is forwarded unchanged
    to ``s3_upload_tmp_file``, whose result is returned.

    @param headers: optional headers; ``None`` is replaced by an empty dict.
    @param with_encryption: forwarded to ``s3_upload_tmp_file``.
    """
    tmp_file = tmp_file_from_string(string)
    headers = headers or {}
    return s3_upload_tmp_file(
        s3_bucket, tmp_file, filename, headers, directory, file_type_check,
        return_key_only, conn_name, with_encryption)


def s3_upload_file_storage(s3_bucket, source_file, headers=None, directory='',
file_type_check=True, return_key_only=False,
conn_name=DEFAULT_CONN):
conn_name=DEFAULT_CONN, with_encryption=False):
"""
Upload a werkzeug FileStorage content to s3
"""
Expand All @@ -82,19 +84,26 @@ def s3_upload_file_storage(s3_bucket, source_file, headers=None, directory='',
source_file.save(tmp_file.name)
return s3_upload_tmp_file(
s3_bucket, tmp_file, filename, headers, directory, file_type_check,
return_key_only, conn_name)
return_key_only, conn_name, with_encryption)


def s3_upload_tmp_file(s3_bucket, tmp_file, filename,
headers, directory='', file_type_check=True,
return_key_only=False, conn_name=DEFAULT_CONN):
return_key_only=False, conn_name=DEFAULT_CONN,
with_encryption=False):
"""
Upload the content of a temporary file to s3 and delete the file
"""
try:
if file_type_check:
check_type(tmp_file.name)
url = s3_upload_file(s3_bucket, tmp_file.name, filename, headers,
content = tmp_file.read()
if with_encryption:
secret = app.config.get('FILE_ENCRYPTION_KEY')
cipher = AESWithGCM(secret)
content = cipher.encrypt(content)
fp = BytesIO(content)
url = s3_upload_file(s3_bucket, fp, filename, headers,
directory, return_key_only, conn_name)
finally:
os.unlink(tmp_file.name)
Expand All @@ -108,7 +117,7 @@ def form_upload_directory(directory, filename):
return "/".join(part for part in parts if part)


def s3_upload_file(s3_bucket, source_file_name, target_file_name,
def s3_upload_file(s3_bucket, source_file, target_file_name,
headers, directory="", return_key_only=False,
conn_name=DEFAULT_CONN):
"""
Expand All @@ -129,8 +138,8 @@ def s3_upload_file(s3_bucket, source_file_name, target_file_name,
assert(len(upload_key) < 256)
key = bucket.new_key(upload_key)

key.set_contents_from_filename(
source_file_name, headers=headers,
key.set_contents_from_file(
source_file, headers=headers,
policy='bucket-owner-full-control')

if return_key_only:
Expand Down
2 changes: 2 additions & 0 deletions pybossa/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ def setup_blueprints(app):
from pybossa.view.uploads import blueprint as uploads
from pybossa.view.amazon import blueprint as amazon
from pybossa.view.diagnostics import blueprint as diagnostics
from pybossa.view.fileproxy import blueprint as fileproxy

blueprints = [{'handler': home, 'url_prefix': '/'},
{'handler': api, 'url_prefix': '/api'},
Expand All @@ -354,6 +355,7 @@ def setup_blueprints(app):
{'handler': uploads, 'url_prefix': '/uploads'},
{'handler': amazon, 'url_prefix': '/amazon'},
{'handler': diagnostics, 'url_prefix': '/diagnostics'},
{'handler': fileproxy, 'url_prefix': '/fileproxy'}
]

for bp in blueprints:
Expand Down
69 changes: 69 additions & 0 deletions pybossa/encryption.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import base64
from hashlib import sha256
import os

from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.backends import default_backend
import six


class AESWithGCM(object):
    """Encrypt/Decrypt byte strings using AES256 and GCM.

    The output of the ``encrypt`` method and the input of the ``decrypt``
    method are base64 encoded byte strings whose decoded form has the
    following structure:
    - the first byte is the length of the IV in bytes
    - the remainder is the concatenation of IV + ciphertext + tag
    """

    def __init__(self, key, iv_length=12, tag_length=16):
        """
        @param key: the secret key, unhashed. Its SHA-256 digest is used as
            the AES key.
        @param iv_length: length of the initialization vector in bytes. Only
            needed for encryption. It is stored in a single byte of the
            output, so it has to fit in one byte.
        @param tag_length (bytes): only needed for decryption. Encryption always
            produces 16 bytes tags.
        """
        self.iv_length = iv_length
        self.tag_length = tag_length
        self.key = self._hash_key(key)

    @staticmethod
    def _hash_key(key):
        """Return the SHA-256 digest of ``key`` as a byte string."""
        # Hash functions only accept bytes; encode text keys explicitly so
        # that a key read as text (e.g. from configuration) works on both
        # Python 2 and 3. ``type(u'')`` is ``unicode`` on 2 and ``str`` on 3.
        if isinstance(key, type(u'')):
            key = key.encode('utf-8')
        _hash = sha256()
        _hash.update(key)
        return _hash.digest()

    def get_cipher(self, iv, tag=None):
        """Build the AES-GCM cipher for ``iv``.

        @param iv: initialization vector (bytes).
        @param tag: authentication tag; passed when decrypting, left as
            ``None`` when encrypting.
        """
        backend = default_backend()
        mode = modes.GCM(iv, tag)
        algo = algorithms.AES(self.key)
        return Cipher(algo, mode, backend)

    def encrypt(self, string):
        """
        @param string: a byte string to encrypt
        Return a base64 encoded byte string (see the class docstring for
        the layout). A fresh random IV is generated on every call.
        """
        iv = os.urandom(self.iv_length)
        encryptor = self.get_cipher(iv).encryptor()
        ct = encryptor.update(string) + encryptor.finalize()
        tag = encryptor.tag
        encrypted = six.int2byte(self.iv_length) + iv + ct + tag
        return base64.b64encode(encrypted)

    def _split_ciphertext(self, string):
        """Split a decoded message into ``(iv, ciphertext, tag)``.

        @param string: byte string laid out as
            <iv length byte> + IV + ciphertext + tag
        """
        # Indexing bytes yields a 1-char str on Python 2 but an int on
        # Python 3; going through bytearray gives an int on both.
        iv_length = bytearray(string[:1])[0]
        iv = string[1:iv_length + 1]
        ciphertext = string[iv_length + 1:-self.tag_length]
        tag = string[-self.tag_length:]
        return iv, ciphertext, tag

    def decrypt(self, string):
        '''
        @param string: expected to be base64 encoded.
        Return a byte string
        '''
        decoded = base64.b64decode(string)
        iv, ciphertext, tag = self._split_ciphertext(decoded)
        decryptor = self.get_cipher(iv, tag).decryptor()
        return decryptor.update(ciphertext) + decryptor.finalize()
1 change: 1 addition & 0 deletions pybossa/model/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from flask import current_app

from pybossa.core import db, signer
from pybossa.contributions_guard import ContributionsGuard
from pybossa.model import DomainObject, make_timestamp, make_uuid
from pybossa.model.task import Task
from pybossa.model.task_run import TaskRun
Expand Down
7 changes: 7 additions & 0 deletions pybossa/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -1016,3 +1016,10 @@ def delete_import_csv_file(path):
delete_file_from_s3(s3_bucket, path, conn_name='S3_IMPORT')
else:
os.remove(path)


def sign_task(task):
    """Add a ``signature`` entry to a task dict, in place.

    The signature is produced by the application signer from the task id.
    Nothing happens unless the ``ENABLE_ENCRYPTION`` setting is truthy.

    @param task: task dictionary; must contain ``id``.
    """
    if not current_app.config.get('ENABLE_ENCRYPTION'):
        return
    # NOTE(review): imported at call time, presumably to avoid a circular
    # import with pybossa.core - confirm before moving to module level.
    from pybossa.core import signer
    payload = {'task_id': task['id']}
    task['signature'] = signer.dumps(payload)

0 comments on commit 445973d

Please sign in to comment.