Permalink
Browse files

Added MAT to source.py as a metadata purge option.

- MAT consists of a Python library that makes
use of other metadata tools dedicated to specific
file formats.

Always nice tidy and neat.

Added checkbox for metadata purge.

- Also added more validation on the
file selection for the cleanup.

Better validation on what file to write to.

Added MAT to requirements.txt

- Note: MAT is not available on
  PyPI; therefore, we clone it
  from the Tor repository.

Added MAT to source-requirements.txt

Added exiftool and poppler dependency to Debian/Ubuntu script.

Added tests for binary file upload.

Added test images for MAT

Added MAT to source-requirements and setup_dev.sh

Added intltool for MAT to travis.yml

Added secure_unlink and sanitize_metadata function.
  • Loading branch information...
1 parent dee723c commit cf6914bc111947ff52ae9443a24b3342c5cae974 @gnusosa committed Mar 3, 2014
View
@@ -2,7 +2,9 @@ language: python
python:
- "2.7"
install:
+ - sudo apt-get install intltool
- pip install --upgrade distribute
+ - pip install https://launchpad.net/python-distutils-extra/trunk/2.38/+download/python-distutils-extra-2.38.tar.gz
- pip install -r securedrop/source-requirements.txt
- pip install -r securedrop/document-requirements.txt
- pip install -r securedrop/test-requirements.txt
@@ -7,3 +7,6 @@ apache2-mpm-worker
libapache2-mod-wsgi
python-pip
python-dev
+libimage-exiftool-perl
+python-poppler
+python-distutils-extra
@@ -12,3 +12,8 @@ pycrypto==2.6.1
gnupg-securedrop==1.2.5-9-g6f9d63a-dirty
scrypt==0.6.1
wsgiref==0.1.2
+hachoir-core==1.3.3
+hachoir-parser==1.3.4
+mutagen==1.22
+pdfrw==0.1
+https://mat.boum.org/files/mat-0.4.2.tar.gz
@@ -172,12 +172,13 @@ def async_genkey(sid, codename):
def submit():
msg = request.form['msg']
fh = request.files['fh']
+ strip_metadata = True if 'notclean' in request.form else False
if msg:
store.save_message_submission(g.sid, msg)
flash("Thanks! We received your message.", "notification")
if fh:
- store.save_file_submission(g.sid, fh.filename, fh.stream)
+ store.save_file_submission(g.sid, fh.filename, fh.stream, fh.content_type, strip_metadata)
flash("Thanks! We received your document '%s'."
% fh.filename or '[unnamed]', "notification")
@@ -29,7 +29,9 @@
<input name="csrf_token" type="hidden" value="{{ csrf_token() }}"/>
<p style="padding-bottom: 0"><b>Upload a file:</b></p>
<div id="browse-select">
- <input type="file" name="fh" autocomplete="off"/>
+ <input type="file" name="fh" autocomplete="off"/><br />
+ <input type="checkbox" id="cleanup" name="notclean" value="True" />
+ <label for="cleanup">Remove all the metadata of the file.</label>
</div>
<p><b>Or just enter a message:</b></p>
@@ -145,6 +145,16 @@ form input#filename{
padding:0 10px;
}
+form input#cleanup{
+ color:#666;
+ border:none;
+ font-family: Helvetica, Arial, Verdana, sans-serif;
+ font-weight:400;
+ font-size:12px;
+ height:30px;
+ margin:15px 5px 0 5px;
+ padding:0 10px;
+}
/* add back (removed by reset) indents and bullets for plain lists in text */
ul {
View
@@ -8,6 +8,10 @@
import tempfile
import subprocess
from cStringIO import StringIO
+from shutil import copyfileobj
+
+from MAT import mat
+from MAT import strippers
import logging
log = logging.getLogger(__name__)
@@ -74,26 +78,26 @@ def get_bulk_archive(filenames):
zip.write(filename, arcname=os.path.basename(filename))
return zip_file
-
-def save_file_submission(sid, filename, stream):
+def save_file_submission(sid, filename, stream, content_type, strip_metadata):
sanitized_filename = secure_filename(filename)
+ clean_file = sanitize_metadata(stream, content_type, strip_metadata)
s = StringIO()
with zipfile.ZipFile(s, 'w') as zf:
- zf.writestr(sanitized_filename, stream.read())
+ zf.writestr(sanitized_filename, clean_file.read() if clean_file else stream.read())
s.reset()
file_loc = path(sid, "%s_doc.zip.gpg" % uuid.uuid4())
crypto_util.encrypt(config.JOURNALIST_KEY, s, file_loc)
-
def save_message_submission(sid, message):
    """Encrypt ``message`` to the journalist key and write it out for ``sid``.

    The ciphertext goes to a freshly named ``<uuid4>_msg.gpg`` location
    obtained from ``path(sid, ...)``.
    """
    destination = path(sid, '%s_msg.gpg' % uuid.uuid4())
    crypto_util.encrypt(config.JOURNALIST_KEY, message, destination)
-def secure_unlink(fn, recursive=False):
- verify(fn)
+def secure_unlink(fn, recursive=False, do_verify = True):
+ if do_verify:
+ verify(fn)
command = ['srm']
if recursive:
command.append('-r')
@@ -103,3 +107,34 @@ def secure_unlink(fn, recursive=False):
def delete_source_directory(source_id):
    """Recursively remove the location ``path(source_id)`` via ``secure_unlink``."""
    source_dir = path(source_id)
    secure_unlink(source_dir, recursive=True)
+
def metadata_handler(f):
    """Return the MAT handler object for the file at path ``f``.

    This is a thin wrapper around ``mat.create_class_file``. Callers in this
    file use the result's ``is_clean()`` and ``remove_all()`` methods.
    """
    # NOTE(review): the positional ``False`` is presumably MAT's "backup"
    # flag (do not keep a copy of the original) -- confirm against MAT.
    handler = mat.create_class_file(f, False, add2archive=True)
    return handler
+
def sanitize_metadata(stream, content_type, strip_metadata):
    """Return a metadata-stripped copy of an upload, or None if not requested.

    The upload is spooled to a named temporary file because the MAT handler
    is created from a file path (see ``metadata_handler``), not a stream.

    :param stream: readable file-like object holding the upload. When
        stripping is performed it is read to exhaustion.
    :param content_type: MIME type reported for the upload. ``text/plain``
        uploads are never processed.
    :param strip_metadata: truthy when metadata removal was requested.
    :returns: ``None`` when no processing was requested (``strip_metadata``
        is falsy or the upload is ``text/plain``); in that case ``stream``
        is left untouched. Otherwise a ``StringIO`` positioned at offset 0
        holding the file contents, with metadata removed if any was found.
    """
    if not strip_metadata or content_type == 'text/plain':
        return None

    t = tempfile.NamedTemporaryFile(delete=False)
    try:
        copyfileobj(stream, t)
        # Make sure every byte is on disk before MAT opens the file by name.
        t.flush()
        file_meta = metadata_handler(t.name)

        if file_meta is None:
            # NOTE(review): MAT appears to return None for formats it cannot
            # handle -- confirm. The upload is passed through unmodified
            # rather than failing the whole submission.
            log.warning("No metadata handler available for this upload; "
                        "storing it unmodified.")
        elif not file_meta.is_clean():
            file_meta.remove_all()

        # ``stream`` has been drained by copyfileobj above, so the contents
        # must always be handed back from the temporary file -- even when the
        # file was already clean. Returning None here would make the caller
        # fall back to the exhausted stream and store an empty document.
        with open(t.name, 'rb') as f:
            s = StringIO(f.read())
    finally:
        # Always shred the plaintext temporary copy, including on errors.
        t.close()
        secure_unlink(t.name, do_verify=False)

    return s
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@@ -25,7 +25,6 @@
import journalist
import test_setup
-
def _block_on_reply_keypair_gen(codename):
sid = crypto_util.hash_codename(codename)
while not crypto_util.getkey(sid):
@@ -192,6 +191,104 @@ def test_submit_both(self):
self.assertIn(escape("Thanks! We received your document 'test.txt'."),
rv.data)
def test_submit_dirty_file_to_be_cleaned(self):
    """A dirty JPEG submitted with ``notclean`` set is stored metadata-free.

    The stored submission is decrypted, unzipped and re-inspected with
    ``store.metadata_handler`` to confirm the metadata is gone.
    """
    self.gpg = gnupg.GPG(homedir=config.GPG_KEY_DIR)
    self._new_codename()
    dirty_path = os.getcwd() + '/tests/test_images/dirty.jpg'

    # Precondition: the fixture really does carry metadata.
    img_metadata = store.metadata_handler(dirty_path)
    self.assertFalse(img_metadata.is_clean(), "The file is dirty.")
    del img_metadata

    self._new_codename()
    # Binary mode: the fixture is a JPEG. The context manager closes the
    # handle even if the request raises.
    with open(dirty_path, 'rb') as img:
        rv = self.client.post('/submit', data=dict(
            msg="This is a test",
            fh=(img, 'dirty.jpg'),
            notclean='True',
        ), follow_redirects=True)
    self.assertEqual(rv.status_code, 200)
    self.assertIn("Thanks! We received your message.", rv.data)
    self.assertIn(escape("Thanks! We received your document 'dirty.jpg'."),
                  rv.data)

    # Pick the most recently modified source directory, then the most
    # recently modified file inside it.
    store_dirs = [os.path.join(config.STORE_DIR, d)
                  for d in os.listdir(config.STORE_DIR)
                  if os.path.isdir(os.path.join(config.STORE_DIR, d))]
    latest_subdir = max(store_dirs, key=os.path.getmtime)
    zip_gpg_files = [os.path.join(latest_subdir, f)
                     for f in os.listdir(latest_subdir)
                     if os.path.isfile(os.path.join(latest_subdir, f))]
    zip_gpg = max(zip_gpg_files, key=os.path.getmtime)

    with open(zip_gpg, 'rb') as zip_gpg_file:
        decrypted_data = self.gpg.decrypt_file(zip_gpg_file)
    self.assertTrue(decrypted_data.ok,
                    'Checking the integrity of the data after decryption.')

    zip_file = zipfile.ZipFile(StringIO(decrypted_data.data), 'r')
    try:
        extracted = zip_file.read('dirty.jpg')
    finally:
        zip_file.close()

    # Write the extracted image out and close it before inspection, so the
    # metadata handler sees the complete file on disk.
    extracted_path = os.path.join(latest_subdir, 'dirty.jpg')
    with open(extracted_path, 'wb') as clean_file:
        clean_file.write(extracted)

    # check for the actual file been clean
    clean_file_metadata = store.metadata_handler(extracted_path)
    self.assertTrue(clean_file_metadata.is_clean(), "the file is now clean.")
    del clean_file_metadata
+
def test_submit_dirty_file_to_not_clean(self):
    """A dirty JPEG submitted without ``notclean`` keeps its metadata.

    The stored submission is decrypted, unzipped and re-inspected with
    ``store.metadata_handler`` to confirm the metadata is still present.
    """
    self.gpg = gnupg.GPG(homedir=config.GPG_KEY_DIR)
    self._new_codename()
    dirty_path = os.getcwd() + '/tests/test_images/dirty.jpg'

    # Precondition: the fixture really does carry metadata.
    img_metadata = store.metadata_handler(dirty_path)
    self.assertFalse(img_metadata.is_clean(), "The file is dirty.")
    del img_metadata

    self._new_codename()
    # Binary mode: the fixture is a JPEG. The context manager closes the
    # handle even if the request raises.
    with open(dirty_path, 'rb') as img:
        rv = self.client.post('/submit', data=dict(
            msg="This is a test",
            fh=(img, 'dirty.jpg'),
        ), follow_redirects=True)
    self.assertEqual(rv.status_code, 200)
    self.assertIn("Thanks! We received your message.", rv.data)
    self.assertIn(escape("Thanks! We received your document 'dirty.jpg'."),
                  rv.data)

    # Pick the most recently modified source directory, then the most
    # recently modified file inside it.
    store_dirs = [os.path.join(config.STORE_DIR, d)
                  for d in os.listdir(config.STORE_DIR)
                  if os.path.isdir(os.path.join(config.STORE_DIR, d))]
    latest_subdir = max(store_dirs, key=os.path.getmtime)
    zip_gpg_files = [os.path.join(latest_subdir, f)
                     for f in os.listdir(latest_subdir)
                     if os.path.isfile(os.path.join(latest_subdir, f))]
    zip_gpg = max(zip_gpg_files, key=os.path.getmtime)

    with open(zip_gpg, 'rb') as zip_gpg_file:
        decrypted_data = self.gpg.decrypt_file(zip_gpg_file)
    self.assertTrue(decrypted_data.ok,
                    'Checking the integrity of the data after decryption.')

    zip_file = zipfile.ZipFile(StringIO(decrypted_data.data), 'r')
    try:
        extracted = zip_file.read('dirty.jpg')
    finally:
        zip_file.close()

    # Write the extracted image out and close it before inspection, so the
    # metadata handler sees the complete file on disk.
    extracted_path = os.path.join(latest_subdir, 'dirty.jpg')
    with open(extracted_path, 'wb') as clean_file:
        clean_file.write(extracted)

    # check for the actual file been clean
    clean_file_metadata = store.metadata_handler(extracted_path)
    self.assertFalse(clean_file_metadata.is_clean(),
                     "the file is was not cleaned.")
    del clean_file_metadata
+
def test_submit_clean_file(self):
    """An already-clean JPEG submitted with ``notclean`` set is accepted."""
    self._new_codename()
    self._new_codename()
    clean_path = os.getcwd() + '/tests/test_images/clean.jpg'
    # Binary mode: the fixture is a JPEG. The context manager closes the
    # handle even if the request or an assertion fails.
    with open(clean_path, 'rb') as img:
        rv = self.client.post('/submit', data=dict(
            msg="This is a test",
            fh=(img, 'clean.jpg'),
            notclean='True',
        ), follow_redirects=True)
    self.assertEqual(rv.status_code, 200)
    self.assertIn("Thanks! We received your message.", rv.data)
    self.assertIn(escape("Thanks! We received your document 'clean.jpg'."),
                  rv.data)
+
@patch('zipfile.ZipFile.writestr')
def test_submit_sanitizes_filename(self, zipfile_write):
"""Test that upload file name is sanitized"""
View
@@ -19,7 +19,7 @@ EOS
SOURCE_ROOT=$(dirname $0)
securedrop_root=$(pwd)/.securedrop
-DEPENDENCIES="gnupg2 secure-delete haveged python-dev python-pip sqlite"
+DEPENDENCIES="gnupg2 secure-delete haveged python-dev python-pip sqlite python-distutils-extra"
while getopts "r:uh" OPTION; do

0 comments on commit cf6914b

Please sign in to comment.