Permalink
Browse files

Added MAT to source.py as a metadata purge option.

- MAT consists of a Python library that makes
use of other metadata tools dedicated to specific
file formats.

Always nice, tidy, and neat.

Added checkbox for metadata purge.

- Also added more validation on the
file selection for the cleanup.

Better validation on what file to write to.

Added MAT to requirements.txt

- Note: MAT is not available on
  PyPI; therefore, we clone it
  from the Tor repository.

Added MAT to source-requirements.txt

Added exiftool and poppler dependencies to the Debian/Ubuntu script.

Added tests for binary file upload.

Added test images for MAT

Added MAT to source-requirements and setup_dev.sh

Added intltool for MAT to travis.yml
  • Loading branch information...
1 parent dee723c commit 43c2692dd1d843f05cacaa38455f06575e331a79 @gnusosa committed Mar 3, 2014
View
@@ -2,7 +2,9 @@ language: python
python:
- "2.7"
install:
+ - sudo apt-get install intltool
- pip install --upgrade distribute
+ - pip install https://launchpad.net/python-distutils-extra/trunk/2.38/+download/python-distutils-extra-2.38.tar.gz
- pip install -r securedrop/source-requirements.txt
- pip install -r securedrop/document-requirements.txt
- pip install -r securedrop/test-requirements.txt
@@ -7,3 +7,6 @@ apache2-mpm-worker
libapache2-mod-wsgi
python-pip
python-dev
+libimage-exiftool-perl
+python-poppler
+python-distutils-extra
@@ -12,3 +12,8 @@ pycrypto==2.6.1
gnupg-securedrop==1.2.5-9-g6f9d63a-dirty
scrypt==0.6.1
wsgiref==0.1.2
+hachoir-core==1.3.3
+hachoir-parser==1.3.4
+mutagen==1.22
+pdfrw==0.1
+https://mat.boum.org/files/mat-0.4.2.tar.gz
View
@@ -172,12 +172,13 @@ def async_genkey(sid, codename):
def submit():
msg = request.form['msg']
fh = request.files['fh']
+ not_clean = True if 'notclean' in request.form else False
if msg:
store.save_message_submission(g.sid, msg)
flash("Thanks! We received your message.", "notification")
if fh:
- store.save_file_submission(g.sid, fh.filename, fh.stream)
+ store.save_file_submission(g.sid, fh.filename, fh.stream, fh.content_type, not_clean)
flash("Thanks! We received your document '%s'."
% fh.filename or '[unnamed]', "notification")
@@ -29,7 +29,9 @@
<input name="csrf_token" type="hidden" value="{{ csrf_token() }}"/>
<p style="padding-bottom: 0"><b>Upload a file:</b></p>
<div id="browse-select">
- <input type="file" name="fh" autocomplete="off"/>
+ <input type="file" name="fh" autocomplete="off"/><br />
+ <input type="checkbox" id="cleanup" name="notclean" value="True" />
+ <label for="cleanup">Remove all the metadata of the file.</label>
</div>
<p><b>Or just enter a message:</b></p>
@@ -145,6 +145,16 @@ form input#filename{
padding:0 10px;
}
+form input#cleanup{
+ color:#666;
+ border:none;
+ font-family: Helvetica, Arial, Verdana, sans-serif;
+ font-weight:400;
+ font-size:12px;
+ height:30px;
+ margin:15px 5px 0 5px;
+ padding:0 10px;
+}
/* add back (removed by reset) indents and bullets for plain lists in text */
ul {
View
@@ -8,6 +8,10 @@
import tempfile
import subprocess
from cStringIO import StringIO
+from shutil import copyfileobj
+
+from MAT import mat
+from MAT import strippers
import logging
log = logging.getLogger(__name__)
@@ -74,15 +78,34 @@ def get_bulk_archive(filenames):
zip.write(filename, arcname=os.path.basename(filename))
return zip_file
-
-def save_file_submission(sid, filename, stream):
+def save_file_submission(sid, filename, stream, content_type, not_clean):
sanitized_filename = secure_filename(filename)
+ text_plain = content_type == 'text/plain'
+
+ f = None
+ t = None
+ clean_file = False
+
+ if not_clean and not text_plain:
+ t = tempfile.NamedTemporaryFile()
+ copyfileobj(stream, t)
+ t.flush()
+ file_meta = metadata_handler(t.name)
+
+ if not file_meta.is_clean():
+ file_meta.remove_all()
+ f = open(t.name)
+ clean_file = True
s = StringIO()
with zipfile.ZipFile(s, 'w') as zf:
- zf.writestr(sanitized_filename, stream.read())
+ zf.writestr(sanitized_filename, f.read() if clean_file else stream.read())
s.reset()
+ if clean_file:
+ f.close()
+ t.close()
+
file_loc = path(sid, "%s_doc.zip.gpg" % uuid.uuid4())
crypto_util.encrypt(config.JOURNALIST_KEY, s, file_loc)
@@ -103,3 +126,6 @@ def secure_unlink(fn, recursive=False):
def delete_source_directory(source_id):
secure_unlink(path(source_id), recursive=True)
+
+def metadata_handler(f):
+ return mat.create_class_file(f, False, add2archive=True)
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@@ -25,7 +25,6 @@
import journalist
import test_setup
-
def _block_on_reply_keypair_gen(codename):
sid = crypto_util.hash_codename(codename)
while not crypto_util.getkey(sid):
@@ -192,6 +191,63 @@ def test_submit_both(self):
self.assertIn(escape("Thanks! We received your document 'test.txt'."),
rv.data)
+ def test_submit_dirty_file(self):
+ self.gpg = gnupg.GPG(homedir=config.GPG_KEY_DIR)
+ self._new_codename()
+ img = open(os.getcwd()+'/tests/test_images/dirty.jpg')
+ img_metadata = store.metadata_handler(img.name)
+ self.assertFalse(img_metadata.is_clean(), "The file is dirty.")
+ del(img_metadata)
+ codename = self._new_codename()
+ rv = self.client.post('/submit', data=dict(
+ msg="This is a test",
+ fh=(img, 'dirty.jpg'),
+ notclean='True',
+ ), follow_redirects=True)
+ self.assertEqual(rv.status_code, 200)
+ self.assertIn("Thanks! We received your message.", rv.data)
+ self.assertIn(escape("Thanks! We received your document 'dirty.jpg'."),
+ rv.data)
+
+ store_dirs = [os.path.join(config.STORE_DIR,d) for d in os.listdir(config.STORE_DIR) if os.path.isdir(os.path.join(config.STORE_DIR,d))]
+ latest_subdir = max(store_dirs, key=os.path.getmtime)
+ zip_gpg_files = [os.path.join(latest_subdir,f) for f in os.listdir(latest_subdir) if os.path.isfile(os.path.join(latest_subdir,f))]
+ zip_gpg = max(zip_gpg_files, key=os.path.getmtime)
+
+ zip_gpg_file = open(zip_gpg)
+ decrypted_data = self.gpg.decrypt_file(zip_gpg_file)
+ self.assertTrue(decrypted_data.ok, 'Checking the integrity of the data after decryption.')
+
+ s = StringIO(decrypted_data.data)
+ zip_file = zipfile.ZipFile(s, 'r')
+ clean_file = open(os.path.join(latest_subdir,'dirty.jpg'), 'w+b')
+ clean_file.write(zip_file.read('dirty.jpg'))
+ clean_file.seek(0)
+ zip_file.close()
+
+ # check for the actual file been clean
+ clean_file_metadata = store.metadata_handler(clean_file.name)
+ self.assertTrue(clean_file_metadata.is_clean(), "the file is now clean.")
+ del(clean_file_metadata)
+ zip_gpg_file.close()
+ clean_file.close()
+ img.close()
+
+ def test_submit_clean_file(self):
+ self._new_codename()
+ img = open(os.getcwd()+'/tests/test_images/clean.jpg')
+ codename = self._new_codename()
+ rv = self.client.post('/submit', data=dict(
+ msg="This is a test",
+ fh=(img, 'clean.jpg'),
+ notclean='True',
+ ), follow_redirects=True)
+ self.assertEqual(rv.status_code, 200)
+ self.assertIn("Thanks! We received your message.", rv.data)
+ self.assertIn(escape("Thanks! We received your document 'clean.jpg'."),
+ rv.data)
+ img.close()
+
@patch('zipfile.ZipFile.writestr')
def test_submit_sanitizes_filename(self, zipfile_write):
"""Test that upload file name is sanitized"""
View
@@ -19,7 +19,7 @@ EOS
SOURCE_ROOT=$(dirname $0)
securedrop_root=$(pwd)/.securedrop
-DEPENDENCIES="gnupg2 secure-delete haveged python-dev python-pip sqlite"
+DEPENDENCIES="gnupg2 secure-delete haveged python-dev python-pip sqlite python-distutils-extra"
while getopts "r:uh" OPTION; do

0 comments on commit 43c2692

Please sign in to comment.