Permalink
Browse files

Added MAT to source.py as a metadata purge option.

- MAT consists of a Python library that makes
use of other metadata tools dedicated to specific
file formats.

Always nice tidy and neat.

Added checkbox for metadata purge.

- Also added more validation on the
file selection for the cleanup.

Better validation on what file to write to.

Added MAT to requirements.txt

- Note: MAT is not available on
  PyPI; therefore, we clone it
  from the Tor repository.

Added MAT to source-requirements.txt

Added exiftool and poppler dependencies to Debian/Ubuntu script.

Added tests for binary file upload.

Added test images for MAT

Adding python-distutils-extra to source-requirements.txt
  • Loading branch information...
1 parent c59609d commit 59a64ac9d3f0d572a0cb7d94a6070c5644284955 @gnusosa committed Mar 3, 2014
View
@@ -3,6 +3,7 @@ python:
- "2.7"
install:
- pip install --upgrade distribute
+ - pip install https://launchpad.net/python-distutils-extra/trunk/2.38/+download/python-distutils-extra-2.38.tar.gz
- pip install -r securedrop/source-requirements.txt
- pip install -r securedrop/document-requirements.txt
- pip install -r securedrop/test-requirements.txt
@@ -7,3 +7,6 @@ apache2-mpm-worker
libapache2-mod-wsgi
python-pip
python-dev
+libimage-exiftool-perl
+python-poppler
+python-distutils-extra
@@ -12,3 +12,8 @@ pycrypto==2.6.1
gnupg-securedrop==1.2.5-9-g6f9d63a-dirty
scrypt==0.6.1
wsgiref==0.1.2
+hachoir-core==1.3.3
+hachoir-parser==1.3.4
+mutagen==1.22
+pdfrw==0.1
+-e git+https://git.torproject.org/user/jvoisin/mat.git#egg=MAT
@@ -172,12 +172,13 @@ def async_genkey(sid, codename):
def submit():
msg = request.form['msg']
fh = request.files['fh']
+ not_clean = True if 'notclean' in request.form else False
if msg:
store.save_message_submission(g.sid, msg)
flash("Thanks! We received your message.", "notification")
if fh:
- store.save_file_submission(g.sid, fh.filename, fh.stream)
+ store.save_file_submission(g.sid, fh.filename, fh.stream, fh.content_type, not_clean)
flash("Thanks! We received your document '%s'."
% fh.filename or '[unnamed]', "notification")
@@ -29,7 +29,9 @@
<input name="csrf_token" type="hidden" value="{{ csrf_token() }}"/>
<p style="padding-bottom: 0"><b>Upload a file:</b></p>
<div id="browse-select">
- <input type="file" name="fh" autocomplete="off"/>
+ <input type="file" name="fh" autocomplete="off"/><br />
+ <input type="checkbox" id="cleanup" name="notclean" value="True" />
+ <label for="cleanup">Remove all the metadata of the file.</label>
</div>
<p><b>Or just enter a message:</b></p>
@@ -145,6 +145,16 @@ form input#filename{
padding:0 10px;
}
+form input#cleanup{
+ color:#666;
+ border:none;
+ font-family: Helvetica, Arial, Verdana, sans-serif;
+ font-weight:400;
+ font-size:12px;
+ height:30px;
+ margin:15px 5px 0 5px;
+ padding:0 10px;
+}
/* add back (removed by reset) indents and bullets for plain lists in text */
ul {
View
@@ -8,6 +8,10 @@
import tempfile
import subprocess
from cStringIO import StringIO
+from shutil import copyfileobj
+
+from MAT import mat
+from MAT import strippers
import logging
log = logging.getLogger(__name__)
@@ -74,15 +78,34 @@ def get_bulk_archive(filenames):
zip.write(filename, arcname=os.path.basename(filename))
return zip_file
-
-def save_file_submission(sid, filename, stream):
+def save_file_submission(sid, filename, stream, content_type, not_clean):
sanitized_filename = secure_filename(filename)
+ text_plain = content_type == 'text/plain'
+
+ f = None
+ t = None
+ clean_file = False
+
+ if not_clean and not text_plain:
+ t = tempfile.NamedTemporaryFile()
+ copyfileobj(stream, t)
+ t.flush()
+ file_meta = metadata_handler(t.name)
+
+ if not file_meta.is_clean():
+ file_meta.remove_all()
+ f = open(t.name)
+ clean_file = True
s = StringIO()
with zipfile.ZipFile(s, 'w') as zf:
- zf.writestr(sanitized_filename, stream.read())
+ zf.writestr(sanitized_filename, f.read() if clean_file else stream.read())
s.reset()
+ if clean_file:
+ f.close()
+ t.close()
+
file_loc = path(sid, "%s_doc.zip.gpg" % uuid.uuid4())
crypto_util.encrypt(config.JOURNALIST_KEY, s, file_loc)
@@ -103,3 +126,6 @@ def secure_unlink(fn, recursive=False):
def delete_source_directory(source_id):
secure_unlink(path(source_id), recursive=True)
+
+def metadata_handler(f):
+ return mat.create_class_file(f, False, add2archive=True)
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@@ -25,7 +25,6 @@
import journalist
import test_setup
-
def _block_on_reply_keypair_gen(codename):
sid = crypto_util.hash_codename(codename)
while not crypto_util.getkey(sid):
@@ -192,6 +191,63 @@ def test_submit_both(self):
self.assertIn(escape("Thanks! We received your document 'test.txt'."),
rv.data)
+ def test_submit_dirty_file(self):
+ self.gpg = gnupg.GPG(homedir=config.GPG_KEY_DIR)
+ self._new_codename()
+ img = open(os.getcwd()+'/tests/test_images/dirty.jpg')
+ img_metadata = store.metadata_handler(img.name)
+ self.assertFalse(img_metadata.is_clean(), "The file is dirty.")
+ del(img_metadata)
+ codename = self._new_codename()
+ rv = self.client.post('/submit', data=dict(
+ msg="This is a test",
+ fh=(img, 'dirty.jpg'),
+ notclean='True',
+ ), follow_redirects=True)
+ self.assertEqual(rv.status_code, 200)
+ self.assertIn("Thanks! We received your message.", rv.data)
+ self.assertIn(escape("Thanks! We received your document 'dirty.jpg'."),
+ rv.data)
+
+ store_dirs = [os.path.join(config.STORE_DIR,d) for d in os.listdir(config.STORE_DIR) if os.path.isdir(os.path.join(config.STORE_DIR,d))]
+ latest_subdir = max(store_dirs, key=os.path.getmtime)
+ zip_gpg_files = [os.path.join(latest_subdir,f) for f in os.listdir(latest_subdir) if os.path.isfile(os.path.join(latest_subdir,f))]
+ zip_gpg = max(zip_gpg_files, key=os.path.getmtime)
+
+ zip_gpg_file = open(zip_gpg)
+ decrypted_data = self.gpg.decrypt_file(zip_gpg_file)
+ self.assertTrue(decrypted_data.ok, 'Checking the integrity of the data after decryption.')
+
+ s = StringIO(decrypted_data.data)
+ zip_file = zipfile.ZipFile(s, 'r')
+ clean_file = open(os.path.join(latest_subdir,'dirty.jpg'), 'w+b')
+ clean_file.write(zip_file.read('dirty.jpg'))
+ clean_file.seek(0)
+ zip_file.close()
+
+ # check that the actual file is now clean
+ clean_file_metadata = store.metadata_handler(clean_file.name)
+ self.assertTrue(clean_file_metadata.is_clean(), "the file is now clean.")
+ del(clean_file_metadata)
+ zip_gpg_file.close()
+ clean_file.close()
+ img.close()
+
+ def test_submit_clean_file(self):
+ self._new_codename()
+ img = open(os.getcwd()+'/tests/test_images/clean.jpg')
+ codename = self._new_codename()
+ rv = self.client.post('/submit', data=dict(
+ msg="This is a test",
+ fh=(img, 'clean.jpg'),
+ notclean='True',
+ ), follow_redirects=True)
+ self.assertEqual(rv.status_code, 200)
+ self.assertIn("Thanks! We received your message.", rv.data)
+ self.assertIn(escape("Thanks! We received your document 'clean.jpg'."),
+ rv.data)
+ img.close()
+
@patch('zipfile.ZipFile.writestr')
def test_submit_sanitizes_filename(self, zipfile_write):
"""Test that upload file name is sanitized"""

0 comments on commit 59a64ac

Please sign in to comment.