Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Added MAT to source.py as a metadata purge option.

- MAT consists of a Python library that makes
use of other metadata tools dedicated to specific
file formats.

Always nice, tidy, and neat.

Added checkbox for metadata purge.

- Also added more validation on the
file selection for the cleanup.

Better validation on what file to write to.

Added MAT to requirements.txt

- Note: MAT is not available on
  PyPI; therefore, we clone it
  from the Tor repository.

Added MAT to source-requirements.txt

Added exiftool and poppler dependency to Debian/Ubuntu script.

Added tests for binary file upload.

Added test images for MAT

Added MAT to source-requirements and setup_dev.sh

Added intltool for MAT to travis.yml
  • Loading branch information...
commit 43c2692dd1d843f05cacaa38455f06575e331a79 1 parent dee723c
Carlos Sosa authored
2  .travis.yml
View
@@ -2,7 +2,9 @@ language: python
python:
- "2.7"
install:
+ - sudo apt-get install intltool
- pip install --upgrade distribute
+ - pip install https://launchpad.net/python-distutils-extra/trunk/2.38/+download/python-distutils-extra-2.38.tar.gz
- pip install -r securedrop/source-requirements.txt
- pip install -r securedrop/document-requirements.txt
- pip install -r securedrop/test-requirements.txt
3  install_files/source-requirements.txt
View
@@ -7,3 +7,6 @@ apache2-mpm-worker
libapache2-mod-wsgi
python-pip
python-dev
+libimage-exiftool-perl
+python-poppler
+python-distutils-extra
5 securedrop/source-requirements.txt
View
@@ -12,3 +12,8 @@ pycrypto==2.6.1
gnupg-securedrop==1.2.5-9-g6f9d63a-dirty
scrypt==0.6.1
wsgiref==0.1.2
+hachoir-core==1.3.3
+hachoir-parser==1.3.4
+mutagen==1.22
+pdfrw==0.1
+https://mat.boum.org/files/mat-0.4.2.tar.gz
3  securedrop/source.py
View
@@ -172,12 +172,13 @@ def async_genkey(sid, codename):
def submit():
msg = request.form['msg']
fh = request.files['fh']
+ not_clean = True if 'notclean' in request.form else False
if msg:
store.save_message_submission(g.sid, msg)
flash("Thanks! We received your message.", "notification")
if fh:
- store.save_file_submission(g.sid, fh.filename, fh.stream)
+ store.save_file_submission(g.sid, fh.filename, fh.stream, fh.content_type, not_clean)
flash("Thanks! We received your document '%s'."
% fh.filename or '[unnamed]', "notification")
4 securedrop/source_templates/lookup.html
View
@@ -29,7 +29,9 @@
<input name="csrf_token" type="hidden" value="{{ csrf_token() }}"/>
<p style="padding-bottom: 0"><b>Upload a file:</b></p>
<div id="browse-select">
- <input type="file" name="fh" autocomplete="off"/>
+ <input type="file" name="fh" autocomplete="off"/><br />
+ <input type="checkbox" id="cleanup" name="notclean" value="True" />
+ <label for="cleanup">Remove all the metadata of the file.</label>
</div>
<p><b>Or just enter a message:</b></p>
10 securedrop/static/css/securedrop.css
View
@@ -145,6 +145,16 @@ form input#filename{
padding:0 10px;
}
+form input#cleanup{
+ color:#666;
+ border:none;
+ font-family: Helvetica, Arial, Verdana, sans-serif;
+ font-weight:400;
+ font-size:12px;
+ height:30px;
+ margin:15px 5px 0 5px;
+ padding:0 10px;
+}
/* add back (removed by reset) indents and bullets for plain lists in text */
ul {
32 securedrop/store.py
View
@@ -8,6 +8,10 @@
import tempfile
import subprocess
from cStringIO import StringIO
+from shutil import copyfileobj
+
+from MAT import mat
+from MAT import strippers
import logging
log = logging.getLogger(__name__)
@@ -74,15 +78,34 @@ def get_bulk_archive(filenames):
zip.write(filename, arcname=os.path.basename(filename))
return zip_file
-
-def save_file_submission(sid, filename, stream):
+def save_file_submission(sid, filename, stream, content_type, not_clean):
sanitized_filename = secure_filename(filename)
+ text_plain = content_type == 'text/plain'
+
+ f = None
+ t = None
+ clean_file = False
+
+ if not_clean and not text_plain:
+ t = tempfile.NamedTemporaryFile()
+ copyfileobj(stream, t)
+ t.flush()
+ file_meta = metadata_handler(t.name)
+
+ if not file_meta.is_clean():
+ file_meta.remove_all()
+ f = open(t.name)
+ clean_file = True
s = StringIO()
with zipfile.ZipFile(s, 'w') as zf:
- zf.writestr(sanitized_filename, stream.read())
+ zf.writestr(sanitized_filename, f.read() if clean_file else stream.read())
s.reset()
+ if clean_file:
+ f.close()
+ t.close()
+
file_loc = path(sid, "%s_doc.zip.gpg" % uuid.uuid4())
crypto_util.encrypt(config.JOURNALIST_KEY, s, file_loc)
@@ -103,3 +126,6 @@ def secure_unlink(fn, recursive=False):
def delete_source_directory(source_id):
secure_unlink(path(source_id), recursive=True)
+
+def metadata_handler(f):
+ return mat.create_class_file(f, False, add2archive=True)
BIN  securedrop/tests/test_images/clean.jpg
View
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
BIN  securedrop/tests/test_images/dirty.jpg
View
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
58 securedrop/tests/unit_tests.py
View
@@ -25,7 +25,6 @@
import journalist
import test_setup
-
def _block_on_reply_keypair_gen(codename):
sid = crypto_util.hash_codename(codename)
while not crypto_util.getkey(sid):
@@ -192,6 +191,63 @@ def test_submit_both(self):
self.assertIn(escape("Thanks! We received your document 'test.txt'."),
rv.data)
+ def test_submit_dirty_file(self):
+ self.gpg = gnupg.GPG(homedir=config.GPG_KEY_DIR)
+ self._new_codename()
+ img = open(os.getcwd()+'/tests/test_images/dirty.jpg')
+ img_metadata = store.metadata_handler(img.name)
+ self.assertFalse(img_metadata.is_clean(), "The file is dirty.")
+ del(img_metadata)
+ codename = self._new_codename()
+ rv = self.client.post('/submit', data=dict(
+ msg="This is a test",
+ fh=(img, 'dirty.jpg'),
+ notclean='True',
+ ), follow_redirects=True)
+ self.assertEqual(rv.status_code, 200)
+ self.assertIn("Thanks! We received your message.", rv.data)
+ self.assertIn(escape("Thanks! We received your document 'dirty.jpg'."),
+ rv.data)
+
+ store_dirs = [os.path.join(config.STORE_DIR,d) for d in os.listdir(config.STORE_DIR) if os.path.isdir(os.path.join(config.STORE_DIR,d))]
+ latest_subdir = max(store_dirs, key=os.path.getmtime)
+ zip_gpg_files = [os.path.join(latest_subdir,f) for f in os.listdir(latest_subdir) if os.path.isfile(os.path.join(latest_subdir,f))]
+ zip_gpg = max(zip_gpg_files, key=os.path.getmtime)
+
+ zip_gpg_file = open(zip_gpg)
+ decrypted_data = self.gpg.decrypt_file(zip_gpg_file)
+ self.assertTrue(decrypted_data.ok, 'Checking the integrity of the data after decryption.')
+
+ s = StringIO(decrypted_data.data)
+ zip_file = zipfile.ZipFile(s, 'r')
+ clean_file = open(os.path.join(latest_subdir,'dirty.jpg'), 'w+b')
+ clean_file.write(zip_file.read('dirty.jpg'))
+ clean_file.seek(0)
+ zip_file.close()
+
+ # check for the actual file been clean
+ clean_file_metadata = store.metadata_handler(clean_file.name)
+ self.assertTrue(clean_file_metadata.is_clean(), "the file is now clean.")
+ del(clean_file_metadata)
+ zip_gpg_file.close()
+ clean_file.close()
+ img.close()
+
+ def test_submit_clean_file(self):
+ self._new_codename()
+ img = open(os.getcwd()+'/tests/test_images/clean.jpg')
+ codename = self._new_codename()
+ rv = self.client.post('/submit', data=dict(
+ msg="This is a test",
+ fh=(img, 'clean.jpg'),
+ notclean='True',
+ ), follow_redirects=True)
+ self.assertEqual(rv.status_code, 200)
+ self.assertIn("Thanks! We received your message.", rv.data)
+ self.assertIn(escape("Thanks! We received your document 'clean.jpg'."),
+ rv.data)
+ img.close()
+
@patch('zipfile.ZipFile.writestr')
def test_submit_sanitizes_filename(self, zipfile_write):
"""Test that upload file name is sanitized"""
2  setup_dev.sh
View
@@ -19,7 +19,7 @@ EOS
SOURCE_ROOT=$(dirname $0)
securedrop_root=$(pwd)/.securedrop
-DEPENDENCIES="gnupg2 secure-delete haveged python-dev python-pip sqlite"
+DEPENDENCIES="gnupg2 secure-delete haveged python-dev python-pip sqlite python-distutils-extra"
while getopts "r:uh" OPTION; do
Please sign in to comment.
Something went wrong with that request. Please try again.