Skip to content

Commit

Permalink
wip on microsoft office document decryption support
Browse files Browse the repository at this point in the history
Only the raw implementation is included so far; integration into the
rest of sflock is still to come. Thanks to nolze [1] and
Mitsunari Shigeo [2] for their research, on which this is fully based.

[1]: https://github.com/nolze/ms-offcrypto-tool
[2]: https://github.com/herumi/msoffice
  • Loading branch information
jbremer committed May 3, 2017
1 parent 775121d commit 3f6a96a
Show file tree
Hide file tree
Showing 6 changed files with 143 additions and 0 deletions.
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
install_requires=[
"click==6.6",
"olefile==0.43",
"pycrypto==2.6.1",
"python-magic==0.4.12",
],
)
10 changes: 10 additions & 0 deletions sflock/abstracts.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,16 @@ def process_directory(self, dirpath, duplicates, password=None):
shutil.rmtree(dirpath)
return self.process(entries, duplicates)

class Decoder(object):
    """Base class from which every Decoder engine derives."""

    # Registry of Decoder subclasses; it starts out empty and is filled in
    # at runtime.
    plugins = {}

    def __init__(self, f, password):
        """Store the input object and the password used to decode it.

        :param f: object wrapping the data that should be decoded.
        :param password: password to decode the data with.
        """
        self.f, self.password = f, password

class File(object):
"""Abstract class for all file operations.
Expand Down
8 changes: 8 additions & 0 deletions sflock/decode/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Copyright (C) 2017 Jurriaan Bremer.
# This file is part of SFlock - http://www.sflock.org/.
# See the file 'docs/LICENSE.txt' for copying permission.

from sflock.abstracts import Decoder
from sflock.misc import import_plugins

# Expose the decoder plugins of this package as `plugins`.
# NOTE(review): presumably this imports every module in sflock.decode and
# collects the Decoder subclasses they define - confirm against
# sflock.misc.import_plugins.
plugins = import_plugins(__file__, "sflock.decode", globals(), Decoder)
109 changes: 109 additions & 0 deletions sflock/decode/office.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Copyright (C) 2017 Jurriaan Bremer.
# This file is part of SFlock - http://www.sflock.org/.
# See the file 'docs/LICENSE.txt' for copying permission.

import base64
import hashlib
import struct
import xml.dom.minidom

import olefile
from Crypto.Cipher import AES, PKCS1_v1_5
from Crypto.PublicKey import RSA

from sflock.abstracts import Decoder, File

class EncryptedInfo(object):
    """Plain container for the encryption parameters of a document.

    Every field defaults to None and is meant to be assigned by whoever
    parses the encryption descriptor.
    """

    # Parameters describing how the document data itself is encrypted.
    key_data_salt = key_data_hash_alg = None

    # Parameters describing how the password protects the secret key.
    password_salt = password_hash_alg = None
    password_key_bits = None
    spin_value = None
    encrypted_key_value = None

class Office(Decoder):
    """Decoder for password-protected Microsoft Office documents.

    Handles OLE containers whose "EncryptionInfo" stream carries an XML
    descriptor (the layout MS-OFFCRYPTO calls agile encryption) and whose
    "EncryptedPackage" stream holds the AES-CBC encrypted document.
    """

    name = "office"

    # The package is encrypted in independent segments of this many bytes,
    # each with its own IV.
    SEGMENT_SIZE = 0x1000

    # Block key mixed into the final password hash when deriving the key
    # that decrypts the encryptedKeyValue.
    BLOCK_KEY_ENCRYPTED_KEY_VALUE = b"\x14\x6e\x0b\xe7\xab\xac\xd0\xd6"

    # Hash algorithm names that may appear in the descriptor, mapped onto
    # their hashlib constructors.
    HASH_ALGORITHMS = {
        "MD5": hashlib.md5,
        "SHA1": hashlib.sha1,
        "SHA256": hashlib.sha256,
        "SHA384": hashlib.sha384,
        "SHA512": hashlib.sha512,
    }

    # Defaults so that the attributes exist before decrypt() has been run.
    _secret_key = None
    ei = None

    def get_hash(self, value, algorithm):
        """Return the raw digest of `value` for the named hash algorithm.

        :param value: bytes to hash.
        :param algorithm: algorithm name as found in the descriptor, e.g.
            "SHA1" or "SHA512".
        :raises ValueError: if the algorithm is not supported. Hashing with
            a different algorithm would only yield a useless key.
        """
        try:
            hasher = self.HASH_ALGORITHMS[algorithm]
        except KeyError:
            raise ValueError(
                "Unsupported hash algorithm: %r" % (algorithm,)
            )
        return hasher(value).digest()

    @property
    def secret_key(self):
        """Secret key used to decrypt the package, derived lazily.

        The key is computed once from the password and then cached.
        Returns None when no password is available.
        """
        if self._secret_key:
            return self._secret_key

        # TODO Add support for private keys, i.e., recovering the secret
        # key by RSA (PKCS#1 v1.5) decrypting the encrypted key value.

        if not self.password:
            return None

        algorithm = self.ei.password_hash_alg

        # Initial round; hash(salt + password).
        h = self.get_hash(
            self.ei.password_salt + self.password.encode("utf-16le"),
            algorithm
        )

        # Iteration of 0 -> spincount-1; hash = hash(iterator + hash).
        for i in range(self.ei.spin_value):
            h = self.get_hash(struct.pack("<I", i) + h, algorithm)

        # Final round with the block key. The digest is truncated to the
        # key size, or padded with 0x36 bytes if it is shorter than that.
        h = self.get_hash(h + self.BLOCK_KEY_ENCRYPTED_KEY_VALUE, algorithm)
        key_size = self.ei.password_key_bits // 8
        skey = h[:key_size].ljust(key_size, b"\x36")

        # AES decrypt the encryptedKeyValue with the skey and salt in
        # order to get secret key.
        aes = AES.new(skey, AES.MODE_CBC, self.ei.password_salt)
        self._secret_key = aes.decrypt(self.ei.encrypted_key_value)
        return self._secret_key

    def decrypt_blob(self, f):
        """Decrypt an "EncryptedPackage" stream into a File object.

        :param f: file-like object positioned at the start of the stream.
        :returns: File wrapping the decrypted document.
        """
        # The stream starts with the size of the decrypted data, stored as
        # a 64-bit little-endian integer.
        total_size, = struct.unpack("<Q", f.read(8))

        ret = []
        segment = 0
        remaining = total_size
        while remaining > 0:
            chunk = f.read(self.SEGMENT_SIZE)
            if not chunk:
                # The stream is shorter than its header claims; stop rather
                # than spinning over empty reads.
                break

            # The IV of each segment is derived from the segment index
            # (0, 1, 2, ..), not from its byte offset.
            iv = self.get_hash(
                self.ei.key_data_salt + struct.pack("<I", segment),
                self.ei.key_data_hash_alg
            )
            aes = AES.new(self.secret_key, AES.MODE_CBC, iv[:16])
            plain = aes.decrypt(chunk)

            # The last segment is padded up to the AES block size; only
            # keep the bytes that belong to the document.
            ret.append(plain[:remaining])
            remaining -= len(plain)
            segment += 1
        return File(contents=b"".join(ret))

    def decrypt(self):
        """Decrypt the document in self.f using self.password.

        :returns: File with the decrypted document, or None if the input
            cannot be opened as an OLE container.
        """
        try:
            ole = olefile.OleFileIO(self.f.stream)
        except IOError:
            return

        # Drop any key that was cached for a previous run.
        self._secret_key = None

        # The XML descriptor follows an 8-byte binary header.
        # NOTE(review): minidom does not protect against malicious XML
        # (e.g., entity expansion); consider a hardened parser as this
        # parses untrusted documents.
        info = xml.dom.minidom.parseString(
            ole.openstream("EncryptionInfo").read()[8:]
        )
        key_data = info.getElementsByTagName("keyData")[0]
        password = info.getElementsByTagName("p:encryptedKey")[0]

        self.ei = ei = EncryptedInfo()
        ei.key_data_salt = base64.b64decode(
            key_data.getAttribute("saltValue")
        )
        ei.key_data_hash_alg = key_data.getAttribute("hashAlgorithm")
        ei.encrypted_key_value = base64.b64decode(
            password.getAttribute("encryptedKeyValue")
        )
        ei.spin_value = int(password.getAttribute("spinCount"))
        ei.password_salt = base64.b64decode(
            password.getAttribute("saltValue")
        )
        ei.password_hash_alg = password.getAttribute("hashAlgorithm")
        ei.password_key_bits = int(password.getAttribute("keyBits"))

        return self.decrypt_blob(ole.openstream("EncryptedPackage"))
Binary file added tests/files/encrypted1.docx
Binary file not shown.
15 changes: 15 additions & 0 deletions tests/test_decode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright (C) 2017 Jurriaan Bremer.
# This file is part of SFlock - http://www.sflock.org/.
# See the file 'docs/LICENSE.txt' for copying permission.

from sflock.abstracts import File
from sflock.decode.office import Office

def f(filename):
    """Return a File object for the given fixture in tests/files."""
    path = "tests/files/%s" % filename
    return File.from_path(path)

def test_decode_docx():
    """Decrypting the encrypted .docx fixture yields a Word/Zip file."""
    decoder = Office(f("encrypted1.docx"), "Password1234_")
    decrypted = decoder.decrypt()
    expected_magics = (
        "Microsoft Word 2007+", "Zip archive data, at least v2.0 to extract"
    )
    assert decrypted.magic in expected_magics

0 comments on commit 3f6a96a

Please sign in to comment.