Skip to content

Commit

Permalink
Implement EML parser
Browse files Browse the repository at this point in the history
  • Loading branch information
helviojunior committed May 17, 2024
1 parent 3e2584e commit a26ece7
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 2 deletions.
2 changes: 1 addition & 1 deletion filecrawler/__meta__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '0.1.8'
__version__ = '0.1.9'
__title__ = "FileCrawler"
__description__ = "File Crawler index files and search hard-coded credentials."
__url__ = "https://github.com/helviojunior/filecrawler"
Expand Down
45 changes: 45 additions & 0 deletions filecrawler/libs/containerfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class ContainerFile(object):
dict(name='bz2', extensions=['bz2'], mime=['application/x-bzip2']),
dict(name='gz', extensions=['gz'], mime=['application/gzip']),
dict(name='7z', extensions=['7z'], mime=['application/x-7z-compressed']),
dict(name='eml', extensions=['eml'], mime=['message/rfc822']),
#dict(name='tar', extensions=['tar'], mime=['application/x-tar']),
dict(name='apk', extensions=['apk'], mime=[]),
dict(name='jar', extensions=['jar'], mime=[])
Expand Down Expand Up @@ -89,6 +90,50 @@ def extract(self) -> Optional[Path]:

return None

def extract_eml(self) -> bool:
    """Extract the attachments of an EML (RFC 822) message into the temp folder.

    Every attachment yielded by ``iter_attachments()`` is decoded and written
    under ``self._temp_path``. Attachments without a usable filename get a
    random name whose extension is guessed from the payload.

    Returns:
        True when the message was processed, False when file extraction is
        disabled in the configuration or an unexpected error occurred.
    """
    from filecrawler.config import Configuration
    if not Configuration.extract_files:
        return False

    self.create_folder()

    try:
        import email
        from email import policy

        eml_path = str(self._file.path)

        # Parse from bytes: a message may contain 8-bit data or mixed
        # charsets, which makes a text-mode read with the locale encoding
        # fail before any attachment can be extracted.
        with open(eml_path, "rb") as f:
            msg = email.message_from_binary_file(f, policy=policy.default)

        for attachment in msg.iter_attachments():
            try:
                output_filename = attachment.get_filename()
            except AttributeError:
                print("Got string instead of filename for %s. Skipping." % eml_path)
                continue

            try:
                msg_data = attachment.get_payload(decode=True)
            except TypeError:
                print("Couldn't get payload for %s" % output_filename)
                continue

            # Nothing to write for this part (no decodable payload)
            if msg_data is None:
                continue

            # The filename comes from the (untrusted) message headers: keep
            # only its last path component so that names such as
            # "../../x" or "/etc/x" cannot escape the extraction folder.
            safe_name = os.path.basename(
                (output_filename or "").replace("\\", "/")
            ).replace("\x00", "").strip()
            if safe_name in ("", ".", ".."):
                safe_name = Tools.random_generator(size=10) + Tools.guess_extensions(msg_data)

            with open(os.path.join(str(self._temp_path), safe_name), "wb") as of:
                try:
                    of.write(msg_data)
                except TypeError:
                    print("Couldn't get payload for %s" % safe_name)

        return True
    except Exception:
        # Best-effort extraction: any failure is reported to the caller
        # only through the False return value.
        return False

def extract_7z(self) -> bool:
from filecrawler.config import Configuration
if not Configuration.extract_files:
Expand Down
27 changes: 26 additions & 1 deletion filecrawler/util/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,32 @@ def to_datetime(epoch: [int, float]) -> datetime.datetime:
def to_boolean(text: [str, bool]) -> bool:
return bool(text)

@staticmethod
def guess_extension(file_path: str) -> str:
    """Return a file extension (with leading dot) for the file at *file_path*.

    The MIME type is obtained through ``Tools.get_mime`` and mapped to an
    extension with the standard ``mimetypes`` database (strict mode).
    ``".bin"`` is returned when no extension is known or on any error.
    """
    fallback = ".bin"
    try:
        import mimetypes
        mimetypes.init()
        mime_type = Tools.get_mime(file_path)
        guessed = mimetypes.guess_extension(mime_type, strict=True)
    except Exception:
        # Errors are deliberately not reported; the generic extension is used
        return fallback
    return fallback if guessed is None else str(guessed)

@staticmethod
def guess_extensions(data: [str, bytes]) -> str:
    """Return a file extension (with leading dot) for in-memory *data*.

    The MIME type is obtained through ``Tools.get_mimes`` and mapped to an
    extension with the standard ``mimetypes`` database (strict mode).
    ``".bin"`` is returned when no extension is known or on any error.
    """
    fallback = ".bin"
    try:
        import mimetypes
        mimetypes.init()
        mime_type = Tools.get_mimes(data)
        guessed = mimetypes.guess_extension(mime_type, strict=True)
    except Exception:
        # Errors are deliberately not reported; the generic extension is used
        return fallback
    return fallback if guessed is None else str(guessed)

@staticmethod
def get_mime(file_path: str) -> str:
    """Return the MIME type of the file at *file_path*.

    Only the first 2048 bytes are read and handed to ``Tools.get_mimes``.
    The file is opened in a ``with`` block so the handle is closed
    immediately instead of being left for the garbage collector.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(file_path, "rb") as f:
        header = f.read(2048)
    return Tools.get_mimes(header)
Expand Down Expand Up @@ -218,7 +244,6 @@ def get_mimes(data: [str, bytes]) -> str:
Tools.print_error(e)
return 'application/octet-stream'


@staticmethod
def json_serial(obj):
"""JSON serializer for objects not serializable by default json code"""
Expand Down

0 comments on commit a26ece7

Please sign in to comment.