Yara/Logging/Misc Fix Updates #14

Open · wants to merge 9 commits into master
5 changes: 5 additions & 0 deletions .gitignore
@@ -1 +1,6 @@
venv/
interesting_file.txt
awsbucketdump.log
*.pyc
*.log
*.swp
87 changes: 63 additions & 24 deletions AWSBucketDump.py
@@ -17,9 +17,18 @@
import os
import shutil
import traceback

import logging
from queue import Queue
from threading import Thread, Lock
from common import pretty_print, log

try:
    import yara
    ENABLE_YARA = True
except (OSError, ImportError):
    log("Warning: Yara is not properly installed, will not enable", logging.WARNING)
    ENABLE_YARA = False

bucket_q = Queue()
download_q = Queue()
@@ -28,35 +37,48 @@

arguments = None

yara_rules = None

# Disable requests logging
logging.getLogger("requests").setLevel(logging.WARNING)

def yara_index_build():
    with open('YaraRules/index.yar', 'w') as yar:
        for filename in os.listdir('YaraRules'):
            if filename.endswith('.yar') and filename != 'index.yar':
                include = 'include "{0}"\n'.format(filename)
                yar.write(include)
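    # sketch: with the bundled YaraRules/pdf.yar in place, the generated
    # index.yar ends up containing the single line: include "pdf.yar"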

def fetch(url):
    print('Fetching ' + url + '...')
    log('Fetching ' + url + '...', logging.DEBUG)
    response = requests.get(url)
    if response.status_code == 403 or response.status_code == 404:
        status403(url)
    if response.status_code == 200:
        if "Content" in response.text:
            returnedList = status200(response, grep_list, url)
            returnedList = status200(response, grep_list, yara_rules, url)


def bucket_worker():
    while True:
        item = bucket_q.get()
        print("Processing: {}".format(item))
        try:
            fetch(item)
        except Exception as e:
            traceback.print_exc(file=sys.stdout)
            print(e)
            log(e, logging.ERROR)
        bucket_q.task_done()

def downloadWorker():
    print('Download worker running...')
    log('Download worker running...', logging.DEBUG)
    while True:
        item = download_q.get()
        try:
            downloadFile(item)
        except Exception as e:
            traceback.print_exc(file=sys.stdout)
            print(e)
            log(e, logging.ERROR)
        download_q.task_done()

directory_lock = Lock()
@@ -80,7 +102,7 @@ def get_make_directory_return_filename_path(url):
        os.makedirs(directory)
    except Exception as e:
        traceback.print_exc(file=sys.stdout)
        print(e)
        log(e, logging.ERROR)
    finally:
        release_directory_lock()

@@ -107,20 +129,32 @@ def write_interesting_file(filepath):

def downloadFile(filename):
    global arguments
    print('Downloading {}'.format(filename) + '...')
    allowFileWrite = True
    log('Downloading: {}'.format(filename))
    local_path = get_make_directory_return_filename_path(filename)
    local_filename = (filename.split('/')[-1]).rstrip()
    print('local {}'.format(local_path))
    log('Local File: {}'.format(local_path))
    if local_filename == "":
        print("Directory..\n")
        log("Directory..\n", logging.DEBUG)
    else:
        r = requests.get(filename.rstrip(), stream=True)
        if 'Content-Length' in r.headers:
            if int(r.headers['Content-Length']) > arguments.maxsize:
                print("This file is greater than the specified max size... skipping...\n")
                log("Content Length Exceeded: {} is greater than the specified max size. Skipped".format(filename))
            else:
                with open(local_path, 'wb') as f:
                    shutil.copyfileobj(r.raw, f)
                file_data = r.content
                if yara_rules is not None:
                    # yara-python's match() returns a list of Match objects with a
                    # .rule attribute; the dict-style matches['main'] access belongs
                    # to the older yara-ctypes API
                    matches = yara_rules.match(data=file_data)
                    if len(matches) > 0:
                        for match in matches:
                            log("Yara Rule Match: {} matched {}".format(filename, match.rule))
                    else:
                        allowFileWrite = False
                if allowFileWrite:
                    with open(local_path, 'wb') as f:
                        f.write(file_data)
        r.close()


@@ -139,20 +173,20 @@ def print_banner():


def cleanUp():
    print("Cleaning up files...")
    log("Cleaning up files...", logging.DEBUG)

def status403(line):
    print(line.rstrip() + " is not accessible.")
    log("403 Error: " + line.rstrip() + " is not accessible.")


def queue_up_download(filepath):
    download_q.put(filepath)
    print('Collectable: {}'.format(filepath))
    log('Collectable: {}'.format(filepath), logging.DEBUG)
    write_interesting_file(filepath)


def status200(response, grep_list, line):
    print("Pilfering " + line.rstrip() + '...')
def status200(response, grep_list, yara_rules, line):
    log("Pilfering " + line.rstrip() + '...', logging.DEBUG)
    objects = xmltodict.parse(response.text)
    Keys = []
    interest = []
@@ -177,14 +211,15 @@ def status200(response, grep_list, line):
def main():
    global arguments
    global grep_list
    global yara_rules
    parser = ArgumentParser()
    parser.add_argument("-D", dest="download", required=False, action="store_true", default=False, help="Download files. This requires significant disk space.")
    parser.add_argument("-d", dest="savedir", required=False, default='', help="If -D, then -d 1 to create save directories for each bucket with results.")
    parser.add_argument("-l", dest="hostlist", required=True, help="")
    parser.add_argument("-g", dest="grepwords", required=False, help="Provide a wordlist to grep for.")
    parser.add_argument("-m", dest="maxsize", type=int, required=False, default=1024, help="Maximum file size to download.")
    parser.add_argument("-t", dest="threads", type=int, required=False, default=1, help="Number of threads.")

    parser.add_argument("-Y", dest="runyara", required=False, action="store_true", default=False, help="Run Yara rules against downloads and only save matches")
    if len(sys.argv) == 1:
        print_banner()
        parser.error("No arguments given.")
@@ -195,21 +230,25 @@ def main():
    # output parsed arguments into a usable object
    arguments = parser.parse_args()

    if arguments.runyara and ENABLE_YARA:
        yara_index_build()
        yara_rules = yara.compile("YaraRules/index.yar")
    # specify primary variables
    with open(arguments.grepwords, "r") as grep_file:
        grep_content = grep_file.readlines()
        grep_list = [g.strip() for g in grep_content]
    if arguments.grepwords is not None:
        with open(arguments.grepwords, "r") as grep_file:
            grep_content = grep_file.readlines()
            grep_list = [g.strip() for g in grep_content]

    if arguments.download and arguments.savedir:
        print("Downloads enabled (-D), save directories (-d) for each host will be created/used.")
        print("Default Download Directory: {}".format(arguments.savedir))
    elif arguments.download and not arguments.savedir:
        print("Downloads enabled (-D), will be saved to current directory.")
    else:
        print("Downloads were not enabled (-D), not saving results locally.")

    # start up bucket workers
    for i in range(0, arguments.threads):
        print('Starting thread...')
        log('Starting thread...', logging.DEBUG)
        t = Thread(target=bucket_worker)
        t.daemon = True
        t.start()
@@ -223,7 +262,7 @@ def main():
    with open(arguments.hostlist) as f:
        for line in f:
            bucket = 'http://' + line.rstrip() + '.s3.amazonaws.com'
            print('Queuing {}'.format(bucket) + '...')
            log('Queuing: {}'.format(bucket))
            bucket_q.put(bucket)

    bucket_q.join()
7 changes: 4 additions & 3 deletions README.md
@@ -9,6 +9,7 @@ Non-Standard Python Libraries:
* xmltodict
* requests
* argparse
* yara

Created with Python 3.6

@@ -38,7 +39,7 @@ After building this tool, I did find an [interesting article](https://community.

## Usage:

usage: AWSBucketDump.py [-h] [-D] [-t THREADS] -l HOSTLIST [-g GREPWORDS] [-m MAXSIZE]
usage: AWSBucketDump.py [-h] [-D] [-t THREADS] -l HOSTLIST [-g GREPWORDS] [-m MAXSIZE] [-Y]

optional arguments:
-h, --help show this help message and exit
@@ -48,8 +49,8 @@ After building this tool, I did find an [interesting article](https://community.
-l HOSTLIST
-g GREPWORDS Provide a wordlist to grep for
-m MAXSIZE Maximum file size to download.

python AWSBucketDump.py -l BucketNames.txt -g interesting_Keywords.txt -D -m 500000 -d 1
-Y Enable Yara scanning of downloaded files
python AWSBucketDump.py -l samples/BucketNames.txt -g samples/InterestingKeywords.txt -D -m 500000 -d 1
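
To also scan downloads with the bundled Yara rules and keep only files that match, add -Y (a sketch; sample paths as in the command above):

    python AWSBucketDump.py -l samples/BucketNames.txt -g samples/InterestingKeywords.txt -D -m 500000 -d 1 -Y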

### Contributors

1 change: 1 addition & 0 deletions YaraRules/.gitignore
@@ -0,0 +1 @@
index.yar
8 changes: 8 additions & 0 deletions YaraRules/pdf.yar
@@ -0,0 +1,8 @@
rule pdf_file
{
    strings:
        $magic = { 25 50 44 46 }

    condition:
        $magic at 0
}
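
A quick sanity check for this rule (a sketch assuming the yara-python bindings that AWSBucketDump.py imports; the sample bytes are illustrative):

    import yara

    # 25 50 44 46 are the ASCII bytes of "%PDF", which the rule anchors at offset 0
    rules = yara.compile("YaraRules/pdf.yar")
    matches = rules.match(data=b"%PDF-1.7 sample payload")
    for match in matches:
        print(match.rule)  # -> pdf_file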
49 changes: 49 additions & 0 deletions common.py
@@ -0,0 +1,49 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Borrowed from GFYP codebase, work by Kristov Atlas

"""Shared functions and constants"""
import sys
import logging

LOG_FILENAME = 'awsbucketdump.log'

BOLD = ""
END = ""
if sys.platform != 'win32' and sys.stdout.isatty():
    BOLD = "\033[1m"
    END = "\033[0m"

def pretty_print(string):
    """For *nix systems, augment TTY output. For others, strip such syntax."""
    string = string.replace('$BOLD$', BOLD)
    string = string.replace('$END$', END)
    print(string)

def log(msg, level=logging.INFO):
    """Add a string to the log file."""
    logging.basicConfig(filename=LOG_FILENAME,
                        format='%(asctime)s:%(levelname)s:%(message)s',
                        level=logging.INFO)
    if level == logging.DEBUG:
        logging.debug(msg)
    elif level == logging.INFO:
        logging.info(msg)
    elif level == logging.WARNING:
        logging.warning(msg)
    elif level == logging.ERROR:
        logging.error(msg)
    elif level == logging.CRITICAL:
        logging.critical(msg)
    else:
        raise ValueError(str(level))
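
For reference, a minimal sketch of how callers use these helpers; note that basicConfig pins the log file's level to INFO, so DEBUG messages passed to log() are silently filtered:

    import logging
    from common import log, pretty_print

    log("Queuing: http://example.s3.amazonaws.com")  # written to awsbucketdump.log at INFO
    log("Fetching ...", logging.DEBUG)               # accepted, but dropped by the INFO threshold
    pretty_print("$BOLD$Done$END$")                  # bold on a *nix TTY, plain text elsewhere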
1 change: 1 addition & 0 deletions requirements.txt
@@ -4,3 +4,4 @@ idna==2.6
requests==2.20.0
urllib3==1.22
xmltodict==0.11.0
yara-python
File renamed without changes.
File renamed without changes.