-
Notifications
You must be signed in to change notification settings - Fork 0
/
cazscan.py
50 lines (42 loc) · 1.65 KB
/
cazscan.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
"""
Cazador file/cloud service investigator scanning module.
This module handles walking through a given service provider to scan for content
within the files stored. This will create temporary copies of each file locally
to perform the scan.
Created: 08/29/2016
Creator: Nathan Palmer
"""
import io
import os
import tika
from tika import parser
import cazobjects
def create_temp_name(temp_dir, file_id):
"""Create a temporary file name based on the ID."""
return os.path.join(temp_dir,
"caz_{}".format(os.path.basename(file_id)))
def search_content(file_path, expressions):
"""Open a file and search it's contents against a set of RegEx."""
matches = []
count = 0
data = parser.from_file(file_path)
# Read into an I/O buffer for better readline support
if not data:
# There is no content that could be extracted
return matches
content = io.StringIO(data['content'])
# TODO this may create a very large buffer for larger files
# We may need to convert this to a while readline() loop
for line in content.readlines():
count += 1 # count the number of lines
if line:
for rex in expressions:
# Check if the line matches all the expressions
res = rex.regex.search(line)
if res:
# If there's a match append to the list
matches.append(cazobjects.CazRegMatch(res,
file_path,
count,
rex.name))
return matches