Skip to content

Commit

Permalink
Disable sanitization by default
Browse files Browse the repository at this point in the history
  • Loading branch information
hexylena committed Aug 1, 2017
1 parent 94a8a9a commit cf4dc28
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 4 deletions.
10 changes: 7 additions & 3 deletions scripts/grt.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ def annotate(label, human_label=None):
CHECK_POINT_FILE = os.path.join(REPORT_DIR, '.checkpoint')
REPORT_IDENTIFIER = str(time.time())
REPORT_BASE = os.path.join(REPORT_DIR, REPORT_IDENTIFIER)
SANITIZATION_ENABLED = config['sanitization']['enabled']

if os.path.exists(CHECK_POINT_FILE):
with open(CHECK_POINT_FILE, 'r') as handle:
Expand All @@ -219,7 +220,7 @@ def annotate(label, human_label=None):
job_state_data = defaultdict(int)

annotate('san_init', 'Building Sanitizer')
san = Sanitization(config['blacklist'], model, sa_session)
san = Sanitization(config['sanitization'], model, sa_session)
annotate('san_end')

if not os.path.exists(REPORT_DIR):
Expand Down Expand Up @@ -313,8 +314,11 @@ def annotate(label, human_label=None):
.filter(model.JobParameter.job_id <= min(end_job_id, offset_start + args.batch_size)) \
.all():

unsanitized = {param[1]: json.loads(param[2])}
sanitized = san.sanitize_data(job_tool_map[param[0]], unsanitized)
if SANITIZATION_ENABLED:
unsanitized = {param[1]: json.loads(param[2])}
sanitized = san.sanitize_data(job_tool_map[param[0]], unsanitized)
else:
sanitized = param[2]

handle_params.write(str(param[0]))
handle_params.write('\t')
Expand Down
7 changes: 6 additions & 1 deletion scripts/grt.yml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,12 @@ grt:
share_toolbox: True


blacklist:
sanitization:
# This defaults to disabled as it has a serious performance impact and may
# not be necessary for your instance. Without sanitization we see
# performance on the order of 4k parameters parsed per second. With
# sanitization on, it averages to 100 parameters per second.
enabled: False
# Blacklist the entire tool from appearing
tools:
- __SET_METADATA__
Expand Down

0 comments on commit cf4dc28

Please sign in to comment.