# Scorers reducer
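This script rewrites the scorers configuration file so that the scorer of each F listed in `REDUCERS` is wrapped by a `low-values-score-reducer` that uses the given reduction parameters. If a scorer is already wrapped by a `low-values-score-reducer`, only its reduction parameters are updated.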

### Configuration:
* `aggregated_feature_event_prevalance_stats_path` is the path to the configuration file.
* `REDUCERS` maps each F whose scorer should be wrapped to its reduction parameters (`min_value_for_not_reduce`, `max_value_for_fully_reduce` and `reducing_factor`).

### Output:
The input file is overwritten with the updated scorers (lines that have nothing to do with the wrapped scorers are kept unchanged). The original file is first renamed to a backup file in the same directory: its name is the input file's name followed by `.backup-<timestamp>`.

In [None]:
# Path of the scorers configuration file. The last cell reads it, renames it to a
# timestamped backup and writes the transformed content back to this same path.
#aggregated_feature_event_prevalance_stats_path = r'C:\Users\yoelz\projects\fortscale-core\fortscale\fortscale-streaming\config\aggregated-feature_event-prevalance-stats.properties'
aggregated_feature_event_prevalance_stats_path = r'/home/cloudera/fortscale/streaming/config/aggregated-feature_event-prevalance-stats.properties'
# Maps an F name (as it appears in a
# `fortscale.aggr_event.<...>.<F name>.fortscale.scorers=<scorer>` line of the file)
# to the parameters of the low-values-score-reducer that should wrap its scorer.
# Each value must provide 'min_value_for_not_reduce', 'max_value_for_fully_reduce'
# and 'reducing_factor'; they are written as-is into the `reduction.configs` JSON.
# The entry below is a commented-out example.
REDUCERS = {
#    'number_of_failed_vpn_daily': {
#        'min_value_for_not_reduce': 7,
#        'max_value_for_fully_reduce': 2,
#        'reducing_factor': 0.1
#    }
}

In [None]:
import re
import os
import datetime

In [None]:
def find_f_name_to_scorer_names(lines, reducers):
    """Map every F name configured in ``reducers`` to the scorer assigned to it.

    A line assigns a scorer to an F when it looks like
    ``fortscale.aggr_event.<...>.<F name>.fortscale.scorers=<scorer name>``.

    :param lines: the lines of the scorers configuration file.
    :param reducers: dict whose keys are the F names to look for.
    :return: dict mapping each F name that was found to its scorer name
        (everything after ``=`` up to the end of the line).
    :raises Exception: if an F name is assigned more than once, or if the same
        scorer name is assigned to two different F names.
    """
    f_name_to_scorer_names = {}
    f_names = list(reducers)
    if not f_names:
        # An empty alternation would match an empty F name - nothing to look for.
        return f_name_to_scorer_names
    # F names are escaped and every dot is matched literally, so that only the
    # exact `<F name>.fortscale.scorers=` key is recognized.
    pattern = re.compile(r'fortscale\.aggr_event\..*\.(' +
                         '|'.join(re.escape(f_name) for f_name in f_names) +
                         r')\.fortscale\.scorers=(.+)')
    for l in lines:
        match = pattern.search(l)
        if match is None:
            continue
        f_name, scorer_name = match.groups()
        if f_name in f_name_to_scorer_names:
            raise Exception('duplicate F name: ' + f_name)
        if scorer_name in f_name_to_scorer_names.values():
            raise Exception('duplicate scorer: ' + scorer_name)
        f_name_to_scorer_names[f_name] = scorer_name
    return f_name_to_scorer_names

def update_reducer_if_needed(l, f_name_to_scorer_names, reducers):
    """Rewrite the ``reduction.configs`` line of an already wrapped scorer.

    If ``l`` is the ``fortscale.aggr_event.<...>.<scorer name>.reduction.configs=``
    line of one of the given scorers, its value is replaced by a JSON built from
    the reducer parameters of the matching F. Any other line is returned as is.

    :param l: a single line of the configuration file.
    :param f_name_to_scorer_names: dict mapping F names to the names of scorers
        that are already of type low-values-score-reducer.
    :param reducers: dict mapping F names to dicts holding 'reducing_factor',
        'max_value_for_fully_reduce' and 'min_value_for_not_reduce'.
    :return: the (possibly rewritten) line; a rewritten line ends with a newline.
    """
    if not f_name_to_scorer_names:
        # An empty alternation would match an empty scorer name - nothing to update.
        return l
    scorer_name_to_f_name = dict((scorer_name, f_name)
                                 for f_name, scorer_name in f_name_to_scorer_names.items())
    match = re.search(r'(fortscale\.aggr_event\..*\.(' +
                      '|'.join(re.escape(scorer_name) for scorer_name in scorer_name_to_f_name) +
                      r')\.reduction\.configs=)', l)
    if match is None:
        return l
    prefix, scorer_name = match.groups()
    reducer = reducers[scorer_name_to_f_name[scorer_name]]
    reduction_configs = ('{"reductionConfigs":[{"reducingFeatureName":"aggregated_feature_value",'
                         '"reducingFactor":%s,"maxValueForFullyReduce":%s,"minValueForNotReduce":%s}]}' %
                         (reducer['reducing_factor'],
                          reducer['max_value_for_fully_reduce'],
                          reducer['min_value_for_not_reduce']))
    # Keep whatever precedes the key (indentation, a comment marker) so that a
    # commented-out line does not become active.
    return l[:match.start()] + prefix + reduction_configs + '\n'

def transform_to_reducer_if_needed(l, f_name_to_scorer_names, reducers):
    """Turn a line of a not-yet-wrapped scorer into its wrapped form.

    Lines of the form ``fortscale.aggr_event.<...>.<scorer name>.<...>`` that
    belong to one of the given scorers are handled in one of two ways:

    * the ``output.field.name=score`` line is kept, and the definition of the
      wrapping low-values-score-reducer (which takes over the scorer's name) is
      appended after it, together with the base scorer's
      ``output.field.name=baseScore`` line;
    * in any other line the scorer name is replaced by the base scorer name
      (``<name up to '_scorer'>_base_scorer``).

    Lines that do not belong to any of the given scorers are returned as is.

    :param l: a single line of the configuration file.
    :param f_name_to_scorer_names: dict mapping F names to the names of scorers
        that are not yet of type low-values-score-reducer.
    :param reducers: dict mapping F names to dicts holding 'reducing_factor',
        'max_value_for_fully_reduce' and 'min_value_for_not_reduce'.
    :return: the (possibly rewritten, possibly multi-line) text for this line.
    :raises ValueError: if a matching line does not contain ``.fortscale``.
    """
    if not f_name_to_scorer_names:
        # An empty alternation would match an empty scorer name, and replacing
        # the empty string below would corrupt the whole line.
        return l
    scorer_name_to_f_name = dict((scorer_name, f_name)
                                 for f_name, scorer_name in f_name_to_scorer_names.items())
    match = re.search(r'fortscale\.aggr_event\..*\.(' +
                      '|'.join(re.escape(scorer_name) for scorer_name in scorer_name_to_f_name) +
                      r')\.', l)
    if match is None:
        return l
    scorer_name = match.group(1)
    # Everything up to and including the first '.fortscale' is the key prefix
    # shared by the lines that are generated below.
    prefix = l[:l.index('.fortscale') + len('.fortscale')]
    suffix = scorer_name.find('_scorer')
    if suffix < 0:
        print('warning: scorer name is not according to convention - ' + scorer_name)
        suffix = len(scorer_name)
    base_scorer_name = scorer_name[:suffix] + '_base_scorer'
    # Compare without the line terminator so that the last line of a file that
    # lacks a trailing newline is still recognized.
    content = l.rstrip('\r\n')
    if not content.endswith('output.field.name=score'):
        return l.replace(scorer_name, base_scorer_name)
    eol = l[len(content):] or '\n'
    reducer = reducers[scorer_name_to_f_name[scorer_name]]
    reduction_configs = ('{"reductionConfigs":[{"reducingFeatureName":"aggregated_feature_value",'
                         '"reducingFactor":%s,"maxValueForFullyReduce":%s,"minValueForNotReduce":%s}]}' %
                         (reducer['reducing_factor'],
                          reducer['max_value_for_fully_reduce'],
                          reducer['min_value_for_not_reduce']))
    scorer_key = prefix + '.score.' + scorer_name
    return ''.join([
        content + eol,
        scorer_key + '.scorer=low-values-score-reducer' + eol,
        scorer_key + '.base.scorer=' + base_scorer_name + eol,
        scorer_key + '.reduction.configs=' + reduction_configs + eol,
        prefix + '.score.' + base_scorer_name + '.output.field.name=baseScore' + eol,
    ])
        
def wrap_scorers(lines, f_name_to_scorer_names, reducers):
    """Return the configuration text with the given scorers wrapped by reducers.

    The ``<scorer name>.scorer=<type>`` lines are scanned first, in order to
    split the scorers into those that are already of type
    low-values-score-reducer (only their reduction parameters are updated) and
    all the others (which are transformed into a reducer plus a base scorer).
    Every line is then passed through the relevant per-line transformation.

    :param lines: the lines of the scorers configuration file.
    :param f_name_to_scorer_names: dict mapping F names to scorer names.
    :param reducers: dict mapping F names to their reduction parameters.
    :return: the transformed file content as a single string.
    """
    fs_with_low_values_scorer_to_scorer_name = {}
    fs_with_non_low_values_scorer_to_scorer_name = {}
    if f_name_to_scorer_names:
        # Without this guard an empty mapping would build an empty alternation
        # that matches an empty scorer name.
        scorer_name_to_f_name = dict((scorer_name, f_name)
                                     for f_name, scorer_name in f_name_to_scorer_names.items())
        pattern = re.compile(r'fortscale\.aggr_event\..*\.(' +
                             '|'.join(re.escape(scorer_name) for scorer_name in scorer_name_to_f_name) +
                             r')\.scorer=(.+)')
        for l in lines:
            match = pattern.search(l)
            if match is None:
                continue
            scorer_name, scorer_type = match.groups()
            f_name = scorer_name_to_f_name[scorer_name]
            if scorer_type == 'low-values-score-reducer':
                fs_with_low_values_scorer_to_scorer_name[f_name] = f_name_to_scorer_names[f_name]
            else:
                fs_with_non_low_values_scorer_to_scorer_name[f_name] = f_name_to_scorer_names[f_name]
    transformed_lines = []
    for l in lines:
        # A transformation is applied only when it has scorers to work on; this
        # is the usual situation when the script is re-run on a wrapped file.
        if fs_with_low_values_scorer_to_scorer_name:
            l = update_reducer_if_needed(l, fs_with_low_values_scorer_to_scorer_name, reducers)
        if fs_with_non_low_values_scorer_to_scorer_name:
            l = transform_to_reducer_if_needed(l, fs_with_non_low_values_scorer_to_scorer_name, reducers)
        transformed_lines.append(l)
    return ''.join(transformed_lines)

In [None]:
# Read the configuration and compute the new content before touching the file on
# disk, so that a failure while transforming leaves the original file in place.
with open(aggregated_feature_event_prevalance_stats_path, 'r') as f:
    conf_lines = f.readlines()
f_name_to_scorer_names = find_f_name_to_scorer_names(conf_lines, REDUCERS)
transformed = wrap_scorers(conf_lines, f_name_to_scorer_names, REDUCERS)

# Format the timestamp explicitly: str(datetime) drops the fractional part when
# the microsecond is 0, so cutting it at the '.' could raise a ValueError.
now = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
# Keep the original content as `<path>.backup-<timestamp>` next to the input file.
os.rename(aggregated_feature_event_prevalance_stats_path, aggregated_feature_event_prevalance_stats_path + '.backup-' + now)
with open(aggregated_feature_event_prevalance_stats_path, 'w') as f:
    # `transformed` is a single string, so it is written in one call.
    f.write(transformed)