Skip to content
This repository has been archived by the owner on Mar 24, 2021. It is now read-only.

Commit

Permalink
Add a mapping transform
Browse files Browse the repository at this point in the history
This duplicates the functionality of the group-mapping property when
using a group_timeseries module in Spotlight.

The main use of this is for mapping the browser and browserVersion
fields in browser-usage datasets back to simpler groupings of Chrome,
Firefox, Internet Explorer < 9 and Internet Explorer >= 9.
  • Loading branch information
tombooth committed Jan 12, 2015
1 parent df45646 commit 70f1ff5
Show file tree
Hide file tree
Showing 2 changed files with 185 additions and 0 deletions.
62 changes: 62 additions & 0 deletions backdrop/transformers/tasks/mapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import re

from .util import group_by


def compile_mappings(mappings):
    """Compile the regex pattern strings of every mapping.

    :param mappings: dict of mapping name -> iterable of regex pattern
        strings.
    :returns: a new dict of mapping name -> list of compiled patterns, in
        the same order as the input patterns.
    :raises re.error: if any pattern is not a valid regular expression.
    """
    # Build real lists (not ``map`` objects): the compiled patterns are
    # re-used for every group of rows, and on Python 3 a ``map`` object
    # would be exhausted after its first use.
    return {
        name: [re.compile(pattern) for pattern in patterns]
        for name, patterns in mappings.items()
    }


def match_mapping(values, mappings):
    """Return the name of the first mapping whose patterns match ``values``.

    A mapping matches when it has exactly one pattern per value and every
    pattern is found (``search``) in the value at the same position.

    :param values: sequence of strings to test.
    :param mappings: dict of mapping name -> sequence of compiled regex
        patterns.
    :returns: the matching mapping name, or ``None`` if nothing matches.
        Mappings are tried in the dict's iteration order, so if several
        mappings could match the same values the winner depends on that
        order.
    """
    for mapping, patterns in mappings.items():
        # A mapping with a different number of patterns than there are
        # values cannot describe those values; treat it as a non-match
        # rather than pairing a pattern or value with nothing.
        if len(patterns) != len(values):
            continue

        if all(pattern.search(value)
               for pattern, value in zip(patterns, values)):
            return mapping

    return None


def map_data(grouped_data, mappings, mapped_attribute, other_mapping, value_attribute):
    """Collapse grouped rows into one summed record per period and mapping.

    :param grouped_data: dict whose keys are sequences of the form
        ``(start_at, end_at, value, ...)`` and whose values are iterables
        of dicts that each contain ``value_attribute``.
    :param mappings: dict of mapping name -> compiled regex patterns, as
        accepted by ``match_mapping``.
    :param mapped_attribute: key under which the mapping name is stored in
        each output record.
    :param other_mapping: mapping name used for groups that match no
        mapping; ``None`` drops such groups.
    :param value_attribute: key of the numeric value to sum.
    :returns: list of dicts with ``_start_at``, ``_end_at``,
        ``mapped_attribute`` and ``value_attribute`` keys, one per distinct
        (start, end, mapping) combination. Order is not specified.
    """
    mapped_data = {}

    for grouped_values, data in grouped_data.items():
        start_at = grouped_values[0]
        end_at = grouped_values[1]
        mapping_values = grouped_values[2:]

        mapping = match_mapping(mapping_values, mappings)
        # Compare against None explicitly so that a matched mapping whose
        # name happens to be falsy (e.g. "") is not replaced by the
        # fallback.
        if mapping is None:
            mapping = other_mapping
        if mapping is None:
            # Unmatched and no fallback configured: drop the group.
            continue

        summed_value = sum(datum[value_attribute] for datum in data)
        period_mapping_key = (start_at, end_at, mapping)

        if period_mapping_key in mapped_data:
            # Several groups can map to the same name within a period;
            # accumulate them into the one record.
            mapped_data[period_mapping_key][value_attribute] += summed_value
        else:
            mapped_data[period_mapping_key] = {
                "_start_at": start_at,
                "_end_at": end_at,
                mapped_attribute: mapping,
                value_attribute: summed_value,
            }

    # Always hand back a list, not a dict view.
    return list(mapped_data.values())


def compute(data, options):
    """Run the mapping transform over ``data``.

    :param data: the records to transform; passed to ``group_by`` together
        with the grouping keys.
    :param options: dict with the required keys ``mapping-keys`` (list of
        attribute names), ``mappings`` (mapping name -> regex pattern
        strings), ``mapped-attribute`` and ``value-attribute``, plus the
        optional ``other-mapping`` (defaults to ``None``).
    :returns: whatever ``map_data`` produces for the grouped records.
    :raises KeyError: if a required option is missing.
    """
    # Group on the period first, then on the attributes being mapped.
    group_keys = ['_start_at', '_end_at'] + options['mapping-keys']
    grouped = group_by(group_keys, data)

    return map_data(
        grouped,
        compile_mappings(options['mappings']),
        options['mapped-attribute'],
        options.get('other-mapping'),
        options['value-attribute'],
    )
123 changes: 123 additions & 0 deletions tests/transformers/tasks/test_mapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
import unittest

from hamcrest import assert_that, is_

from backdrop.transformers.tasks.mapping import compute


def _session_record(browser, browser_version, sessions):
    """Build one fixture record for the week starting 2014-12-22."""
    return {
        "_count": 1,
        "_end_at": "2014-12-29T00:00:00+00:00",
        "_start_at": "2014-12-22T00:00:00+00:00",
        "browser": browser,
        "browserVersion": browser_version,
        "sessions:sum": sessions,
    }


# Fixture: five records for a single period -- four Internet Explorer
# versions and one unrelated browser -- each with a distinct session count.
data = [
    _session_record("Internet Explorer", "6", 10),
    _session_record("Internet Explorer", "7", 11),
    _session_record("Internet Explorer", "9", 12),
    _session_record("Internet Explorer", "10", 13),
    _session_record("Some other browser", "10", 14),
]


def matches(item, props):
    """Return True when ``item`` contains every key/value pair in ``props``.

    A key that is absent from ``item`` counts as a non-match rather than
    raising ``KeyError``. An empty ``props`` matches any item.
    """
    return all(
        key in item and item[key] == expected
        for key, expected in props.items()
    )


def find_in(arr, props):
    """Return the first item of ``arr`` that ``matches`` ``props``.

    Returns ``None`` when no item matches.
    """
    candidates = (item for item in arr if matches(item, props))
    return next(candidates, None)


class MappingTestCase(unittest.TestCase):
    """Exercise ``compute`` against the module-level ``data`` fixture."""

    # Start of the single period covered by the fixture.
    PERIOD_START = "2014-12-22T00:00:00+00:00"

    def _options(self, extra=None):
        """Return a fresh options dict for ``compute``.

        ``extra`` is an optional dict of additional or overriding options.
        """
        options = {
            "value-attribute": "sessions:sum",
            "mapped-attribute": "browser-group",
            "mapping-keys": ["browser", "browserVersion"],
            "mappings": {
                "OldIE": ["Internet Explorer", "[2-8]{1}(.)*"],
                "NewIE": ["Internet Explorer", "(9|1)(.)*"],
            },
        }
        if extra is not None:
            options.update(extra)
        return options

    def _assert_sessions(self, transformed_data, group, expected):
        """Assert that ``group`` has ``expected`` summed sessions."""
        record = find_in(transformed_data, {
            "browser-group": group,
            "_start_at": self.PERIOD_START,
        })
        # Fail with a readable message instead of a TypeError on None.
        self.assertIsNotNone(
            record, "no record found for browser-group %r" % (group,))
        assert_that(record["sessions:sum"], is_(expected))

    def test_compute(self):
        transformed_data = compute(data, self._options())

        # Without "other-mapping" the unmatched browser is dropped.
        assert_that(len(transformed_data), is_(2))

        self._assert_sessions(transformed_data, "OldIE", 21)
        self._assert_sessions(transformed_data, "NewIE", 25)

    def test_compute_with_other(self):
        transformed_data = compute(
            data, self._options({"other-mapping": "other"}))

        assert_that(len(transformed_data), is_(3))

        self._assert_sessions(transformed_data, "OldIE", 21)
        self._assert_sessions(transformed_data, "NewIE", 25)
        self._assert_sessions(transformed_data, "other", 14)

0 comments on commit 70f1ff5

Please sign in to comment.