This repository was archived by the owner on Mar 24, 2021, and is now read-only.

Commit: Parse EVL volumetrics
phss committed Aug 27, 2013
1 parent b39d453 commit e60a8e3
Showing 8 changed files with 152 additions and 148 deletions.
13 changes: 6 additions & 7 deletions backdrop/contrib/evl_upload_filters.py
@@ -1,5 +1,6 @@
 from datetime import datetime
 import itertools
+from backdrop.contrib.evl_volumetrics import remove_summary_columns, extract_transaction_rows, create_transaction_data
 from backdrop.core.timeutils import parse_time_as_utc, as_utc


@@ -137,14 +138,12 @@ def date_or_none(string):


 def volumetrics(sheets):
-    rows = list(list(sheets)[2])
+    sheet = list(list(sheets)[2])

     yield ["_timestamp", "service", "channel", "transaction", "volume"]

-    first_date = as_utc(datetime.strptime(rows[3][3], "%b %Y"))
-    service = "tax-disc"
-    channel = "assisted-digital"
-    transaction = rows[4][2]
-    volume = rows[4][3]
+    header, rows = extract_transaction_rows(remove_summary_columns(sheet))

-    yield [first_date.isoformat(), service, channel, transaction, volume]
+    for row in rows:
+        for data in create_transaction_data(header, row):
+            yield data
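
For illustration, the refactored filter can be exercised end to end like this (a sketch, not part of the commit; `volumetrics_sheet` is a placeholder for the parsed volumetrics worksheet, which the filter expects to find as the third sheet in the workbook):

    from backdrop.contrib.evl_upload_filters import volumetrics

    # Only the third sheet is read, so the first two can be empty.
    records = list(volumetrics([[], [], volumetrics_sheet]))

    records[0]   # ["_timestamp", "service", "channel", "transaction", "volume"]
    records[1:]  # one normalised record per (transaction row, date column) pair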
85 changes: 85 additions & 0 deletions backdrop/contrib/evl_volumetrics.py
@@ -0,0 +1,85 @@
+from datetime import datetime
+import re
+from backdrop.core.timeutils import as_utc
+
+def extract_column_header(sheet):
+    HEADER_INDEX = 3
+    return sheet[HEADER_INDEX]
+
+
+def extract_transaction_rows(sheet):
+    TRANSACTION_INDEXES = {
+        4: ["Assisted Digital", "Relicensing"],
+        5: ["Assisted Digital", "Relicensing"],
+        7: ["Assisted Digital", "SORN"],
+        10: ["Fully Digital", "Relicensing"],
+        11: ["Fully Digital", "Relicensing"],
+        12: ["Fully Digital", "Relicensing"],
+        13: ["Fully Digital", "Relicensing"],
+        15: ["Fully Digital", "SORN"],
+        16: ["Fully Digital", "SORN"],
+        17: ["Fully Digital", "SORN"],
+        18: ["Fully Digital", "SORN"],
+        21: ["Manual", "Relicensing"],
+        22: ["Manual", "Relicensing"],
+        23: ["Manual", "Relicensing"],
+        25: ["Manual", "SORN"],
+        26: ["Manual", "SORN"],
+        27: ["Manual", "SORN"],
+        28: ["Manual", "SORN"],
+        29: ["Manual", "SORN"],
+        30: ["Manual", "SORN"],
+        31: ["Manual", "SORN"],
+    }
+
+    def transaction_row(index):
+        channel_service = TRANSACTION_INDEXES[index]
+        return channel_service + sheet[index][2:]
+
+    return extract_column_header(sheet), map(transaction_row, TRANSACTION_INDEXES.keys())
+
+
+def create_transaction_data(header, row):
+    CHANNEL_INDEX = 0
+    SERVICE_INDEX = 1
+    TRANSACTION_NAME_INDEX = 2
+    DATES_START_INDEX = 3
+    SERVICES = {
+        "Relicensing": "tax-disc",
+        "SORN": "sorn"
+    }
+
+    volumes = zip(header, row)[DATES_START_INDEX:]
+
+    def transaction_data(date_volume):
+        date, volume = date_volume
+        date = as_utc(datetime.strptime(date, "%b %Y"))
+        service = SERVICES[row[SERVICE_INDEX]]
+        channel = row[CHANNEL_INDEX].lower().replace(" ", "-")
+        transaction = row[TRANSACTION_NAME_INDEX]
+
+        return [date.isoformat(), service, channel, transaction, volume]
+
+    return map(transaction_data, volumes)
+
+
+def remove_summary_columns(sheet):
+    DATES_START_INDEX = 3
+    DATE_REGEXP = re.compile("[A-Z][a-z]{2}\s\d{4}")
+
+    header = extract_column_header(sheet)
+
+    def add_date_index(mem, i):
+        if bool(DATE_REGEXP.match(header[i])):
+            mem.append(i)
+            return mem
+        else:
+            return mem
+
+    date_indexes = reduce(add_date_index, range(DATES_START_INDEX, len(header)), [])
+
+    def remove_columns_from_row(row):
+        return row[:DATES_START_INDEX] + [row[i] for i in date_indexes]
+
+
+    return map(remove_columns_from_row, sheet)
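
Taken together: remove_summary_columns keeps the first three columns plus every column whose header matches a "Mon YYYY" date, dropping summary columns such as "2012/13 Total"; extract_transaction_rows picks out the fixed spreadsheet rows and tags each with its channel and service; create_transaction_data then fans one row out into one record per date column. A worked sketch of that last step with made-up values (Python 2, which the module assumes: zip returns a list and reduce is a builtin):

    from backdrop.contrib.evl_volumetrics import create_transaction_data

    header = ["Channel Descriptions", "", "Transaction", "Apr 2012", "May 2012"]
    row = ["Assisted Digital", "Relicensing",
           "V-V10 Licence Application Post Office", 1000, 1100]

    create_transaction_data(header, row)
    # [["2012-04-01T00:00:00+00:00", "tax-disc", "assisted-digital",
    #   "V-V10 Licence Application Post Office", 1000],
    #  ["2012-05-01T00:00:00+00:00", "tax-disc", "assisted-digital",
    #   "V-V10 Licence Application Post Office", 1100]]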
15 changes: 14 additions & 1 deletion backdrop/core/upload/parse_excel.py
@@ -4,6 +4,18 @@
 from backdrop.core.errors import ParseError
 from backdrop.core.timeutils import utc

+class ExcelError(object):
+    def __init__(self, description):
+        self.description = description
+
+    def __eq__(self, other):
+        return isinstance(other, self.__class__) and \
+            self.description == other.description
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+EXCEL_ERROR = ExcelError("error in cell")

 def parse_excel(incoming_data):
     book = xlrd.open_workbook(file_contents=incoming_data.read())
@@ -26,5 +38,6 @@ def _extract_cell_value(cell, book):
         time_tuple = xlrd.xldate_as_tuple(cell.value, book.datemode)
         return utc(datetime.datetime(*time_tuple)).isoformat()
     elif cell.ctype == xlrd.XL_CELL_ERROR:
-        raise ParseError("Error encountered in cell")
+        logging.warn("Encountered errors in cells when parsing excel file")
+        return EXCEL_ERROR
     return cell.value
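
The effect of this change: a broken cell no longer aborts the whole upload with a ParseError; the parser logs a warning and substitutes the EXCEL_ERROR sentinel for that cell. ExcelError defines __eq__/__ne__ so that expected rows containing the sentinel compare equal in tests. Roughly (a sketch):

    from backdrop.core.upload.parse_excel import ExcelError, EXCEL_ERROR

    EXCEL_ERROR == ExcelError("error in cell")  # True: equality compares descriptions
    EXCEL_ERROR == ExcelError("#DIV/0!")        # False
    EXCEL_ERROR != ExcelError("#DIV/0!")        # True: __ne__ delegates to __eq__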
3 changes: 2 additions & 1 deletion backdrop/write/config/test.py
@@ -10,7 +10,8 @@
 ALLOW_TEST_SIGNIN = True
 BUCKET_AUTO_ID_KEYS = {
     "bucket_with_auto_id": ["key", "start_at", "end_at"],
-    "bucket_with_timestamp_auto_id": ["_timestamp", "key"]
+    "bucket_with_timestamp_auto_id": ["_timestamp", "key"],
+    "evl_volumetrics": ["_timestamp", "service", "transaction"],
 }
 BUCKET_UPLOAD_FORMAT = {
     "bucket_with_timestamp_auto_id": "excel",
8 changes: 1 addition & 7 deletions features/contrib/evl_upload.feature
@@ -78,16 +78,10 @@ Feature: EVL Upload
{"_timestamp": "2007-07-01T00:00:00+00:00", "_id": "2007-07-01", "satisfaction_tax_disc": 1.1662755514934828, "satisfaction_sorn": 1.3581011781786714}
"""

@wip
Scenario: Upload evl volumetrics
Scenario: Upload evl volumetrics
Given a file named "evl-volumetrics.xls" with fixture "contrib/evl-volumetrics.xls"
and I am logged in
when I go to "/evl_volumetrics/upload"
and I enter "evl-volumetrics.xls" into the file upload field
and I click "Upload"
then the platform should have "336" items stored in "evl_volumetrics"
and the "evl_volumetrics" bucket should have items:
"""
{"_timestamp": "2012-08-01T00:00:00+00:00", "_id": "2013-08-01", "channel": "manual", "service": "tax-disc", "transaction": "V-V890 Another transaction", "volume" : 123456},
{"_timestamp": "2013-05-01T00:00:00+00:00", "_id": "2007-07-01", "channel": "assisted-digital", "service": "sorn", "transaction": "V-V11 Some transaction", "volume": 987654}
"""
73 changes: 36 additions & 37 deletions tests/contrib/test_evl_upload_filters.py
@@ -110,44 +110,43 @@ def test_converts_customer_satisfaction_raw_data_to_normalised_data(self):
["2013-06-01T00:00:00+00:00", "2013-06-01", 0.3, 0.4],
["2013-07-01T00:00:00+00:00", "2013-07-01", 0.5, 0.6]]))

-    @nottest
     def test_volumetrics_raw_data_to_normalised_data(self):
         raw_data = [
-            ["Ignore"],
-            ["Ignore"],
-            ["Ignore"],
-            ["Channel Descriptions", "", "Transaction", "Apr 2012"],
-            ["Assisted Digital", "Relicensing", "V-V10 Licence Application Post Office", 1000],
-            ["", "", "V-V11 Licence Renewal Reminder Post Office", 1001],
-            ["Ignore"],
-            ["", "SORN", "V-V11 Some transaction", 1003],
-            ["Ignore"],
-            ["Ignore"],
-            ["Fully Digital", "Relicensing", "V-V10 Licence Application EVL", 1006],
-            ["", "", "V-V11 Fleets", 1007],
-            ["", "", "V-V11 Licence Renewal Reminder EVL", 1008],
-            ["", "", "V-V85 and V85/1 HGV Licence Application EVL", 1009],
-            ["Ignore"],
-            ["", "SORN", "V-V11 SORN EVL", 1011],
-            ["", "", "V-V85/1 HGV SORN Declaration EVL", 1012],
-            ["", "", "V-V890 SORN Declaration EVL", 1013],
-            ["", "", "V-V890 SORN Declaration Fleets", 1014],
-            ["Ignore"],
-            ["Ignore"],
-            ["Manual", "Relicensing", "V-V890 Another transaction", 1017],
-            ["", "", "V-V11 Licence Renewal Reminder Local Office", 1018],
-            ["", "", "V-V85 and V85/1 HGV Licence Application", 1019],
-            ["Ignore"],
-            ["", "SORN", "V-V11 SORN Local Office", 1021],
-            ["", "", "V-V85/1 HGV SORN Declaration", 1022],
-            ["", "", "V-V890 SORN Declaration", 1023],
-            ["", "", "V-V890 SORN Declaration Key from Image", 1024],
-            ["", "", "V-V890 SORN Declaration Refunds Input", 1025],
-            ["", "", "V-V890 SORN Declaration Vehicles Input", 1026],
-            ["", "", "V-V890 SORN Declaration Vehicles Triage", 1027],
-            ["Ignore"],
-            ["Ignore"],
-            ["Ignore"]
+            ["_", "_", "_", "_", "_", "_"],
+            ["_", "_", "_", "_", "_", "_"],
+            ["_", "_", "_", "_", "_", "_"],
+            ["Channel Descriptions", "", "Transaction", "Apr 2012", "2012/13 Total", "Mar 2013"],
+            ["Assisted Digital", "Relicensing", "V-V10 Licence Application Post Office", 1000, 2000, 3000],
+            ["", "", "V-V11 Licence Renewal Reminder Post Office", 1001, 2001, 3001],
+            ["_", "_", "_", "_", "_", "_"],
+            ["", "SORN", "V-V11 Some transaction", 1003, 2003, 3003],
+            ["_", "_", "_", "_", "_", "_"],
+            ["_", "_", "_", "_", "_", "_"],
+            ["Fully Digital", "Relicensing", "V-V10 Licence Application EVL", 1006, 2006, 3006],
+            ["", "", "V-V11 Fleets", 1007, 2007, 3007],
+            ["", "", "V-V11 Licence Renewal Reminder EVL", 1008, 2008, 3008],
+            ["", "", "V-V85 and V85/1 HGV Licence Application EVL", 1009, 2008, 3008],
+            ["_", "_", "_", "_", "_", "_"],
+            ["", "SORN", "V-V11 SORN EVL", 1011, 2011, 3011],
+            ["", "", "V-V85/1 HGV SORN Declaration EVL", 1012, 2012, 3012],
+            ["", "", "V-V890 SORN Declaration EVL", 1013, 2013, 3013],
+            ["", "", "V-V890 SORN Declaration Fleets", 1014, 2014, 3014],
+            ["_", "_", "_", "_", "_", "_"],
+            ["_", "_", "_", "_", "_", "_"],
+            ["Manual", "Relicensing", "V-V890 Another transaction", 1017, 2017, 3017],
+            ["", "", "V-V11 Licence Renewal Reminder Local Office", 1018, 2018, 3018],
+            ["", "", "V-V85 and V85/1 HGV Licence Application", 1019, 2019, 3019],
+            ["_", "_", "_", "_", "_", "_"],
+            ["", "SORN", "V-V11 SORN Local Office", 1021, 2021, 3021],
+            ["", "", "V-V85/1 HGV SORN Declaration", 1022, 2022, 3022],
+            ["", "", "V-V890 SORN Declaration", 1023, 2023, 3023],
+            ["", "", "V-V890 SORN Declaration Key from Image", 1024, 2024, 3024],
+            ["", "", "V-V890 SORN Declaration Refunds Input", 1025, 2025, 3025],
+            ["", "", "V-V890 SORN Declaration Vehicles Input", 1026, 2026, 3026],
+            ["", "", "V-V890 SORN Declaration Vehicles Triage", 1027, 2027, 3027],
+            ["_", "_", "_", "_", "_", "_"],
+            ["_", "_", "_", "_", "_", "_"],
+            ["_", "_", "_", "_", "_", "_"],
         ]

         data = volumetrics([[], [], raw_data])
@@ -156,4 +155,4 @@ def test_volumetrics_raw_data_to_normalised_data(self):

         assert_that(header, is_(["_timestamp", "service", "channel", "transaction", "volume"]))
         assert_that(rows[0], is_(["2012-04-01T00:00:00+00:00", "tax-disc", "assisted-digital", "V-V10 Licence Application Post Office", 1000]))
-        assert_that(rows[-1], is_(["2012-04-01T00:00:00+00:00", "sorn", "manual", "V-V890 SORN Declaration Vehicles Triage", 1027]))
+        assert_that(rows[-1], is_(["2013-03-01T00:00:00+00:00", "sorn", "manual", "V-V890 SORN Declaration Vehicles Triage", 3027]))
90 changes: 2 additions & 88 deletions tests/contrib/test_evl_volumetrics.py
@@ -1,94 +1,8 @@
-from itertools import ifilter
-from pprint import pprint
-import re
 import unittest
-from hamcrest import *
-from datetime import datetime
-from backdrop.core.timeutils import as_utc


-def extract_column_header(sheet):
-    HEADER_INDEX = 3
-    return sheet[HEADER_INDEX]
-
-
-def extract_transaction_rows(sheet):
-    TRANSACTION_INDEXES = {
-        4: ["Assisted Digital", "Relicensing"],
-        5: ["Assisted Digital", "Relicensing"],
-        7: ["Assisted Digital", "SORN"],
-        10: ["Fully Digital", "Relicensing"],
-        11: ["Fully Digital", "Relicensing"],
-        12: ["Fully Digital", "Relicensing"],
-        13: ["Fully Digital", "Relicensing"],
-        15: ["Fully Digital", "SORN"],
-        16: ["Fully Digital", "SORN"],
-        17: ["Fully Digital", "SORN"],
-        18: ["Fully Digital", "SORN"],
-        21: ["Manual", "Relicensing"],
-        22: ["Manual", "Relicensing"],
-        23: ["Manual", "Relicensing"],
-        25: ["Manual", "SORN"],
-        26: ["Manual", "SORN"],
-        27: ["Manual", "SORN"],
-        28: ["Manual", "SORN"],
-        29: ["Manual", "SORN"],
-        30: ["Manual", "SORN"],
-        31: ["Manual", "SORN"],
-    }
-
-    def transaction_row(index):
-        channel_service = TRANSACTION_INDEXES[index]
-        return channel_service + sheet[index][2:4]
-
-    return extract_column_header(sheet), map(transaction_row, TRANSACTION_INDEXES.keys())
-
-
-def create_transaction_data(header, row):
-    CHANNEL_INDEX = 0
-    SERVICE_INDEX = 1
-    TRANSACTION_NAME_INDEX = 2
-    DATES_START_INDEX = 3
-    SERVICES = {
-        "Relicensing": "tax-disc",
-        "SORN": "sorn"
-    }
-
-    volumes = zip(header, row)[DATES_START_INDEX:]
-
-    def transaction_data(date_volume):
-        date, volume = date_volume
-        date = as_utc(datetime.strptime(date, "%b %Y"))
-        service = SERVICES[row[SERVICE_INDEX]]
-        channel = row[CHANNEL_INDEX].lower().replace(" ", "-")
-        transaction = row[TRANSACTION_NAME_INDEX]
-
-        return [date.isoformat(), service, channel, transaction, volume]
-
-    return map(transaction_data, volumes)
-
-
-def remove_summary_columns(sheet):
-    DATES_START_INDEX = 3
-    DATE_REGEXP = re.compile("[A-Z][a-z]{2}\s\d{4}")
-
-    header = extract_column_header(sheet)
-
-    def add_date_index(mem, i):
-        if bool(DATE_REGEXP.match(header[i])):
-            mem.append(i)
-            return mem
-        else:
-            return mem
-
-    date_indexes = reduce(add_date_index, range(DATES_START_INDEX, len(header)), [])
-
-    def remove_columns_from_row(row):
-        return row[:DATES_START_INDEX] + [row[i] for i in date_indexes]
-
-
-    return map(remove_columns_from_row, sheet)
+from hamcrest import *

+from backdrop.contrib.evl_volumetrics import extract_transaction_rows, create_transaction_data, remove_summary_columns


 class TestEVLVolumetrics(unittest.TestCase):
13 changes: 6 additions & 7 deletions tests/core/upload/test_parse_excel.py
@@ -2,7 +2,7 @@
 from hamcrest import assert_that, only_contains, contains
 from backdrop.core.errors import ParseError

-from backdrop.core.upload.parse_excel import parse_excel
+from backdrop.core.upload.parse_excel import parse_excel, ExcelError, EXCEL_ERROR
 from tests.support.test_helpers import fixture_path, d_tz


@@ -33,12 +33,11 @@ def test_parse_xls_file(self):
         )))

     def test_parse_xlsx_with_error(self):
-        def traverse_file(filename):
-            for sheet in self._parse_excel(filename):
-                for _ in sheet:
-                    pass
-
-        self.assertRaises(ParseError, traverse_file, "error.xlsx")
+        assert_that(self._parse_excel("error.xlsx"), contains(contains(
+            ["date", "name", "number", "error"],
+            ["2013-12-03T13:30:00+00:00", "test1", 12, EXCEL_ERROR],
+            ["2013-12-04T00:00:00+00:00", "test2", 34, EXCEL_ERROR],
+        )))

     def test_parse_xlsx_with_multiple_sheets(self):
         assert_that(self._parse_excel("multiple_sheets.xlsx"), contains(
