Skip to content

Commit

Permalink
Merge f670d75 into 605d2b2
Browse files Browse the repository at this point in the history
  • Loading branch information
aawdls committed Dec 5, 2019
2 parents 605d2b2 + f670d75 commit 15ce2bb
Show file tree
Hide file tree
Showing 12 changed files with 183 additions and 11 deletions.
1 change: 1 addition & 0 deletions .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -340,3 +340,4 @@ int-import-graph=
# Exceptions that will emit a warning when being caught. Defaults to
# "Exception"
overgeneral-exceptions=Exception

2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ python:
- "3.5"
- "3.6"
- "3.7"
- "3.8"

install:
- pip install -r requirements.txt
Expand All @@ -14,3 +15,4 @@ script:

after_success:
- coveralls

2 changes: 1 addition & 1 deletion NOTICE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Copyright 2018 Diamond Light Source Ltd
Copyright 2019 Diamond Light Source Ltd

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
85 changes: 78 additions & 7 deletions aa/pb.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,20 @@
correctly deduce some properties, so I have manually disabled some warnings.
"""
import datetime
import os
import re
import collections
import logging as log

import pytz
import requests

from . import data, fetcher, utils
from . import epics_event_pb2 as ee



# It is not clear to me why I can't extract this information
# from the compiled protobuf file.
TYPE_MAPPINGS = {
Expand All @@ -52,16 +55,19 @@
}


INVERSE_TYPE_MAPPINGS={cls:numeric for numeric, cls in TYPE_MAPPINGS.items()}


ESC_BYTE = b'\x1B'
NL_BYTE = b'\x0A'
CR_BYTE = b'\x0D'

# The character sequences required to unescape the AA pb file format.
PB_REPLACEMENTS = {
ESC_BYTE + b'\x01': ESC_BYTE,
ESC_BYTE + b'\x02': NL_BYTE,
ESC_BYTE + b'\x03': CR_BYTE
}
PB_REPLACEMENTS = collections.OrderedDict([
(ESC_BYTE + b'\x01', ESC_BYTE),
(ESC_BYTE + b'\x02', NL_BYTE),
(ESC_BYTE + b'\x03', CR_BYTE),
])


def unescape_bytes(byte_seq):
Expand All @@ -75,8 +81,24 @@ def unescape_bytes(byte_seq):
Returns:
the byte sequence unescaped according to the AA file format rules
"""
for r in PB_REPLACEMENTS:
byte_seq = byte_seq.replace(r, PB_REPLACEMENTS[r])
for key, value in PB_REPLACEMENTS.items():
byte_seq = byte_seq.replace(key, value)
return byte_seq


def escape_bytes(byte_seq):
    """Escape specific sub-sequences in a byte sequence.

    This escaping is defined as part of the Archiver Appliance raw file
    format: https://slacmshankar.github.io/epicsarchiver_docs/pb_pbraw.html

    Args:
        byte_seq: any byte sequence

    Returns:
        the byte sequence escaped according to the AA file format rules
    """
    escaped = byte_seq
    # NOTE: relies on PB_REPLACEMENTS iteration order — the escape byte
    # itself is replaced first, so escape bytes introduced by the later
    # replacements are not escaped a second time.
    for escaped_form, raw_form in PB_REPLACEMENTS.items():
        escaped = escaped.replace(raw_form, escaped_form)
    return escaped


Expand Down Expand Up @@ -106,6 +128,15 @@ def search_events(dt, chunk_info, lines):


def break_up_chunks(raw_data):
"""
Break up raw data into chunks by year
Args:
raw_data: Raw data from file
Returns:
OrderedDict: keys are years; values are lists of chunks
"""
chunks = [chunk.strip() for chunk in raw_data.split(b'\n\n')]
log.info('{} chunks in pb file'.format(len(chunks)))
year_chunks = collections.OrderedDict()
Expand All @@ -126,6 +157,18 @@ def break_up_chunks(raw_data):


def event_from_line(line, pv, year, event_type):
"""
Get an ArchiveEvent from this line
Args:
line: A line of chunks of data
pv: Name of the PV
year: Year of interest
event_type: Need to know the type of the event as key of TYPE_MAPPINGS
Returns:
ArchiveEvent
"""
unescaped = unescape_bytes(line)
event = TYPE_MAPPINGS[event_type]()
event.ParseFromString(unescaped)
Expand All @@ -135,16 +178,33 @@ def event_from_line(line, pv, year, event_type):
event.severity)



def parse_pb_data(raw_data, pv, start, end, count=None):
"""
Turn raw PB data into an ArchiveData object
Args:
raw_data: The raw data
pv: name of PV
start: datetime.datetime for start of window
end: datetime.datetime for end of window
count: return up to this many events
Returns:
An ArchiveData object
"""
year_chunks = break_up_chunks(raw_data)
events = []
# Iterate over years
for year, (chunk_info, lines) in year_chunks.items():
# Find the index of the start event
if start.year == year: # search for the start
s = search_events(start, chunk_info, lines)
elif start.year > year: # ignore this chunk
s = len(lines) - 1
else: # start.year < year: all events from the start of the year
s = 0
# Find the index of the end event
if end.year == year: # search for the end
e = search_events(end, chunk_info, lines)
elif end.year < year: # ignore this chunk
Expand Down Expand Up @@ -216,3 +276,14 @@ def _get_values(self, pv, start, end=None, count=None, request_params=None):
pb_files.append(self._get_pb_file(pv, year))
log.info('Parsing pb files {}'.format(pb_files))
return self._read_pb_files(pb_files, pv, start, end, count)


def get_iso_timestamp_for_event(year, event, zone="Europe/London"):
    """Return an ISO 8601 timestamp string for the given event and year.

    Args:
        year: the year in which the event occurred
        event: a protobuf event object (presumably with secondsintoyear
            and nano fields set — see event_timestamp)
        zone: tz database name of the timezone used to render the
            timestamp; defaults to "Europe/London" for backward
            compatibility

    Returns:
        ISO 8601 formatted timestamp string, including the UTC offset
    """
    timestamp = event_timestamp(year, event)
    timezone = pytz.timezone(zone)
    return datetime.datetime.fromtimestamp(
        timestamp,
        timezone
    ).isoformat()
51 changes: 51 additions & 0 deletions aa/storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
class ParsingError(Exception):
    """Raised when a storage path cannot be interpreted as a PV name."""
    pass


def pv_name_from_path(path):
    """Given a path to a PB file relative to storage root, infer what the
    PV name is.

    In the AA storage, a PV name is broken up into parts
    and there are subdirectories with each part of the name.
    This method reverses that process so you can work out
    what a PV name is from the path to a PB file.

    The first three slashes in the path become dashes in the
    PV name. Subsequent slashes become colons.

    E.g. "BL14I/MO/STAGE/01/XF.RBV:2019.pb" goes to
    "BL14I-MO-STAGE-01:XF.RBV"

    Args:
        path: path to a PB file, relative to the storage root

    Returns:
        The inferred PV name

    Raises:
        ParsingError: if the path does not look like an AA PB file path
    """
    # Strip a leading "./" relative specifier if present. (Checking for
    # the full "./" prefix avoids mangling names that merely start with
    # a dot, and is safe on an empty string.)
    if path.startswith("./"):
        path = path[2:]

    split_by_dot = path.split(".")
    # There will be one dot if the PV does not specify a field
    # (NAME:YYYY.pb), otherwise two (NAME.FIELD:YYYY.pb).
    if len(split_by_dot) not in [2, 3] or split_by_dot[-1] != "pb":
        raise ParsingError("Does not look like a protobuf file")

    # Strip off the trailing part of the file name, e.g. year.
    before_colon = path.split(":")[0]
    path_as_list = before_colon.split("/")

    # Check that PV name matches convention in that it has at least 4 parts:
    # AA-BB-CC-DD[:EE:FF...]
    if len(path_as_list) < 4:
        raise ParsingError("Path too short")

    # The first four parts are joined by dashes.
    pv_stem = "-".join(path_as_list[0:4])

    # Subsequent parts are joined by colons.
    if len(path_as_list) > 4:
        return ":".join([pv_stem] + path_as_list[4:])
    return pv_stem

4 changes: 4 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[pytest]
filterwarnings =
ignore::DeprecationWarning

1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ protobuf
pytz
requests
tzlocal
click
# Development
pluggy>=0.12.0
pytest
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@
'protobuf',
'pytz',
'requests',
'tzlocal'
'tzlocal',
]
)
Binary file added test/data/wrong_type.pb
Binary file not shown.
20 changes: 20 additions & 0 deletions test/test_pb.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,18 @@ def test_unescape_bytes_handles_example_escaped_bytes():
assert pb.unescape_bytes(test_bytes) == b'hello' + pb.NL_BYTE + b'bye'


def test_escape_bytes_does_not_change_regular_bytes():
    # Bytes containing none of ESC/NL/CR must pass through untouched.
    plain_bytes = b'hello:-1|bye'
    result = pb.escape_bytes(plain_bytes)
    assert result == plain_bytes


def test_escape_bytes_handles_example_unescaped_bytes():
    # NL and ESC bytes must each be replaced by their two-byte
    # escape sequences.
    raw = b''.join([b'hello', b'\x0A', b'bye', b'\x1B'])
    escaped = b''.join(
        [b'hello', pb.ESC_BYTE, b'\x02', b'bye', pb.ESC_BYTE, b'\x01']
    )
    assert pb.escape_bytes(raw) == escaped


def test_event_timestamp_gives_correct_answer_1970():
event = mock.MagicMock()
event.secondsintoyear = 10
Expand Down Expand Up @@ -152,3 +164,11 @@ def test_PbFileFetcher_read_pb_files_omits_subsequent_event(dummy_pv, jan_2001):
dec_2015 = utils.utc_datetime(2015, 12, 1)
data = fetcher._read_pb_files([filepath], dummy_pv, jan_2001, dec_2015, None)
assert len(data) == 0

def test_get_iso_timestamp_for_event_has_expected_output():
    # Timestamp falls in June so the Europe/London offset is +01:00 (BST).
    year = 2017
    event = ee.ScalarInt()
    event.secondsintoyear = 15156538
    event.nano = 381175701
    result = pb.get_iso_timestamp_for_event(year, event)
    assert result == "2017-06-25T11:08:58.381176+01:00"
21 changes: 21 additions & 0 deletions test/test_storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import pytest
from aa import storage

# Pairs of (storage-relative PB file path, expected PV name).
PV_NAME_CASES = [
    ("BL13J/MO/PI/01/X:2018.pb", "BL13J-MO-PI-01:X"),
    ("BL14I/VA/IONP/14/SP1OFF:2015.pb", "BL14I-VA-IONP-14:SP1OFF"),
    ("BL14I/MO/STAGE/01/ROT/FERROR:2017.pb", "BL14I-MO-STAGE-01:ROT:FERROR"),
    ("BL14I/MO/STAGE/01/XF.RBV:2019.pb", "BL14I-MO-STAGE-01:XF.RBV"),
]


@pytest.mark.parametrize("path, pv", PV_NAME_CASES)
def test_pv_name_from_path(path, pv):
    assert storage.pv_name_from_path(path) == pv

# Paths that do not follow the AA storage naming convention.
UNCONVENTIONAL_PATHS = [
    "BL13J/MO/PI:2018.pb",
    "BL13J:2018.pb",
    "BL14I/MO/STAGE/01/XF.RBV",
]


@pytest.mark.parametrize("path", UNCONVENTIONAL_PATHS)
def test_pv_name_from_path_raises_ParsingError_if_not_conventional(path):
    with pytest.raises(storage.ParsingError):
        storage.pv_name_from_path(path)

5 changes: 3 additions & 2 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@

[tox]
envlist = py27, py35, py36, py37
envlist = py27, py35, py36, py37, py38
toxworkdir = {env:TOXDIR:.tox}/aapy

[testenv]
deps = -rrequirements.txt
passenv = TRAVIS TRAVIS_*
commands =
# run tests in test directory and get coverage on aa package
python -m pytest --cov=aa test
python -m pytest -vv --cov=aa test
# run pylint only on aa package
python -m pytest --pylint --pylint-error-types=WEF aa # run pylint

0 comments on commit 15ce2bb

Please sign in to comment.