Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changes required for PB tools #58

Merged
merged 5 commits into from
Dec 5, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -340,3 +340,4 @@ int-import-graph=
# Exceptions that will emit a warning when being caught. Defaults to
# "Exception"
overgeneral-exceptions=Exception

2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ python:
- "3.5"
- "3.6"
- "3.7"
- "3.8"

install:
- pip install -r requirements.txt
Expand All @@ -14,3 +15,4 @@ script:

after_success:
- coveralls

2 changes: 1 addition & 1 deletion NOTICE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Copyright 2018 Diamond Light Source Ltd
Copyright 2019 Diamond Light Source Ltd

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
85 changes: 78 additions & 7 deletions aa/pb.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,20 @@
correctly deduce some properties, so I have manually disabled some warnings.

"""
import datetime
import os
import re
import collections
import logging as log

import pytz
import requests

from . import data, fetcher, utils
from . import epics_event_pb2 as ee



# It is not clear to me why I can't extract this information
# from the compiled protobuf file.
TYPE_MAPPINGS = {
Expand All @@ -52,16 +55,19 @@
}


# Reverse lookup of TYPE_MAPPINGS: maps each value back to its key.
INVERSE_TYPE_MAPPINGS = {
    message_cls: type_id for type_id, message_cls in TYPE_MAPPINGS.items()
}


ESC_BYTE = b'\x1B'
NL_BYTE = b'\x0A'
CR_BYTE = b'\x0D'

# The character sequences required to unescape the AA pb file format.
PB_REPLACEMENTS = {
ESC_BYTE + b'\x01': ESC_BYTE,
ESC_BYTE + b'\x02': NL_BYTE,
ESC_BYTE + b'\x03': CR_BYTE
}
PB_REPLACEMENTS = collections.OrderedDict([
(ESC_BYTE + b'\x01', ESC_BYTE),
(ESC_BYTE + b'\x02', NL_BYTE),
(ESC_BYTE + b'\x03', CR_BYTE),
])


def unescape_bytes(byte_seq):
Expand All @@ -75,8 +81,24 @@ def unescape_bytes(byte_seq):
Returns:
the byte sequence unescaped according to the AA file format rules
"""
for r in PB_REPLACEMENTS:
byte_seq = byte_seq.replace(r, PB_REPLACEMENTS[r])
for key, value in PB_REPLACEMENTS.items():
byte_seq = byte_seq.replace(key, value)
return byte_seq


def escape_bytes(byte_seq):
    """Escape a byte sequence for the Archiver Appliance raw file format.

    Each entry of PB_REPLACEMENTS is applied in reverse: every occurrence
    of the entry's value is replaced by the entry's key. The escaping
    rules are defined as part of the Archiver Appliance raw file format:
    https://slacmshankar.github.io/epicsarchiver_docs/pb_pbraw.html

    Args:
        byte_seq: any byte sequence
    Returns:
        the byte sequence escaped according to the AA file format rules
    """
    escaped = byte_seq
    # Substitutions are applied one after another, in the iteration
    # order of PB_REPLACEMENTS.
    for escape_seq, raw_byte in PB_REPLACEMENTS.items():
        escaped = escaped.replace(raw_byte, escape_seq)
    return escaped


Expand Down Expand Up @@ -106,6 +128,15 @@ def search_events(dt, chunk_info, lines):


def break_up_chunks(raw_data):
"""
Break up raw data into chunks by year

Args:
raw_data: Raw data from file

Returns:
OrderedDict: keys are years; values are lists of chunks
"""
chunks = [chunk.strip() for chunk in raw_data.split(b'\n\n')]
log.info('{} chunks in pb file'.format(len(chunks)))
year_chunks = collections.OrderedDict()
Expand All @@ -126,6 +157,18 @@ def break_up_chunks(raw_data):


def event_from_line(line, pv, year, event_type):
"""
Get an ArchiveEvent from this line

Args:
line: A line of chunks of data
pv: Name of the PV
year: Year of interest
event_type: Need to know the type of the event as key of TYPE_MAPPINGS

Returns:
ArchiveEvent
"""
unescaped = unescape_bytes(line)
event = TYPE_MAPPINGS[event_type]()
event.ParseFromString(unescaped)
Expand All @@ -135,16 +178,33 @@ def event_from_line(line, pv, year, event_type):
event.severity)



def parse_pb_data(raw_data, pv, start, end, count=None):
"""
Turn raw PB data into an ArchiveData object

Args:
raw_data: The raw data
pv: name of PV
start: datetime.datetime for start of window
end: datetime.datetime for end of window
count: return up to this many events

Returns:
An ArchiveData object
"""
year_chunks = break_up_chunks(raw_data)
events = []
# Iterate over years
for year, (chunk_info, lines) in year_chunks.items():
# Find the index of the start event
if start.year == year: # search for the start
s = search_events(start, chunk_info, lines)
elif start.year > year: # ignore this chunk
s = len(lines) - 1
else: # start.year < year: all events from the start of the year
s = 0
# Find the index of the end event
if end.year == year: # search for the end
e = search_events(end, chunk_info, lines)
elif end.year < year: # ignore this chunk
Expand Down Expand Up @@ -216,3 +276,14 @@ def _get_values(self, pv, start, end=None, count=None, request_params=None):
pb_files.append(self._get_pb_file(pv, year))
log.info('Parsing pb files {}'.format(pb_files))
return self._read_pb_files(pb_files, pv, start, end, count)


def get_iso_timestamp_for_event(year, event, tz_name="Europe/London"):
    """Return an ISO-formatted timestamp string for the given event and year.

    Args:
        year: year handed to event_timestamp() together with the event
        event: event handed to event_timestamp()
        tz_name: name of the pytz timezone in which the timestamp is
            expressed; defaults to "Europe/London", the value that was
            previously hard-coded

    Returns:
        The datetime.isoformat() string of the event's timestamp in the
        requested timezone.
    """
    timestamp = event_timestamp(year, event)
    timezone = pytz.timezone(tz_name)
    return datetime.datetime.fromtimestamp(timestamp, timezone).isoformat()
51 changes: 51 additions & 0 deletions aa/storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
class ParsingError(Exception):
    """Raised when a path cannot be interpreted as a path to a PB file."""


def pv_name_from_path(path):
    """Infer the PV name from a path to a PB file relative to storage root.

    In the AA storage, a PV name is broken up into parts
    and there are subdirectories with each part of the name.
    This function reverses that process so you can work out
    what a PV name is from the path to a PB file.

    The first three slashes in the path become dashes in the
    PV name. Subsequent slashes become colons.

    E.g. "BL14I/MO/STAGE/01/XF.RBV:2019.pb" goes to
    "BL14I-MO-STAGE-01:XF.RBV"

    Args:
        path: path to a PB file relative to the storage root, optionally
            prefixed with "./"

    Returns:
        The inferred PV name as a string.

    Raises:
        ParsingError: if the path is empty, its last dot-separated part is
            not "pb", it contains an unexpected number of dots, or the part
            before the first colon has fewer than four "/"-separated parts.
    """
    # Strip only a genuine "./" relative specifier. Checking the prefix
    # (rather than path[0]) avoids an IndexError on an empty path and
    # avoids silently chopping characters off other dot-prefixed paths.
    if path.startswith("./"):
        path = path[2:]

    split_by_dot = path.split(".")
    # There will be one dot if the PV does not specify a field, otherwise
    # two. An empty path yields a single part and is rejected here too.
    if len(split_by_dot) not in (2, 3) or split_by_dot[-1] != "pb":
        raise ParsingError("Does not look like a protobuf file")

    # Strip off the trailing part of the file name, e.g. the year
    before_colon = path.partition(":")[0]
    path_as_list = before_colon.split("/")

    # Check that PV name matches convention in that it has at least 4 parts:
    # AA-BB-CC-DD[:EE:FF...]
    if len(path_as_list) < 4:
        raise ParsingError("Path too short")

    # The first four parts are joined by dashes; any subsequent parts are
    # appended with colons (joining a single-element list returns the stem).
    pv_stem = "-".join(path_as_list[:4])
    return ":".join([pv_stem] + path_as_list[4:])

4 changes: 4 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[pytest]
filterwarnings =
ignore::DeprecationWarning

1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ protobuf
pytz
requests
tzlocal
click
# Development
pluggy>=0.12.0
pytest
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@
'protobuf',
'pytz',
'requests',
'tzlocal'
'tzlocal',
]
)
Binary file added test/data/wrong_type.pb
Binary file not shown.
20 changes: 20 additions & 0 deletions test/test_pb.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,18 @@ def test_unescape_bytes_handles_example_escaped_bytes():
assert pb.unescape_bytes(test_bytes) == b'hello' + pb.NL_BYTE + b'bye'


def test_escape_bytes_does_not_change_regular_bytes():
    """Bytes without ESC, NL or CR must come back unchanged."""
    plain = b'hello:-1|bye'
    escaped = pb.escape_bytes(plain)
    assert escaped == plain


def test_escape_bytes_handles_example_unescaped_bytes():
    """A newline and an escape byte are each replaced by an escape pair."""
    raw = b''.join([b'hello', b'\x0A', b'bye', b'\x1B'])
    expected = b''.join([
        b'hello', pb.ESC_BYTE, b'\x02',
        b'bye', pb.ESC_BYTE, b'\x01',
    ])
    assert pb.escape_bytes(raw) == expected


def test_event_timestamp_gives_correct_answer_1970():
event = mock.MagicMock()
event.secondsintoyear = 10
Expand Down Expand Up @@ -152,3 +164,11 @@ def test_PbFileFetcher_read_pb_files_omits_subsequent_event(dummy_pv, jan_2001):
dec_2015 = utils.utc_datetime(2015, 12, 1)
data = fetcher._read_pb_files([filepath], dummy_pv, jan_2001, dec_2015, None)
assert len(data) == 0

def test_get_iso_timestamp_for_event_has_expected_output():
    """The ISO string for a mid-2017 event carries a +01:00 offset."""
    year = 2017
    event = ee.ScalarInt()
    event.secondsintoyear = 15156538
    event.nano = 381175701
    iso_timestamp = pb.get_iso_timestamp_for_event(year, event)
    assert iso_timestamp == "2017-06-25T11:08:58.381176+01:00"
21 changes: 21 additions & 0 deletions test/test_storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import pytest
from aa import storage

@pytest.mark.parametrize(
    "path, pv",
    [
        ("BL13J/MO/PI/01/X:2018.pb", "BL13J-MO-PI-01:X"),
        ("BL14I/VA/IONP/14/SP1OFF:2015.pb", "BL14I-VA-IONP-14:SP1OFF"),
        (
            "BL14I/MO/STAGE/01/ROT/FERROR:2017.pb",
            "BL14I-MO-STAGE-01:ROT:FERROR",
        ),
        ("BL14I/MO/STAGE/01/XF.RBV:2019.pb", "BL14I-MO-STAGE-01:XF.RBV"),
    ],
)
def test_pv_name_from_path(path, pv):
    """Each conventional storage path maps to the expected PV name."""
    inferred = storage.pv_name_from_path(path)
    assert inferred == pv

# A single argname needs no trailing comma, and its values are plain
# strings rather than parenthesised expressions that resemble 1-tuples.
@pytest.mark.parametrize(
    "path",
    [
        "BL13J/MO/PI:2018.pb",
        "BL13J:2018.pb",
        "BL14I/MO/STAGE/01/XF.RBV",
    ],
)
def test_pv_name_from_path_raises_ParsingError_if_not_conventional(path):
    """Paths that are too short or lack a .pb suffix are rejected."""
    with pytest.raises(storage.ParsingError):
        storage.pv_name_from_path(path)

5 changes: 3 additions & 2 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@

[tox]
envlist = py27, py35, py36, py37
envlist = py27, py35, py36, py37, py38
toxworkdir = {env:TOXDIR:.tox}/aapy

[testenv]
deps = -rrequirements.txt
passenv = TRAVIS TRAVIS_*
commands =
# run tests in test directory and get coverage on aa package
python -m pytest --cov=aa test
python -m pytest -vv --cov=aa test
# run pylint only on aa package
python -m pytest --pylint --pylint-error-types=WEF aa # run pylint