/
breakdown_result.py
92 lines (70 loc) · 2.94 KB
/
breakdown_result.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from typing import List
from vortexasdk.api.entity_flattening import convert_to_flat_dict
from vortexasdk.api.breakdown_item import BreakdownItem
import pandas as pd
import functools
import os
from multiprocessing.pool import Pool
from vortexasdk.api.search_result import Result
from vortexasdk.logger import get_logger
from vortexasdk.result_conversions import create_dataframe, create_list
# Module-level logger, requested from the SDK logging helper under this module's name.
logger = get_logger(__name__)
class BreakdownResult(Result):
    """Container class that holds the result obtained from calling a breakdown endpoint."""

    def to_list(self) -> List[BreakdownItem]:
        """Represents the breakdown as a list of `BreakdownItem`s."""
        # noinspection PyTypeChecker
        return create_list(super().to_list(), BreakdownItem)

    def to_df(self, columns=None) -> pd.DataFrame:
        """
        Represents the breakdown as a dataframe.

        # Arguments
            columns: Output columns present in the `pd.DataFrame`.
            Enter `columns='all'` to return all available columns.
            Enter `columns=None` to use `breakdown_result.DEFAULT_COLUMNS`.

        # Returns
        A `pd.DataFrame` of breakdown items with columns:

            key: The breakdown key
            value: The value of the breakdown for a given key
            count: The number of records contributing to this breakdown record.
            breakdown: additional information about the aggregation.

        The `key` column, when present in the output, is parsed with `pd.to_datetime`.
        If the requested columns do not include `key`, the frame is returned as-is.

        # Example:
        If we're aggregating average vessel speeds by day, then the `key` column holds the date,
        the `value` holds the average speed on the day, the `count` holds
        the number of vessel movements contributing towards this average, and breakdown
        provides further information about the aggregation.

        By default, the columns returned are something along the lines of.
        ```python
        DEFAULT_COLUMNS = [
            'key',
            'value',
            'count',
            'breakdown.0.label',
            'breakdown.0.count',
            'breakdown.0.value'
        ]
        ```
        Note that there can be more than one breakdown entry in the response. To access further
        breakdown objects, replace the index 0 with another number (1, 2, 3 etc.), such as
        `['breakdown.1.label', 'breakdown.2.label', 'breakdown.3.label']` etc.
        """
        if columns is None:
            columns = DEFAULT_COLUMNS

        logger.debug("Converting each breakdown to a flat dictionary")
        flatten = functools.partial(convert_to_flat_dict, cols=columns)

        # Flattening is fanned out over one worker process per CPU.
        with Pool(os.cpu_count()) as pool:
            records = pool.map(flatten, super().to_list())

        df = create_dataframe(
            columns=columns,
            default_columns=DEFAULT_COLUMNS,
            data=records,
            logger_description="Breakdown",
        )

        # Only parse `key` when the frame actually has it. Indexing a missing
        # column raises KeyError, which previously broke calls whose `columns`
        # omitted `key`.
        # NOTE(review): presumably `create_dataframe` restricts the frame to
        # `columns` - confirm against `result_conversions`.
        if "key" in df.columns:
            df["key"] = pd.to_datetime(df["key"])

        return df
# Columns used by `BreakdownResult.to_df` when the caller passes `columns=None`:
# the three top-level fields of a record, followed by the label/count/value
# fields of the first (index 0) nested breakdown entry in flattened dot notation.
DEFAULT_COLUMNS = [
    *("key", "value", "count"),
    *(f"breakdown.0.{field}" for field in ("label", "count", "value")),
]