Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for grouped time series #51

Merged
merged 6 commits into from Feb 23, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
63 changes: 42 additions & 21 deletions hts/functions.py
Expand Up @@ -21,34 +21,55 @@ def to_sum_mat(ntree: NAryTreeT):

Returns
-------
numpy.ndarray
Summing matrix.

"""
nodes = ntree.level_order_traversal()
node_labels = ntree.get_level_order_labels()
num_at_level = list(map(sum, nodes))
columns = num_at_level[-1]
bl_mat = np.identity(columns)

# Initialize summing matrix with bottom level rows
sum_mat = np.identity(columns)

# Names of each row in summing matrix.
sum_mat_labels = []

# Bottom level matrix labels, with indices corresponding to column in summing matrix
bl_mat_idx_ref = node_labels[-1]

# Skip total and bottom level of tree. Rows added outside of loop.
for level in node_labels[1:-1]:
for label in level:
# Exclude duplicates specified in tree
if label not in sum_mat_labels:
row = []
for bl_element in bl_mat_idx_ref:
# Check if the bottom level element is part of label
is_component = all(
[True if l in bl_element else False for l in label.split("_")]
)
if is_component:
row.append(1)
else:
row.append(0)

# Add row corresponding to label to top of summing matrix
row = np.array(row)
sum_mat = np.vstack((row, sum_mat))
sum_mat_labels.append(label)

# Add top as first row in summing matrix
top = np.ones(columns)
final_mat = bl_mat
num_levels = len(num_at_level)

for lev in range(num_levels - 1):
summing = nodes[-(lev + 1)]
count = 0
num2sum_ind = 0
B = np.zeros([num_at_level[-1]])
for num2sum in summing:
num2sum_ind += num2sum
a = bl_mat[count:num2sum_ind, :]
count += num2sum
if np.all(B == 0):
B = a.sum(axis=0)
else:
B = np.vstack((B, a.sum(axis=0)))
final_mat = np.vstack((B, final_mat))
bl_mat = B
sum_mat = np.vstack((top, sum_mat))

# Reverse list of labels to match summing matrix, since vstack and append worked in the opposite order.
# Not currently returned, but could be for information or matrix alignment.
sum_mat_labels.reverse()
sum_mat_labels = ["total"] + sum_mat_labels + bl_mat_idx_ref

final_mat = np.vstack((top, final_mat))
return final_mat
return sum_mat


def project(
Expand Down
25 changes: 25 additions & 0 deletions hts/hierarchy/__init__.py
Expand Up @@ -316,6 +316,31 @@ def level_order_traversal(self: NAryTreeT) -> List[List[int]]:
res[li].append(len(n.children))
return res[:-1]

def get_level_order_labels(self: NAryTreeT) -> List[List[str]]:
    """
    Collect node keys level by level, starting from this node.

    The tree is walked breadth-first, so ``labels[d]`` holds the ``key`` of
    every node found at depth ``d`` (this node is depth 0). Within a level,
    keys appear in the order the nodes are reached: parents in the order
    they were queued, each parent's children in ``children`` order.

    Returns
    -------
    List[List[str]]
        One list of node keys per tree level, this node's level first.
    """
    labels = []
    queue = deque([(self, 0)])
    while queue:
        node, depth = queue.popleft()
        # First node seen at this depth: open a new list for the level.
        if len(labels) <= depth:
            labels.append([])
        labels[depth].append(node.key)
        # Children sit one level below the node that was just visited.
        queue.extend((child, depth + 1) for child in node.children)
    return labels

def add_child(self, key=None, item=None, exogenous=None) -> NAryTreeT:
child = HierarchyTree(key=key, item=item, exogenous=exogenous, parent=self)
self.children.append(child)
Expand Down
184 changes: 184 additions & 0 deletions tests/unit/test_functions.py
@@ -1,5 +1,7 @@
import numpy
import pandas

import hts.hierarchy
from hts.functions import to_sum_mat


Expand All @@ -17,3 +19,185 @@ def test_sum_mat_mv(mv_tree):
shp = mat.shape
assert shp[0] == mv_tree.num_nodes() + 1
assert shp[1] == mv_tree.leaf_sum()


def test_sum_mat_hierarchical():
    """Check the summing matrix of a strict hierarchy: total -> A, B -> leaves."""
    hierarchy = {
        "total": ["A", "B"],
        "A": ["A_X", "A_Y", "A_Z"],
        "B": ["B_X", "B_Y"],
    }
    # The tree is built from a frame holding one (empty) column per node.
    node_names = ["total", "A", "B", "A_X", "A_Y", "A_Z", "B_X", "B_Y"]
    hier_df = pandas.DataFrame(data={name: [] for name in node_names})

    tree = hts.hierarchy.HierarchyTree.from_nodes(hierarchy, hier_df)
    actual = to_sum_mat(tree)

    # Columns are the bottom-level series: A_X, A_Y, A_Z, B_X, B_Y.
    expected = numpy.array(
        [
            [1, 1, 1, 1, 1],  # total
            [0, 0, 0, 1, 1],  # B
            [1, 1, 1, 0, 0],  # A
            [1, 0, 0, 0, 0],  # A_X
            [0, 1, 0, 0, 0],  # A_Y
            [0, 0, 1, 0, 0],  # A_Z
            [0, 0, 0, 1, 0],  # B_X
            [0, 0, 0, 0, 1],  # B_Y
        ]
    )

    numpy.testing.assert_array_equal(actual, expected)


def test_sum_mat_grouped():
    """Check the summing matrix when two groupings (A/B and X/Y) share leaves."""
    # X and Y are listed under "total" but get no children of their own here;
    # their expected rows below select the leaves whose names contain X or Y.
    hierarchy = {
        "total": ["A", "B", "X", "Y"],
        "A": ["A_X", "A_Y"],
        "B": ["B_X", "B_Y"],
    }
    node_names = ["total", "A", "B", "X", "Y", "A_X", "A_Y", "B_X", "B_Y"]
    grouped_df = pandas.DataFrame(data={name: [] for name in node_names})

    tree = hts.hierarchy.HierarchyTree.from_nodes(hierarchy, grouped_df)
    actual = to_sum_mat(tree)

    # Columns are the bottom-level series: A_X, A_Y, B_X, B_Y.
    expected = numpy.array(
        [
            [1, 1, 1, 1],  # total
            [0, 1, 0, 1],  # Y
            [1, 0, 1, 0],  # X
            [0, 0, 1, 1],  # B
            [1, 1, 0, 0],  # A
            [1, 0, 0, 0],  # A_X
            [0, 1, 0, 0],  # A_Y
            [0, 0, 1, 0],  # B_X
            [0, 0, 0, 1],  # B_Y
        ]
    )

    numpy.testing.assert_array_equal(actual, expected)


def test_sum_mat_visnights_hier(visnights_hier):
    """Check the summing matrix of the visnights hierarchy fixture.

    The expected matrix has 27 rows (total, 6 mid-level nodes, 20 bottom-level
    nodes) and 20 columns, one per bottom-level node.
    """
    node_names = [
        "total",
        "VIC",
        "QLD",
        "SAU",
        "WAU",
        "OTH",
        "NSW",
        "NSW_Metro",
        "NSW_NthCo",
        "NSW_NthIn",
        "NSW_SthCo",
        "NSW_SthIn",
        "OTH_Metro",
        "OTH_NoMet",
        "QLD_Cntrl",
        "QLD_Metro",
        "QLD_NthCo",
        "SAU_Coast",
        "SAU_Inner",
        "SAU_Metro",
        "VIC_EstCo",
        "VIC_Inner",
        "VIC_Metro",
        "VIC_WstCo",
        "WAU_Coast",
        "WAU_Inner",
        "WAU_Metro",
    ]
    hier_df = pandas.DataFrame(data={name: [] for name in node_names})

    tree = hts.hierarchy.HierarchyTree.from_nodes(visnights_hier, hier_df)
    actual = to_sum_mat(tree)

    # Expected column layout (bottom-level nodes):
    #    0-4   NSW_Metro, NSW_NthCo, NSW_NthIn, NSW_SthCo, NSW_SthIn
    #    5-6   OTH_Metro, OTH_NoMet
    #    7-9   WAU_Coast, WAU_Inner, WAU_Metro
    #   10-12  SAU_Coast, SAU_Inner, SAU_Metro
    #   13-15  QLD_Cntrl, QLD_Metro, QLD_NthCo
    #   16-19  VIC_EstCo, VIC_Inner, VIC_Metro, VIC_WstCo
    n_bottom = 20

    # Mid-level rows in expected order, each as (label, first column,
    # one-past-last column) of the bottom-level nodes it sums.
    mid_level_spans = [
        ("VIC", 16, 20),
        ("QLD", 13, 16),
        ("SAU", 10, 13),
        ("WAU", 7, 10),
        ("OTH", 5, 7),
        ("NSW", 0, 5),
    ]
    mid_rows = numpy.zeros((len(mid_level_spans), n_bottom), dtype=int)
    for row_idx, (_label, start, stop) in enumerate(mid_level_spans):
        mid_rows[row_idx, start:stop] = 1

    expected = numpy.vstack(
        (
            numpy.ones((1, n_bottom), dtype=int),  # total
            mid_rows,  # VIC, QLD, SAU, WAU, OTH, NSW
            numpy.identity(n_bottom, dtype=int),  # one row per bottom-level node
        )
    )

    numpy.testing.assert_array_equal(actual, expected)


def test_demo_unique_constraint():
    """Pin the known limitation when a node name occurs inside unrelated names.

    Uses the naming of the example at https://otexts.com/fpp2/hts.html.
    The result is wrong when elements are named the same but represent
    different levels in the hierarchy: the rows for A and B below each pick up
    a leaf from the other branch. This test documents that behaviour; it does
    not endorse it.
    """
    hierarchy = {
        "total": ["A", "B"],
        "A": ["AA", "AB", "AC"],
        "B": ["BA", "BB"],
    }
    node_names = ["total", "A", "B", "AA", "AB", "AC", "BA", "BB"]
    hier_df = pandas.DataFrame(data={name: [] for name in node_names})

    tree = hts.hierarchy.HierarchyTree.from_nodes(hierarchy, hier_df)
    actual = to_sum_mat(tree)

    # Columns are the bottom-level series: AA, AB, AC, BA, BB.
    expected = numpy.array(
        [
            [1, 1, 1, 1, 1],  # total
            [0, 1, 0, 1, 1],  # B, Incorrectly finds B in AB
            [1, 1, 1, 1, 0],  # A, Incorrectly finds A in BA
            [1, 0, 0, 0, 0],  # AA
            [0, 1, 0, 0, 0],  # AB
            [0, 0, 1, 0, 0],  # AC
            [0, 0, 0, 1, 0],  # BA
            [0, 0, 0, 0, 1],  # BB
        ]
    )

    numpy.testing.assert_array_equal(actual, expected)