**Author:** J. Žovák, `482857@mail.muni.cz`

In [None]:
import numpy as np
from itertools import product

In [None]:
def precompute_bucket_ids(n_categories):
    """Enumerate every bucket id for a hierarchy of categorical levels.

    A bucket id is one category index per level, so the result is the
    Cartesian product ``range(n_0) x range(n_1) x ...`` in lexicographic
    order (the last level varies fastest).

    Args:
        n_categories: Iterable of ints, the number of categories at each level.

    Returns:
        ``np.ndarray`` of dtype ``int32`` with one bucket id per row and one
        column per level, i.e. shape ``(number_of_buckets, number_of_levels)``.
    """
    n_categories = list(n_categories)
    # generate all possible combinations of categories for each level
    all_combinations = list(product(*(range(n) for n in n_categories)))
    # Reshape explicitly: if some level has zero categories there are no
    # combinations, and np.array([]) alone would collapse to shape (0,) and
    # drop the level axis that callers broadcast against.
    return np.array(all_combinations, dtype=np.int32).reshape(
        len(all_combinations), len(n_categories)
    )

# Demo: three levels with two categories each -> 2 * 2 * 2 = 8 bucket ids.
n_categories = [2, 2, 2]  # number of categories at each level
bucket_ids = precompute_bucket_ids(n_categories)
# Bare expression: shown as the cell output when run in a notebook.
bucket_ids

In [None]:
def compute_ratios_for_attribute_filters(data_prediction, attribute_filters, n_categories):
    """Compute, for each attribute filter, the share of its rows in every bucket.

    Args:
        data_prediction: 2-D array-like with one row per sample and one column
            per level; rows are compared against the bucket ids.
        attribute_filters: Iterable of filters. Each filter is an array-like of
            1-based row numbers into ``data_prediction`` (any shape; it is
            flattened before use). Filters may differ in length.
        n_categories: Number of categories per level, forwarded to
            ``precompute_bucket_ids``.

    Returns:
        A list with one dict per filter. Each dict maps every bucket id (a
        tuple of plain ints) to the fraction of the filter's rows that equal
        that bucket. An empty filter maps every bucket to ``0.0``.
    """
    data_prediction = np.asarray(data_prediction)
    all_bucket_ids = precompute_bucket_ids(n_categories)
    # Plain-int tuples hash and compare exactly like the int32 rows they come
    # from, but print cleanly; build them once and reuse them for every filter.
    bucket_keys = [tuple(row) for row in all_bucket_ids.tolist()]

    filters_ratios_list = []

    for filter_array in attribute_filters:
        # Filters are 1-based; shift to NumPy's 0-based row indices.
        # NOTE(review): a row number of 0 becomes -1 and silently selects the
        # last row - confirm that callers never pass 0.
        row_indices = np.asarray(filter_array).ravel() - 1

        if row_indices.size == 0:
            # No rows selected: report 0.0 everywhere instead of dividing 0 by 0
            # (which would yield NaN and a RuntimeWarning).
            filters_ratios_list.append(dict.fromkeys(bucket_keys, 0.0))
            continue

        filter_data = data_prediction[row_indices]

        # Broadcast (rows, 1, levels) against (buckets, levels); a row matches a
        # bucket only if it agrees on every level.
        matches = np.all(filter_data[:, None] == all_bucket_ids, axis=2)

        ratios = matches.sum(axis=0) / len(filter_data)

        filters_ratios_list.append(dict(zip(bucket_keys, ratios.tolist())))

    return filters_ratios_list

# Toy input: four samples, one row per sample and one column per level.
data_prediction = np.array([[0,0,0], [0,0,1], [0,1,1], [1,1,1]])
# A single filter selecting all four rows; row numbers are 1-based because
# compute_ratios_for_attribute_filters subtracts 1 before indexing.
attribute_filter = np.array([[1, 2, 3, 4]])

# n_categories is the module-level list defined in an earlier cell.
filters_ratios = compute_ratios_for_attribute_filters(data_prediction, attribute_filter, n_categories)
# Bare expression: shown as the cell output when run in a notebook.
filters_ratios

In [None]:
def combine_probabilities(filters_ratios):
    """Extend each filter's bucket probabilities with aggregated prefix entries.

    For every bucket tuple and every position whose value is not -1, the
    bucket's probability is added to the key obtained by keeping the values
    before that position and replacing that position and everything after it
    with -1. With -1 acting as a wildcard, ``(a, -1, -1)`` therefore holds the
    summed probability of all buckets starting with ``a``, and the all -1 key
    holds the grand total.

    Args:
        filters_ratios: List of dicts mapping bucket tuples to probabilities.

    Returns:
        A new list of dicts, one per input dict. Each is a copy of the input
        dict with the aggregated keys added; an aggregated key that already
        exists in the input is overwritten. The inputs are not modified.
    """
    results = []

    for leaf_ratios in filters_ratios:
        prefix_totals = {}

        for bucket, prob in leaf_ratios.items():
            depth = len(bucket)
            for level, value in enumerate(bucket):
                # Positions that are already wildcards contribute nothing.
                if not (value != -1):
                    continue
                wildcard_key = bucket[:level] + (-1,) * (depth - level)
                prefix_totals[wildcard_key] = prefix_totals.get(wildcard_key, 0) + prob

        # Start from the original entries, then lay the aggregates on top.
        merged = leaf_ratios.copy()
        merged.update(prefix_totals)
        results.append(merged)

    return results


In [None]:
# Add the aggregated (-1 wildcard) entries to the per-filter ratios computed
# in an earlier cell.
combined_filter_ratios = combine_probabilities(filters_ratios)
# Bare expression: shown as the cell output when run in a notebook.
combined_filter_ratios

In [None]:
import matplotlib.pyplot as plt
import networkx as nx

def visualize_tree_dict_top_to_bottom(tree_dict):
    """Draw the bucket hierarchy stored in ``tree_dict`` as a directed graph.

    Keys are tuples in which -1 acts as a wildcard. An edge parent -> child is
    added whenever the parent has exactly one more -1 than the child and agrees
    with the child at every position where the parent is not -1. Each edge is
    labelled with the child's value from ``tree_dict``. Keys that take part in
    no edge are not drawn.

    Args:
        tree_dict: Dict mapping bucket tuples to weights.

    Node positions come from the Graphviz ``dot`` program through
    ``nx.nx_agraph.graphviz_layout``; the figure is shown with ``plt.show()``.
    """
    graph = nx.DiGraph()

    def is_direct_parent(candidate, child):
        # One wildcard more than the child, identical everywhere else.
        if candidate.count(-1) != child.count(-1) + 1:
            return False
        return all(c == n or c == -1 for c, n in zip(candidate, child))

    for child, child_weight in tree_dict.items():
        for candidate in tree_dict:
            if is_direct_parent(candidate, child):
                graph.add_edge(candidate, child, weight=child_weight)

    # arrange nodes from top to bottom
    layout = nx.nx_agraph.graphviz_layout(graph, prog='dot')
    edge_labels = nx.get_edge_attributes(graph, 'weight')
    nx.draw(graph, layout, with_labels=True, node_size=700, node_color="lightblue")
    nx.draw_networkx_edge_labels(graph, layout, edge_labels=edge_labels)

    plt.show()

# Draw the tree for the first entry, i.e. the first attribute filter.
visualize_tree_dict_top_to_bottom(combined_filter_ratios[0])