In [None]:
import numpy as np
from itertools import product

In [None]:
def precompute_bucket_ids(n_categories):
    """Enumerate every bucket ID for the given per-level category counts.

    Parameters
    ----------
    n_categories : iterable of int
        Number of categories at each level; level ``i`` takes the values
        ``0 .. n_categories[i] - 1``.

    Returns
    -------
    numpy.ndarray
        ``int32`` array of shape ``(n_buckets, n_levels)`` with one bucket per
        row, in ``itertools.product`` order (the last level varies fastest).
        The array is always 2-D, even when there are no buckets at all.
    """
    level_sizes = list(n_categories)

    # Generate all possible combinations of categories for each level
    all_combinations = list(product(*(range(size) for size in level_sizes)))

    # Reshape explicitly: if any level has zero categories there are no
    # combinations, and np.array([]) on its own would be 1-D with shape (0,)
    # instead of (0, n_levels), which breaks row-wise comparisons downstream.
    return np.array(all_combinations, dtype=np.int32).reshape(
        len(all_combinations), len(level_sizes)
    )

# Example usage: three levels with two categories each -> 2 * 2 * 2 = 8 buckets.
# `n_categories` is also passed to compute_ratios_for_attribute_filters further down.
n_categories = [2, 2, 2]  # Replace with your actual n_categories
bucket_ids = precompute_bucket_ids(n_categories)
# Bare last expression so the notebook displays the resulting array.
bucket_ids

In [None]:
def compute_ratios_for_attribute_filters(data_prediction, attribute_filters, n_categories):
    """Compute, for each attribute filter, the share of its rows in every bucket.

    Parameters
    ----------
    data_prediction : array-like, 2-D
        One row per sample. Each selected row is compared element-wise with
        every bucket ID produced by ``precompute_bucket_ids(n_categories)``.
    attribute_filters : iterable of array-like of int
        One entry per filter, each holding 1-based row indices into
        ``data_prediction``. A 2-D array (one filter per row) works, and so
        does a list of index lists of different lengths. Each entry is
        flattened before use.
    n_categories : iterable of int
        Category count per level, forwarded to ``precompute_bucket_ids``.

    Returns
    -------
    list of dict
        One dict per filter mapping each bucket tuple (plain Python ints) to
        the fraction of that filter's rows equal to the bucket. Rows that
        match no bucket still count toward the denominator. A filter that
        selects no rows maps every bucket to ``0.0``.

    Notes
    -----
    Indices are shifted by -1 and handed to NumPy indexing, so a 1-based index
    of 0 becomes -1 and selects the last row.
    The match matrix has shape ``(n_rows, n_buckets, n_levels)``, so memory
    grows with the product of the selected rows and the number of buckets.
    """
    data = np.asarray(data_prediction)

    # Precompute all possible bucket IDs once; every filter shares them.
    all_bucket_ids = precompute_bucket_ids(n_categories)
    # Plain-int tuples hash and compare equal to tuples of NumPy integers, but
    # display cleanly as dictionary keys and graph labels.
    bucket_keys = [tuple(bucket) for bucket in all_bucket_ids.tolist()]

    # One ratio dictionary per attribute filter.
    filters_ratios_list = []

    # Shift each filter on its own so ragged (different-length) filters and
    # plain Python lists work, not only a single rectangular ndarray.
    for raw_filter in attribute_filters:
        row_indices = np.asarray(raw_filter).ravel()
        if row_indices.size == 0:
            # An empty list/array defaults to float64, which NumPy rejects as
            # an index; an empty integer index selects zero rows instead.
            row_indices = row_indices.astype(np.intp)

        # Adjust for zero-based indexing and pull the rows for this filter.
        filter_data = data[row_indices - 1]

        # matches[i, j] is True when selected row i equals bucket j on every level.
        matches = np.all(filter_data[:, None] == all_bucket_ids, axis=2)

        # Sum the matches along the first axis to count occurrences in each bucket.
        counts = matches.sum(axis=0)

        # Calculate the ratio for each bucket; guard the division so an empty
        # filter yields zeros rather than NaN and a RuntimeWarning.
        total_elements = len(filter_data)
        if total_elements:
            ratios = counts / total_elements
        else:
            ratios = np.zeros(len(bucket_keys))

        # Map bucket tuple -> ratio and collect it.
        filters_ratios_list.append(dict(zip(bucket_keys, ratios)))

    return filters_ratios_list

# Example usage
# Four samples with three levels each; every row is one bucket ID.
data_prediction = np.array([[0,0,0], [0,0,1], [0,1,1], [1,1,1]])  # Example data
# A single filter whose 1-based indices select all four rows above.
attribute_filter = np.array([[1, 2, 3, 4]])  # Example attribute filters

# Compute the ratios (`n_categories` is the list defined in the earlier example cell)
filters_ratios = compute_ratios_for_attribute_filters(data_prediction, attribute_filter, n_categories)
# Bare last expression so the notebook displays the list of ratio dictionaries.
filters_ratios

In [None]:
def combine_probabilities(filters_ratios):
    """Extend each ratio dictionary with aggregated "prefix" entries.

    For every bucket tuple and every position whose value is not -1, the
    tuple is truncated at that position and padded with -1 up to its original
    length; the bucket's probability is added to that padded tuple's total.
    Truncating at position 0 therefore feeds the all -1 tuple.

    Parameters
    ----------
    filters_ratios : iterable of dict
        Dictionaries mapping bucket tuples to probabilities.

    Returns
    -------
    list of dict
        One new dictionary per input: a copy of the original entries with the
        aggregated entries written on top (an aggregated key that already
        exists is overwritten). The input dictionaries are not modified.
    """

    def _with_prefix_totals(leaf_ratios):
        # Running totals for every -1 padded prefix derived from this filter.
        totals = {}
        for bucket, prob in leaf_ratios.items():
            width = len(bucket)
            for cut, value in enumerate(bucket):
                if value == -1:
                    # Positions already marked -1 do not produce a prefix.
                    continue
                prefix = bucket[:cut] + (-1,) * (width - cut)
                totals[prefix] = totals.get(prefix, 0) + prob

        # Start from the original entries, then overlay the aggregated ones.
        merged = leaf_ratios.copy()
        merged.update(totals)
        return merged

    return [_with_prefix_totals(ratios) for ratios in filters_ratios]


In [None]:
# Add the aggregated (-1 padded) entries to the ratios computed for each filter.
combined_filter_ratios = combine_probabilities(filters_ratios)
# Bare last expression so the notebook displays the combined dictionaries.
combined_filter_ratios

In [None]:
import matplotlib.pyplot as plt
import networkx as nx

def visualize_tree_dict_top_to_bottom(tree_dict):
    """Draw the tuples of ``tree_dict`` as a directed graph and show it.

    An edge runs from ``parent`` to ``node`` when ``parent`` contains exactly
    one more -1 than ``node`` and, at every position, either equals ``node``
    or is -1. Each edge is labelled with the value stored for ``node``.
    Only tuples that take part in at least one edge appear in the drawing.

    Parameters
    ----------
    tree_dict : dict
        Maps tuples (with -1 marking an unset position) to the value used as
        the weight of the edge leading into that tuple.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.
    """

    def _is_direct_parent(candidate, child):
        # Exactly one additional -1 ...
        if candidate.count(-1) != child.count(-1) + 1:
            return False
        # ... and agreement wherever the candidate is not -1.
        return all(c == k or c == -1 for c, k in zip(candidate, child))

    # Directed graph: edges point from parent tuple to child tuple.
    graph = nx.DiGraph()
    for child, child_weight in tree_dict.items():
        for candidate in tree_dict:
            if not _is_direct_parent(candidate, child):
                continue
            graph.add_edge(candidate, child, weight=child_weight)

    # Graphviz 'dot' layout computes the node positions.
    layout = nx.nx_agraph.graphviz_layout(graph, prog='dot')
    edge_weights = nx.get_edge_attributes(graph, 'weight')

    # Nodes and arrows first, then the weight labels on the edges.
    nx.draw(graph, layout, with_labels=True, node_size=700, node_color="lightblue")
    nx.draw_networkx_edge_labels(graph, layout, edge_labels=edge_weights)

    # Show the graph
    plt.show()

# Draw the tree for the first filter's combined ratios
# (combined_filter_ratios holds one dictionary per attribute filter).
visualize_tree_dict_top_to_bottom(combined_filter_ratios[0])