In [1]:
#Huffman tree and coding
import heapq
import graphviz
import json

class HuffmanNode:
    """A node of the Huffman tree, ordered by its probability.

    Attributes (all plain and public):
      probability -- weight of the node; the only value used for ordering.
      symbols     -- value passed to the constructor (None when omitted).
      children    -- list of child nodes; empty right after construction.
    """

    def __init__(self, probability, symbols=None):
        self.probability = probability
        self.symbols = symbols
        # A fresh list per instance, so nodes never share a children list.
        self.children = []

    def __lt__(self, other):
        """Return True when this node's probability is lower than other's.

        Only `<` is defined; that is the comparison heapq relies on.
        """
        try:
            other_probability = other.probability
        except AttributeError:
            # Let Python raise its usual TypeError for unsupported operands
            # instead of leaking an AttributeError out of the comparison.
            return NotImplemented
        return self.probability < other_probability

    def __repr__(self):
        # Show the child count only: printing children recursively would
        # dump the whole subtree.
        return (f'<{type(self).__name__} probability={self.probability!r} '
                f'symbols={self.symbols!r} children={len(self.children)}>')

def build_huffman_tree(probabilities, arity=4):
    """Build an `arity`-ary Huffman tree and return its root node.

    Args:
        probabilities: mapping of symbol -> probability. One leaf
            HuffmanNode is created per item, with `symbols` set to
            `[symbol]`.
        arity: number of nodes merged under each internal node. Defaults
            to 4, the value that used to be hard-coded.

    Returns:
        A new root HuffmanNode whose probability is the sum of its
        children's probabilities. A root is always created, even for an
        empty mapping (no children) or a single symbol (one child).

    Raises:
        ValueError: if `arity` is smaller than 2; merging fewer than two
            nodes per step never shrinks the heap, so the loop below
            would not terminate.
    """
    if arity < 2:
        raise ValueError(f'arity must be at least 2, got {arity!r}')

    heap = [HuffmanNode(probability, [symbol]) for symbol, probability in probabilities.items()]
    heapq.heapify(heap)

    # Repeatedly replace the `arity` least probable nodes by one parent.
    # Each round shrinks the heap by `arity - 1` nodes.
    while len(heap) > arity:
        group = [heapq.heappop(heap) for _ in range(arity)]
        parent = HuffmanNode(sum(n.probability for n in group))
        parent.children = group
        heapq.heappush(heap, parent)

    # Whatever is left (at most `arity` nodes) hangs directly under the root.
    # NOTE(review): when (number of symbols - 1) is not a multiple of
    # (arity - 1), the root ends up with fewer than `arity` children. The
    # textbook n-ary construction puts the short group at the deepest level
    # instead (by padding with zero-probability dummies); left unchanged here
    # so existing trees keep their shape -- confirm whether optimal code
    # lengths are required.
    root = HuffmanNode(sum(n.probability for n in heap))
    root.children = heap  # heap order, not sorted by probability
    return root

def generate_tree_image(node, encoding_dict=None):
    """Render the tree rooted at `node` into a new graphviz Digraph.

    `encoding_dict` is forwarded unchanged to generate_node_image, which
    uses it to annotate leaves. The populated Digraph is returned; nothing
    is written to disk here.
    """
    digraph = graphviz.Digraph()
    # The root has no parent (empty address) and is child number 0.
    generate_node_image(
        node,
        digraph,
        parent_address='',
        count=0,
        encoding_dict=encoding_dict,
    )
    return digraph

def generate_node_image(node, tree_graph, parent_address, count, encoding_dict=None):
    """Add `node` and, recursively, all of its descendants to `tree_graph`.

    Args:
        node: object exposing `probability`, `symbols` and `children`.
        tree_graph: object exposing `node(name, label=...)` and
            `edge(tail, head, label=...)`.
        parent_address: graph name of the parent node; an empty string
            means "no parent", in which case no edge is drawn.
        count: index of `node` among its siblings; it is appended to
            `parent_address` to form this node's own graph name.
        encoding_dict: optional mapping symbol -> code. When a node's first
            symbol is a key, its code is added to the label.
    """
    percentage = f'{node.probability*100:.2f}%'
    if node.symbols is None:
        # Nodes without symbols only show their probability.
        label = percentage
    else:
        symbol = node.symbols[0]
        label = f'{symbol} ({percentage})'
        if symbol in ('A', 'T', 'G', 'C'):
            label = label + ' ' + symbol
        if encoding_dict is not None and symbol in encoding_dict:
            label = label + f'\nHuffman code: {encoding_dict[symbol]}'

    # Names look like '.0.2.1': the path of sibling indices from the root.
    address = '.'.join((parent_address, str(count)))
    tree_graph.node(address, label=label)
    if parent_address:
        tree_graph.edge(parent_address, address, label='')

    for index, child in enumerate(node.children or ()):
        generate_node_image(child, tree_graph, address, index, encoding_dict=encoding_dict)


def _load_json(path):
    """Return the parsed contents of the JSON file at `path`."""
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default locale encoding.
    with open(path, encoding='utf-8') as f:
        return json.load(f)


# NOTE(review): `mydic` is not used by the code below; the load is kept in
# case something else relies on it -- confirm, then drop if it is a leftover.
mydic = _load_json('mydic.json')

# Symbol -> probability mapping used to build the tree.
mydict = _load_json('mydict.json')

# Symbol -> code mapping; only used to annotate leaves in the picture.
encoding_dict = _load_json('encoding_dict.json')

# Example usage
probabilities = mydict
tree = build_huffman_tree(probabilities)

# The codebook is not derived from `tree` here; the one loaded from
# encoding_dict.json above is used as-is.

# Generate image of Huffman tree with encoding
tree_image = generate_tree_image(tree, encoding_dict=encoding_dict)
# Renders the graph to a file and opens it in the default viewer.
tree_image.view()


'Digraph.gv.pdf'