In [10]:
from itertools import combinations
from encoding import (
    create_huffman_prefix_mapping,
    encode_transaction,
    decode_transaction,
    encode_all_transactions
)
from utils import build_binary_tree, print_tree

# Enable IPython's autoreload so edits to the local `encoding` / `utils` modules
# are picked up without restarting the kernel (mode 2 = reload all modules
# before each cell execution).
# Re-running this cell in a live kernel prints the "already loaded" notice shown
# in the output below; that notice is informational only.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
# Toy market-basket dataset: one set of purchased item names per transaction.
# The literals below are already `set` objects, so no further conversion is
# needed before handing them to the encoding helpers.
transactions = [
    {'birra', 'sacchetto'},
    {'birra', 'sacchetto'},
    {'birra', 'pizza'},
    {'formaggio', 'sacchetto', 'vino'},
    {'birra', 'formaggio', 'patatine', 'sacchetto'},
    {'birra', 'formaggio', 'patatine'},
    {'formaggio', 'pasta', 'pomodoro', 'sacchetto'},
    {'cioccolato', 'cipolla', 'funghi', 'panna', 'pasta', 'sale'}
]

# Bare last expression so the notebook displays the dataset.
transactions

[{'birra', 'sacchetto'},
 {'birra', 'sacchetto'},
 {'birra', 'pizza'},
 {'formaggio', 'sacchetto', 'vino'},
 {'birra', 'formaggio', 'patatine', 'sacchetto'},
 {'birra', 'formaggio', 'patatine'},
 {'formaggio', 'pasta', 'pomodoro', 'sacchetto'},
 {'cioccolato', 'cipolla', 'funghi', 'panna', 'pasta', 'sale'}]

# Create prefix codes (Huffman tree)

In [12]:
# Build the item -> bit-string code mapping from the whole transaction list.
# In the displayed result the two most frequent items (birra and sacchetto,
# 5 occurrences each) receive the shortest codes, and items seen once get
# 5-bit codes.
item2code_mapping = create_huffman_prefix_mapping(transactions)
item2code_mapping

{'birra': '00',
 'sacchetto': '01',
 'cioccolato': '10000',
 'cipolla': '10001',
 'funghi': '10010',
 'panna': '10011',
 'formaggio': '101',
 'pasta': '1100',
 'patatine': '1101',
 'pizza': '11100',
 'pomodoro': '11101',
 'sale': '11110',
 'vino': '11111'}

In [13]:
# Rebuild the binary code tree from the item -> code mapping and print it.
# In the printed tree, leaves show an item with its full code, while the
# intermediate nodes print as "None (None)"; L/R mark the two children of a node.
root_node = build_binary_tree(item2code_mapping)
print_tree(root_node)

Root: root (None)
    L --- None (None)
        L --- birra (00)
        R --- sacchetto (01)
    R --- None (None)
        L --- None (None)
            L --- None (None)
                L --- None (None)
                    L --- cioccolato (10000)
                    R --- cipolla (10001)
                R --- None (None)
                    L --- funghi (10010)
                    R --- panna (10011)
            R --- formaggio (101)
        R --- None (None)
            L --- None (None)
                L --- pasta (1100)
                R --- patatine (1101)
            R --- None (None)
                L --- None (None)
                    L --- pizza (11100)
                    R --- pomodoro (11101)
                R --- None (None)
                    L --- sale (11110)
                    R --- vino (11111)


In [14]:
# Encode one hand-picked basket. It is given as a list (not a set) so that the
# order of the items, and therefore of their codes in the output, is fixed.
sample_items = ['pizza', 'vino', 'birra', 'patatine', 'sale', 'formaggio']
encoded = encode_transaction(transaction=sample_items, item2code_mapping=item2code_mapping)
encoded

'111001111100110111110101'

In [15]:
# Decode the bit string produced above back into item names, using the same
# item -> code mapping that was used for encoding.
decoded = decode_transaction(
    transaction_code=encoded,
    item2code_mapping=item2code_mapping
)

# Round-trip guard: re-encoding the decoded items must reproduce the original
# bit string; otherwise encoding and decoding disagree and the cell should fail
# loudly instead of relying on a visual comparison of the outputs.
assert encode_transaction(
    transaction=decoded,
    item2code_mapping=item2code_mapping
) == encoded, "encode/decode round trip failed"

# Bare last expression so the notebook displays the decoded items.
decoded

['pizza', 'vino', 'birra', 'patatine', 'sale', 'formaggio']

In [16]:
# Encode every transaction of the dataset; the result is one bit string per
# transaction, in the same order as `transactions`.
# NOTE(review): each transaction is a set, and the item order inside each bit
# string appears to follow set iteration order (e.g. the third result is
# pizza + birra). The exact strings may therefore differ between kernel
# sessions -- confirm whether the helper should sort items first.
encode_all_transactions(transactions, item2code_mapping)

['0001',
 '0001',
 '1110000',
 '1111110101',
 '00110110101',
 '110110100',
 '11101110010101',
 '10000100101000111110110010011']