In [14]:
from itertools import combinations
from encoding import (
    create_huffman_prefix_mapping,
    encode_transaction,
    decode_transaction
)

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
transactions = [
    {'birra', 'sacchetto'},
    {'birra', 'sacchetto'},
    {'birra', 'pizza'},
    {'formaggio', 'sacchetto', 'vino'},
    {'birra', 'formaggio', 'patatine', 'sacchetto'},
    {'birra', 'formaggio', 'patatine'},
    {'formaggio', 'pasta', 'pomodoro', 'sacchetto'},
    {'cioccolato', 'cipolla', 'funghi', 'panna', 'pasta', 'sale'}
]

transactions = [set(t) for t in transactions]
transactions

[{'birra', 'sacchetto'},
 {'birra', 'sacchetto'},
 {'birra', 'pizza'},
 {'formaggio', 'sacchetto', 'vino'},
 {'birra', 'formaggio', 'patatine', 'sacchetto'},
 {'birra', 'formaggio', 'patatine'},
 {'formaggio', 'pasta', 'pomodoro', 'sacchetto'},
 {'cioccolato', 'cipolla', 'funghi', 'panna', 'pasta', 'sale'}]

# Create prefix codes (Huffman tree)

In [22]:
item2code_mapping = create_huffman_prefix_mapping(transactions)
item2code_mapping

{'birra': '00',
 'sacchetto': '01',
 'cioccolato': '10000',
 'cipolla': '10001',
 'funghi': '10010',
 'panna': '10011',
 'formaggio': '101',
 'pasta': '1100',
 'patatine': '1101',
 'pizza': '11100',
 'pomodoro': '11101',
 'sale': '11110',
 'vino': '11111'}

In [24]:
encoded = encode_transaction(
    transaction=['pizza', 'vino', 'birra', 'patatine', 'sale', 'formaggio'], 
    item2code_mapping=item2code_mapping
)
encoded

'111001111100110111110101'

In [25]:
decoded = decode_transaction(
    transaction_code=encoded, 
    item2code_mapping=item2code_mapping
)
decoded

['pizza', 'vino', 'birra', 'patatine', 'sale', 'formaggio']

In [28]:
class TreeNode:
    def __init__(self, value, code):
        self.value = value
        self.code = code
        self.left = None
        self.right = None

def build_binary_tree(mapping):
    root = TreeNode('root', None)

    for key, value in mapping.items():
        current = root

        for direction in value:
            if direction == '0':
                if not current.left:
                    current.left = TreeNode(None, None)
                current = current.left
            elif direction == '1':
                if not current.right:
                    current.right = TreeNode(None, None)
                current = current.right

        current.value = key
        current.code = value

    return root

def print_tree(root, level=0, prefix="Root: "):
    if root:
        value_str = root.value if root.value is not None else "None"
        code_str = root.code if root.code is not None else "None"
        print(" " * (level * 4) + prefix + f"{value_str} ({code_str})")
        if root.left or root.right:
            print_tree(root.left, level + 1, "L----- ")
            print_tree(root.right, level + 1, "R----- ")



root_node = build_binary_tree(item2code_mapping)
print_tree(root_node)


Root: root (None)
    L----- None (None)
        L----- birra (00)
        R----- sacchetto (01)
    R----- None (None)
        L----- None (None)
            L----- None (None)
                L----- None (None)
                    L----- cioccolato (10000)
                    R----- cipolla (10001)
                R----- None (None)
                    L----- funghi (10010)
                    R----- panna (10011)
            R----- formaggio (101)
        R----- None (None)
            L----- None (None)
                L----- pasta (1100)
                R----- patatine (1101)
            R----- None (None)
                L----- None (None)
                    L----- pizza (11100)
                    R----- pomodoro (11101)
                R----- None (None)
                    L----- sale (11110)
                    R----- vino (11111)


In [16]:
# to optimize

In [17]:
def decode_transaction(transaction_code, item_codes):
    decoded_items = []

    while transaction_code:
        max_match_length = 0
        matched_item = None

        for item, code in item_codes.items():
            if transaction_code.startswith(code) and len(code) > max_match_length:
                max_match_length = len(code)
                matched_item = item

        if matched_item is not None:
            decoded_items.append(matched_item)
            transaction_code = transaction_code[max_match_length:]
        else:
            # If no match is found, break the loop
            break

    return decoded_items


result = decode_transaction(encoded, prefix_codes)
print(result)


['birra', 'funghi', 'vino', 'patatine', 'cioccolato']


In [15]:
def encode_all_transaction(transactions, item_codes_dict):

    encoded_transactions = []
    for transaction in transactions:
        encoded_transaction = encode_transaction(transaction, item_codes_dict)
        encoded_transactions.append(encoded_transaction)

    return encoded_transactions

encode_all_transaction(transactions, item_codes_dict)

['1101011',
 '1101011',
 '1101100',
 '1111011011',
 '11010110001011',
 '1101011000',
 '1011101010101',
 '011001101011100']

In [None]:
# TODO

In [25]:
# NOT OPTIMAL

In [30]:
def candidates(busket, k):
    return [set(e) for e in combinations(busket, k)]

candidates(basket, k=1)

[{'panna'},
 {'funghi'},
 {'pomodoro'},
 {'sale'},
 {'cioccolato'},
 {'formaggio'},
 {'birra'},
 {'vino'},
 {'patatine'},
 {'cipolla'},
 {'pasta'},
 {'sacchetto'},
 {'pizza'}]

In [87]:
def is_frequent(candidate, transactions, support_threshold):
    count = sum(1 for t in transactions if candidate.issubset(t))
    return count >= support_threshold

[c for c in candidates(basket, 1) if is_frequent(c, transactions, 3)]

[{'formaggio'}, {'birra'}, {'sacchetto'}]

In [85]:
def apriori(basket, transactions, support_threshold):
    frequent_items = {}
    
    # Initialize with frequent 1-itemsets
    frequent_items[1] = [set([item]) for item in basket if sum(1 for t in transactions if item in t) >= support_threshold]

    k = 1
    while len(frequent_items[k]) > 0:
        k += 1
        candidates_k = candidates(basket, k)
        frequent_items[k] = [c for c in candidates_k if is_frequent(c, transactions, support_threshold)]

    return frequent_items


In [86]:
frequent = apriori(basket, transactions, 2)

for k in frequent:
    print(f'level {k}: {frequent[k]}')

level 1: [{'formaggio'}, {'birra'}, {'patatine'}, {'pasta'}, {'sacchetto'}]
level 2: [{'birra', 'formaggio'}, {'formaggio', 'patatine'}, {'formaggio', 'sacchetto'}, {'birra', 'patatine'}, {'birra', 'sacchetto'}]
level 3: [{'birra', 'formaggio', 'patatine'}]
level 4: []
