Step 1: Import Packages and Dataset

In [1]:
from collections import defaultdict
from itertools import combinations
# Horizontal market-basket data: transaction id -> items bought together.
transactions = dict(
    T1=["Bread", "Butter", "Jam"],
    T2=["Butter", "Coke"],
    T3=["Butter", "Milk"],
    T4=["Bread", "Butter", "Coke"],
    T5=["Bread", "Milk"],
    T6=["Butter", "Milk"],
    T7=["Bread", "Milk"],
    T8=["Bread", "Butter", "Milk", "Jam"],
    T9=["Bread", "Butter", "Milk"],
)

# Absolute support threshold: an itemset must occur in at least this many
# transactions to be reported as frequent.
min_support = 2

Step 2: Generate Tidsets (Vertical representation)

In [2]:
def generate_tidsets(transactions):
    """
    Build the vertical representation of a transaction database.

    transactions: mapping of transaction id -> iterable of items in that transaction
    returns: defaultdict(set) mapping each item -> set of transaction ids containing it
    """
    vertical = defaultdict(set)
    # Flatten the horizontal layout into (item, tid) occurrences, then group by item.
    occurrences = (
        (product, tid)
        for tid, basket in transactions.items()
        for product in basket
    )
    for product, tid in occurrences:
        vertical[product].add(tid)
    return vertical

# Vertical layout: item -> set of transaction ids that contain it.
item_tidset = generate_tidsets(transactions)

# Show each item's tidset in sorted order so the listing is easy to read.
for item in item_tidset:
    print(item, ":", sorted(item_tidset[item]))

Bread : ['T1', 'T4', 'T5', 'T7', 'T8', 'T9']
Butter : ['T1', 'T2', 'T3', 'T4', 'T6', 'T8', 'T9']
Jam : ['T1', 'T8']
Coke : ['T2', 'T4']
Milk : ['T3', 'T5', 'T6', 'T7', 'T8', 'T9']


Step 3: Prepare a list of (item, tidset) pairs sorted by ascending support

In [3]:
# Order the (item, tidset) pairs from smallest to largest tidset.
items = list(item_tidset.items())
items.sort(key=lambda pair: len(pair[1]))

Step 4: Implement recursive ECLAT

In [4]:
def eclat(prefix, items, min_support, frequent_itemsets):
    """
    Recursively mine frequent itemsets depth-first by intersecting tidsets.

    prefix: list of items forming the current prefix
    items: list of tuples (item, tidset) to consider for extension; it is
        copied internally, so the caller's list is left unmodified
    min_support: absolute minimum support (count)
    frequent_itemsets: dict to collect results {frozenset(itemset): support_count};
        it is updated in place and nothing is returned
    """
    # Consume a private copy: popping from `items` directly would empty the
    # caller's list as a hidden side effect.
    candidates = list(items)
    while candidates:
        item, tidset = candidates.pop()
        support = len(tidset)
        if support < min_support:
            continue

        new_itemset = prefix + [item]
        frequent_itemsets[frozenset(new_itemset)] = support

        # Extend only with candidates that have not been popped yet, so each
        # combination of items is generated exactly once.
        suffix = []
        for other_item, other_tidset in candidates:
            intersection = tidset & other_tidset
            if len(intersection) >= min_support:
                suffix.append((other_item, intersection))
        suffix.sort(key=lambda pair: len(pair[1]))
        eclat(new_itemset, suffix, min_support, frequent_itemsets)

Step 5: Run ECLAT and collect frequent itemsets

In [5]:
# Rebuild the vertical layout and the candidate list inside this cell so the
# run does not depend on the state earlier cells left in `items`.
item_tidset = generate_tidsets(transactions)
items = sorted(item_tidset.items(), key=lambda x: len(x[1]))
frequent_itemsets = {}
eclat([], items, min_support, frequent_itemsets)

print("Frequent itemsets (as list) -> support count")
# Report order: larger itemsets first, then higher support, then alphabetical.
for itemset, support in sorted(
    frequent_itemsets.items(),
    key=lambda x: (-len(x[0]), -x[1], sorted(x[0])),
):
    # Iteration order of a frozenset of strings can differ between kernel
    # sessions, so sort the members to keep the printed output reproducible.
    print(sorted(itemset), "=>", support)

Frequent itemsets (as list) -> support count
['Bread', 'Butter', 'Jam'] => 2
['Bread', 'Milk', 'Butter'] => 2
['Bread', 'Butter'] => 4
['Bread', 'Milk'] => 4
['Milk', 'Butter'] => 4
['Bread', 'Jam'] => 2
['Butter', 'Coke'] => 2
['Jam', 'Butter'] => 2
['Butter'] => 7
['Bread'] => 6
['Milk'] => 6
['Coke'] => 2
['Jam'] => 2
