<a href="https://colab.research.google.com/github/asrafulasf72/Data-Mining-Algorithm/blob/main/Apriori_Algorithm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import math
from collections import defaultdict
from itertools import combinations, chain

In [None]:
def get_frequent_1_itemsets(transactions, min_support_count):
    """
    Find the frequent 1-itemsets.

    transactions: iterable of transactions (each an iterable of hashable items)
    min_support_count: minimum number of transactions an item must appear in
    returns: dict mapping frozenset([item]) -> number of transactions containing item
    """
    counts = defaultdict(int)
    for t in transactions:
        # Support counts transactions, not occurrences: an item repeated inside
        # one transaction must contribute only once. dict.fromkeys dedupes
        # while keeping first-seen order, so the result order stays stable.
        for item in dict.fromkeys(t):
            counts[frozenset([item])] += 1
    return {itemset: cnt for itemset, cnt in counts.items() if cnt >= min_support_count}

In [None]:
def has_infrequent_subset(candidate, prev_freq_itemsets):
    """
    Apriori pruning test.

    Returns True when at least one subset of `candidate` with one element
    fewer is absent from `prev_freq_itemsets`; False when all of them are
    present.
    """
    subset_size = len(candidate) - 1
    return any(
        frozenset(part) not in prev_freq_itemsets
        for part in combinations(candidate, subset_size)
    )

In [None]:
def generate_candidates(prev_freq_itemsets, k):
    """
    Build the candidate k-itemsets from the previous level's frequent itemsets.

    Every unordered pair of previous itemsets is unioned; a union is kept
    when it has exactly k items and has_infrequent_subset() does not reject it.
    Returns a set of frozensets.
    """
    survivors = set()
    known = list(prev_freq_itemsets.keys())
    for left, right in combinations(known, 2):
        merged = left | right
        if len(merged) != k:
            continue
        if has_infrequent_subset(merged, prev_freq_itemsets):
            continue
        survivors.add(merged)
    return survivors

In [None]:
def count_supports(transactions, candidates):
    """
    Count, for each candidate itemset, how many transactions contain it.

    Returns a defaultdict(int) keyed by candidate; candidates contained in
    no transaction get no entry.
    """
    tally = defaultdict(int)
    for basket in map(set, transactions):
        for cand in candidates:
            if cand <= basket:
                tally[cand] += 1
    return tally

In [None]:
def apriori(transactions, min_support=0.5, min_confidence=0.7):
    """
    Mine frequent itemsets and association rules with the Apriori algorithm.

    transactions: list of transactions (each transaction is a list or set)
    min_support: relative threshold (0–1)
    min_confidence: relative threshold (0–1)
    returns: (frequent_itemsets, rules)
        frequent_itemsets: dict mapping frozenset -> support count
        rules: list of (antecedent, consequent, support, confidence) tuples,
               support and confidence rounded to 4 decimals, sorted by
               confidence and then support, both descending
    """
    n_transactions = len(transactions)
    # Round UP when converting the relative threshold to a count: truncating
    # would admit itemsets whose relative support is below min_support
    # (e.g. 0.5 * 5 -> 2, i.e. support 0.4). The small tolerance keeps float
    # noise such as 0.7 * 10 == 7.000000000000001 from bumping the count to 8.
    min_support_count = max(1, math.ceil(min_support * n_transactions - 1e-9))

    # Step 1: Frequent 1-itemsets
    L1 = get_frequent_1_itemsets(transactions, min_support_count)
    frequent_itemsets = dict(L1)

    # Step 2: grow level by level until no candidate or no frequent set remains
    k = 2
    prev_L = L1
    while prev_L:
        candidates = generate_candidates(prev_L, k)
        if not candidates:
            break

        candidate_counts = count_supports(transactions, candidates)
        Lk = {c: cnt for c, cnt in candidate_counts.items() if cnt >= min_support_count}

        frequent_itemsets.update(Lk)
        prev_L = Lk
        k += 1

    # Step 3: derive rules antecedent -> consequent from every itemset of size >= 2
    rules = []
    for itemset, itemset_count in frequent_itemsets.items():
        if len(itemset) < 2:
            continue

        # Rule support equals the itemset's support; it is the same for every split.
        support = round(itemset_count / n_transactions, 4)

        # Proper non-empty subsets only, so the consequent is never empty.
        subsets = chain.from_iterable(combinations(itemset, r) for r in range(1, len(itemset)))
        for ante in subsets:
            ante = frozenset(ante)
            ante_count = frequent_itemsets.get(ante)
            if not ante_count:
                # Defensive: skip if the antecedent's count is unavailable.
                continue

            confidence = itemset_count / ante_count
            if confidence >= min_confidence:
                rules.append((ante, itemset - ante, support, round(confidence, 4)))

    rules.sort(key=lambda x: (x[3], x[2]), reverse=True)
    return frequent_itemsets, rules

In [None]:
if __name__ == "__main__":
    # Small market-basket demo: one list of items per transaction.
    transactions = [
        ['milk', 'bread', 'butter'],
        ['beer', 'bread'],
        ['milk', 'bread', 'butter', 'beer'],
        ['bread', 'butter'],
        ['milk', 'bread'],
        ['beer', 'butter'],
    ]

    # Relative thresholds passed to apriori(); change them to experiment.
    min_support = 0.5
    min_confidence = 0.7

    freq_itemsets, rules = apriori(transactions, min_support, min_confidence)

    print("=== Frequent Itemsets ===")
    # Larger itemsets first, then higher support count; the stable sort keeps ties in order.
    for items, hits in sorted(
        freq_itemsets.items(),
        key=lambda pair: (len(pair[0]), pair[1]),
        reverse=True,
    ):
        print(f"{set(items)} -> support count: {hits}")

    print("\n=== Association Rules ===")
    for lhs, rhs, supp, conf in rules:
        print(f"{set(lhs)} -> {set(rhs)} | support={supp:.2f}, confidence={conf:.2f}")

=== Frequent Itemsets ===
{'milk', 'bread'} -> support count: 3
{'bread', 'butter'} -> support count: 3
{'bread'} -> support count: 5
{'butter'} -> support count: 4
{'milk'} -> support count: 3
{'beer'} -> support count: 3

=== Association Rules ===
{'milk'} -> {'bread'} | support=0.50, confidence=1.00
{'butter'} -> {'bread'} | support=0.50, confidence=0.75
