In [None]:
import pandas as pd
from itertools import combinations
from collections import defaultdict


# NOTE(review): absolute path -- presumably a Colab upload location; confirm
# before running this notebook in another environment.
df = pd.read_csv("/content/Groceries_dataset.csv")

# One basket per member: every "itemDescription" row that shares a
# "Member_number" is merged into a single set, so repeated items collapse.
transactions = [
    set(items)
    for items in df.groupby("Member_number")["itemDescription"].apply(list).tolist()
]

# Denominator used by the support computations in later cells.
num_transactions = len(transactions)
print("Total transactions:", num_transactions)


Total transactions: 3898


In [13]:
def support(itemset):
    """Return the fraction of transactions that contain every item of ``itemset``.

    Reads the module-level ``transactions`` and ``num_transactions``.

    Args:
        itemset: a ``set`` or ``frozenset`` of items (anything with ``issubset``).

    Returns:
        Number of matching transactions divided by ``num_transactions``.
    """
    hits = 0
    for basket in transactions:
        if itemset.issubset(basket):
            hits += 1
    return hits / num_transactions


In [None]:
def candidates(prev_frequent, k):
    """Generate size-``k`` candidate itemsets from the frequent (k-1)-itemsets.

    Join step: two (k-1)-itemsets are merged when their sorted item lists share
    the first ``k - 2`` items and their union contains exactly ``k`` items.

    Prune step: a candidate is kept only if every one of its (k-1)-subsets is
    itself in ``prev_frequent``. A candidate with an infrequent subset cannot
    be frequent, so dropping it here saves a full support scan later.

    Args:
        prev_frequent: iterable of frozensets, each of size ``k - 1``.
        k: size of the itemsets to generate.

    Returns:
        A set of frozensets, each of size ``k``.
    """
    prev_set = set(prev_frequent)

    # Sort each itemset once up front instead of re-sorting both operands for
    # every pair inside the nested loop.
    keyed = [(sorted(itemset), itemset) for itemset in prev_set]

    result = set()
    for i, (items_i, set_i) in enumerate(keyed):
        prefix = items_i[:k - 2]
        for items_j, set_j in keyed[i + 1:]:
            if items_j[:k - 2] != prefix:
                continue

            candidate = set_i | set_j
            if len(candidate) != k:
                continue

            # Removing one item at a time enumerates every (k-1)-subset.
            if all(candidate - {item} in prev_set for item in candidate):
                result.add(candidate)

    return result


In [None]:
def apriori(min_support=0.005):
    """Mine frequent itemsets level by level from the global ``transactions``.

    Level 1 is obtained by counting single items; each following level is built
    by calling ``candidates`` on the previous level and keeping the candidates
    whose ``support`` reaches ``min_support``.

    Args:
        min_support: minimum fraction of transactions an itemset must appear in.

    Returns:
        dict mapping itemset size ``k`` to a set of frozensets. When level 1 is
        non-empty, the highest key maps to the empty set that ended the search.
    """
    singleton_counts = defaultdict(int)
    for basket in transactions:
        for item in basket:
            singleton_counts[frozenset([item])] += 1

    level = {
        single
        for single, count in singleton_counts.items()
        if count / num_transactions >= min_support
    }
    frequent = {1: level}

    size = 2
    while level:
        level = {
            cand for cand in candidates(level, size) if support(cand) >= min_support
        }
        # The (possibly empty) level is recorded before the loop condition is
        # re-checked, so the final entry is an empty set.
        frequent[size] = level
        size += 1

    return frequent


In [None]:
frequent_itemsets = apriori(min_support=0.005)

# Flatten the per-size levels into (itemset, support) pairs.
all_frequent = []
for k in frequent_itemsets:
    all_frequent.extend(
        (itemset, support(itemset)) for itemset in frequent_itemsets[k]
    )

# Highest support first.
all_frequent_sorted = sorted(all_frequent, key=lambda pair: pair[1], reverse=True)

print("Top 10 Frequent Itemsets:")
for itemset, sup in all_frequent_sorted[:10]:
    print(list(itemset), "=>", round(sup, 4))


Top 10 Frequent Itemsets:
['whole milk'] => 0.4582
['other vegetables'] => 0.3766
['rolls/buns'] => 0.3497
['soda'] => 0.3135
['yogurt'] => 0.283
['tropical fruit'] => 0.2337
['root vegetables'] => 0.2306
['bottled water'] => 0.2137
['sausage'] => 0.206
['other vegetables', 'whole milk'] => 0.1914


In [23]:
def rules(min_conf=0.3):
    """Derive association rules A -> B from the itemsets in ``all_frequent``.

    For every itemset with at least two items, each non-empty proper subset A
    is tried as the antecedent, with B = itemset - A as the consequent. A rule
    is kept when its confidence ``support(itemset) / support(A)`` is at least
    ``min_conf``.

    Reads the module-level ``all_frequent`` (a list of ``(itemset, support)``
    pairs) and falls back to ``support()`` only for subsets whose support is
    not already listed there.

    Args:
        min_conf: minimum confidence a rule must reach to be returned.

    Returns:
        list of dicts with keys ``"A"`` (set), ``"B"`` (itemset minus A),
        ``"support"``, ``"confidence"``, ``"prior"`` (support of B) and
        ``"interestingness"`` (``abs(confidence - prior)``).
    """
    # Supports that were already computed for the frequent itemsets. Reusing
    # them avoids a full pass over the transactions for every antecedent and
    # consequent.
    known_support = {frozenset(items): value for items, value in all_frequent}

    def lookup(items):
        """Return the support of ``items``, scanning transactions only on a miss."""
        key = frozenset(items)
        if key not in known_support:
            known_support[key] = support(key)
        return known_support[key]

    found = []

    for itemset, sup in all_frequent:
        if len(itemset) < 2:
            continue

        items = list(itemset)

        for r in range(1, len(items)):
            for antecedent in combinations(items, r):
                A = set(antecedent)

                sup_A = lookup(A)
                if sup_A == 0:
                    # Confidence is undefined for an antecedent that never occurs.
                    continue

                conf = sup / sup_A
                if conf < min_conf:
                    continue

                # The consequent is only scored for rules that are kept.
                B = itemset - A
                prior = lookup(B)

                found.append({
                    "A": A,
                    "B": B,
                    "support": sup,
                    "confidence": conf,
                    "prior": prior,
                    "interestingness": abs(conf - prior)
                })

    return found


In [24]:
# Keep the result under its own name: assigning it to ``rules`` would replace
# the rules() function with a list, and re-running this cell would then fail
# with "TypeError: 'list' object is not callable".
mined_rules = rules(min_conf=0.3)

# Most interesting rules first.
rules_sorted = sorted(mined_rules, key=lambda x: x["interestingness"], reverse=True)

print("Top 5 Rules by Interestingness:")
for r in rules_sorted[:5]:
    print(
        f"{list(r['A'])} → {list(r['B'])} | "
        f"support={round(r['support'],4)}, "
        f"conf={round(r['confidence'],4)}, "
        f"interestingness={round(r['interestingness'],4)}"
    )


Top 5 Rules by Interestingness:
['pastry', 'yogurt', 'ham'] → ['whole milk'] | support=0.0054, conf=0.9545, interestingness=0.4964
['other vegetables', 'meat', 'domestic eggs'] → ['whole milk'] | support=0.0056, conf=0.88, interestingness=0.4218
['frankfurter', 'coffee', 'root vegetables'] → ['rolls/buns'] | support=0.0051, conf=0.7692, interestingness=0.4196
['pip fruit', 'pork', 'bottled water'] → ['other vegetables'] | support=0.0059, conf=0.7931, interestingness=0.4165
['pastry', 'other vegetables', 'whole milk', 'sausage'] → ['rolls/buns'] | support=0.0082, conf=0.7619, interestingness=0.4122
