In [1]:
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder
from apriori import Apriori
import pandas as pd

In [4]:
import time

def test_transactions(transactions, min_support, min_confidence):
    te = TransactionEncoder()
    df = pd.DataFrame(te.fit(transactions).transform(transactions), columns=te.columns_)

    start_mlxtend = time.time()
    frequent_itemsets_mlxtend = apriori(df, min_support=min_support, use_colnames=True)
    rules_mlxtend = association_rules(frequent_itemsets_mlxtend, metric='confidence', min_threshold=min_confidence)
    end_mlxtend = time.time()

    transactions_sets = [set(t) for t in transactions]
    start_custom = time.time()
    apriori_custom = Apriori(transactions_sets, min_support=min_support, min_confidence=min_confidence)
    rules_custom = apriori_custom.find_rules()
    end_custom = time.time()

    print("\n[mlxtend] Frequent Itemsets:")
    print(frequent_itemsets_mlxtend)

    print("\n[custom] Frequent Itemsets:")
    apriori_custom.print_frequent_itemsets_like_mlxtend()

    print("\n[mlxtend] Rules:")
    for _, row in rules_mlxtend.iterrows():
        print(f"{set(row['antecedents'])} => {set(row['consequents'])}, confidence: {row['confidence']:.2f}")

    print("\n[custom] Rules:")
    apriori_custom.print_rules(rules_custom)

    def normalize_rules(rules):
        return sorted([
            (frozenset(a), frozenset(c), round(conf, 2))
            for a, c, conf in rules
        ])

    def normalize_mlxtend(rules_df):
        return sorted([
            (frozenset(row['antecedents']), frozenset(row['consequents']), round(row['confidence'], 2))
            for _, row in rules_df.iterrows()
        ])

    print("\n[Comparison Result]")
    custom_rules_set = set(normalize_rules(rules_custom))
    mlxtend_rules_set = set(normalize_mlxtend(rules_mlxtend))

    if custom_rules_set == mlxtend_rules_set:
        print("✅ Both implementations produce equivalent rules.")
    else:
        print("❌ Rules differ.")
        print("Only in custom:", custom_rules_set - mlxtend_rules_set)
        print("Only in mlxtend:", mlxtend_rules_set - custom_rules_set)

    return start_custom, end_custom, start_mlxtend, end_mlxtend

In [5]:
transactions = [
    {'a', 'b', 'c', 'd', 'e'},
    {'a', 'c', 'd', 'f'},
    {'a', 'b', 'c', 'd', 'e', 'g'},
    {'c', 'd', 'e', 'f'},
    {'c', 'e', 'f', 'h'},
    {'d', 'e', 'f'},
    {'a', 'f', 'g'},
    {'d', 'e', 'g', 'h'},
    {'a', 'b', 'c', 'f'},
    {'c', 'd', 'e', 'h'}
]

min_support = 0.4
min_confidence = 0.5

test_transactions(transactions=transactions, min_support=min_support, min_confidence=min_confidence)


[mlxtend] Frequent Itemsets:
    support   itemsets
0       0.5        (a)
1       0.7        (c)
2       0.7        (d)
3       0.7        (e)
4       0.6        (f)
5       0.4     (c, a)
6       0.5     (c, d)
7       0.5     (c, e)
8       0.4     (c, f)
9       0.6     (e, d)
10      0.4  (c, d, e)

[custom] Frequent Itemsets:
    support   itemsets
0       0.5       (a,)
1       0.7       (c,)
2       0.7       (d,)
3       0.7       (e,)
4       0.6       (f,)
5       0.4     (a, c)
6       0.5     (c, d)
7       0.5     (c, e)
8       0.4     (c, f)
9       0.6     (d, e)
10      0.4  (c, d, e)

[mlxtend] Rules:
{'c'} => {'a'}, confidence: 0.57
{'a'} => {'c'}, confidence: 0.80
{'c'} => {'d'}, confidence: 0.71
{'d'} => {'c'}, confidence: 0.71
{'c'} => {'e'}, confidence: 0.71
{'e'} => {'c'}, confidence: 0.71
{'c'} => {'f'}, confidence: 0.57
{'f'} => {'c'}, confidence: 0.67
{'e'} => {'d'}, confidence: 0.86
{'d'} => {'e'}, confidence: 0.86
{'c', 'd'} => {'e'}, confidence: 0.80
{'c

(1746387293.2407172, 1746387293.2408388, 1746387293.235141, 1746387293.2407062)

In [6]:
path = 'groceries.csv'
df = pd.read_csv(path)
df = df.iloc[:, 1:]
transactions = df.values
transactions = [set(item for item in transaction if pd.notna(item)) for transaction in transactions]


In [7]:
transactions

[{'citrus fruit', 'margarine', 'ready soups', 'semi-finished bread'},
 {'coffee', 'tropical fruit', 'yogurt'},
 {'whole milk'},
 {'cream cheese', 'meat spreads', 'pip fruit', 'yogurt'},
 {'condensed milk',
  'long life bakery product',
  'other vegetables',
  'whole milk'},
 {'abrasive cleaner', 'butter', 'rice', 'whole milk', 'yogurt'},
 {'rolls/buns'},
 {'UHT-milk',
  'bottled beer',
  'liquor (appetizer)',
  'other vegetables',
  'rolls/buns'},
 {'potted plants'},
 {'cereals', 'whole milk'},
 {'bottled water',
  'chocolate',
  'other vegetables',
  'tropical fruit',
  'white bread'},
 {'bottled water',
  'butter',
  'citrus fruit',
  'curd',
  'dishes',
  'flour',
  'tropical fruit',
  'whole milk',
  'yogurt'},
 {'beef'},
 {'frankfurter', 'rolls/buns', 'soda'},
 {'chicken', 'tropical fruit'},
 {'butter', 'fruit/vegetable juice', 'newspapers', 'sugar'},
 {'fruit/vegetable juice'},
 {'packaged fruit/vegetables'},
 {'chocolate'},
 {'specialty bar'},
 {'other vegetables'},
 {'butter mi

In [9]:
min_support = 0.02
min_confidence = 0.2

custom_start, custom_end, mlxtend_start, mlxtend_end = test_transactions(transactions=transactions, min_support=min_support, min_confidence=min_confidence)


[mlxtend] Frequent Itemsets:
      support                                         itemsets
0    0.033452                                       (UHT-milk)
1    0.052466                                           (beef)
2    0.033249                                        (berries)
3    0.026029                                      (beverages)
4    0.080529                                   (bottled beer)
..        ...                                              ...
117  0.032232                 (whole milk, whipped/sour cream)
118  0.020742                     (yogurt, whipped/sour cream)
119  0.056024                             (yogurt, whole milk)
120  0.023183  (whole milk, other vegetables, root vegetables)
121  0.022267           (other vegetables, yogurt, whole milk)

[122 rows x 2 columns]

[custom] Frequent Itemsets:
     support                                         itemsets
0       0.03                                      (UHT-milk,)
1       0.05                         

In [10]:
print(f"Custom Apriori took: {custom_end - custom_start:.2f}s")
print(f"\nMlxtend Apriori took: {mlxtend_end - mlxtend_start:.2f}s")

Custom Apriori took: 0.73s

Mlxtend Apriori took: 0.08s
