In [3]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Market-basket analysis: read the purchase log, build one basket per
# (member, date) pair, mine frequent itemsets and print the strongest rules.

# Read the raw purchase records.
groceries_df = pd.read_csv('/content/Groceries.csv')

# Every distinct (Member_number, Date) pair is treated as one transaction whose
# contents are the item descriptions recorded for that pair.
grouped_items = groceries_df.groupby(['Member_number', 'Date'])['itemDescription']
transactions = grouped_items.apply(list).tolist()

# One-hot encode the baskets: one row per transaction, one column per item.
te = TransactionEncoder()
te.fit(transactions)
transactions_encoded = pd.DataFrame(
    te.transform(transactions),
    columns=te.columns_,
)

# Raw occurrence count of each item across all baskets; printed as a guide
# for choosing the support threshold used below.
flat_items = []
for basket in transactions:
    flat_items.extend(basket)
item_counts = pd.Series(flat_items).value_counts()
print(item_counts.iloc[:15])

# Mine frequent itemsets with a deliberately low support threshold (0.005),
# chosen after inspecting the item frequencies above.
frequent_itemsets = apriori(
    transactions_encoded,
    min_support=0.005,
    use_colnames=True,
)

# Derive association rules, keeping those whose lift meets the threshold of 1.
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1,
)

# Rank the rules from highest to lowest lift and show at most the first ten.
rules_sorted = rules.sort_values(by='lift', ascending=False)
rules_sorted = rules_sorted.head(10)
print(rules_sorted)


whole milk          2502
other vegetables    1898
rolls/buns          1716
soda                1514
yogurt              1334
root vegetables     1071
tropical fruit      1032
bottled water        933
sausage              924
citrus fruit         812
pastry               785
pip fruit            744
shopping bags        731
canned beer          717
bottled beer         687
Name: count, dtype: int64
          antecedents         consequents  antecedent support  \
1       (frankfurter)  (other vegetables)            0.037760   
0  (other vegetables)       (frankfurter)            0.122101   
5            (yogurt)           (sausage)            0.085879   
4           (sausage)            (yogurt)            0.060349   
3           (sausage)              (soda)            0.060349   
2              (soda)           (sausage)            0.097106   

   consequent support   support  confidence      lift  representativity  \
1            0.122101  0.005146    0.136283  1.116150               