In [7]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Sample transaction data
transactions = [
    ['Milk', 'Bread', 'Butter'],
    ['Bread', 'Butter'],
    ['Milk', 'Bread'],
    ['Milk', 'Butter'],
    ['Bread', 'Butter']
]

# Step 1: Convert transaction data into a one-hot encoded DataFrame
te = TransactionEncoder()
te_array = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_array, columns=te.columns_)

# Step 2: Find frequent itemsets with minimum support = 0.6
frequent_itemsets = apriori(df, min_support=0.6, use_colnames=True)
print("Frequent Itemsets:")
print(frequent_itemsets)

# Step 3: Generate association rules with minimum confidence = 0.7
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)
print("\nAssociation Rules:")
print(rules[['support', 'confidence', 'lift']])

Frequent Itemsets:
   support         itemsets
0      0.8          (Bread)
1      0.8         (Butter)
2      0.6           (Milk)
3      0.6  (Bread, Butter)

Association Rules:
   support  confidence    lift
0      0.6        0.75  0.9375
1      0.6        0.75  0.9375


In [None]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Load the dataset
df = pd.read_csv('Groceries_dataset.csv')

# Combine Member_number and Date to define unique transactions
df['Transaction'] = df['Member_number'].astype(str) + "_" + df['Date']

# Create a list of item lists for each transaction
transactions = df.groupby('Transaction')['itemDescription'].apply(list).tolist()

# Encode transactions into a one-hot DataFrame
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

# Generate frequent itemsets (lower min_support if needed)
frequent_itemsets = apriori(df_encoded, min_support=0.005, use_colnames=True)

# Generate association rules (confidence threshold lowered to ensure results)
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=0.8)

# Select only relevant columns
rules = rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']]

# Sort rules by lift descending
rules = rules.sort_values(by='lift', ascending=False)

# Convert frozensets to readable strings
rules['antecedents'] = rules['antecedents'].apply(lambda x: ', '.join(list(x)))
rules['consequents'] = rules['consequents'].apply(lambda x: ', '.join(list(x)))

# Show count of rules found
print(f"\nTotal rules generated: {len(rules)}")

# Print top 10 rules if available
if not rules.empty:
    print("\nTop 10 Association Rules:")
    for i, row in rules.head(10).iterrows():
        print(f"{row['antecedents']} => {row['consequents']} | "
              f"Support: {row['support']:.3f}, Confidence: {row['confidence']:.3f}, Lift: {row['lift']:.3f}")
else:
    print("\n No rules found. Try lowering min_support further (e.g., 0.003).")



Total rules generated: 44

Top 10 Association Rules:
frankfurter => other vegetables | Support: 0.005, Confidence: 0.136, Lift: 1.116
other vegetables => frankfurter | Support: 0.005, Confidence: 0.042, Lift: 1.116
sausage => yogurt | Support: 0.006, Confidence: 0.095, Lift: 1.109
yogurt => sausage | Support: 0.006, Confidence: 0.067, Lift: 1.109
soda => sausage | Support: 0.006, Confidence: 0.061, Lift: 1.015
sausage => soda | Support: 0.006, Confidence: 0.099, Lift: 1.015
bottled beer => whole milk | Support: 0.007, Confidence: 0.158, Lift: 0.999
whole milk => bottled beer | Support: 0.007, Confidence: 0.045, Lift: 0.999
sausage => whole milk | Support: 0.009, Confidence: 0.148, Lift: 0.940
whole milk => sausage | Support: 0.009, Confidence: 0.057, Lift: 0.940
