# 🔧 Install Dependencies

In [5]:
pip install mlxtend pandas



# 🧾 Sample Dataset (Market Basket)

In [6]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

# Five toy shopping baskets; each inner list is one transaction.
dataset = [
    ["Milk", "Bread", "Butter"],
    ["Bread", "Butter"],
    ["Milk", "Bread"],
    ["Milk", "Butter"],
    ["Bread", "Butter", "Jam"],
]

# One-hot encode the baskets: the encoder learns the set of items and turns
# every transaction into a row with one True/False column per item.
te = TransactionEncoder()
te_data = te.fit_transform(dataset)

# Wrap the encoded array in a DataFrame labelled with the item names.
df = pd.DataFrame(te_data, columns=te.columns_)
print(df)


   Bread  Butter    Jam   Milk
0   True    True  False   True
1   True    True  False  False
2   True   False  False   True
3  False    True  False   True
4   True    True   True  False


# 🧮 1. Using Apriori Algorithm

In [7]:
from mlxtend.frequent_patterns import apriori, association_rules

# Step 1: mine the itemsets that meet the minimum support of 0.4,
# reporting them by item name instead of column index.
frequent_itemsets = apriori(
    df,
    min_support=0.4,
    use_colnames=True,
)

# Step 2: derive association rules from those itemsets, filtering on
# confidence with a threshold of 0.6.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.6,
)

# Show only the headline columns for each rule.
apriori_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
print("\nApriori Rules:")
print(rules[apriori_columns])



Apriori Rules:
  antecedents consequents  support  confidence      lift
0    (Butter)     (Bread)      0.6    0.750000  0.937500
1     (Bread)    (Butter)      0.6    0.750000  0.937500
2      (Milk)     (Bread)      0.4    0.666667  0.833333
3      (Milk)    (Butter)      0.4    0.666667  0.833333


# ⚡ 2. Using FP-Growth Algorithm

In [8]:
from mlxtend.frequent_patterns import fpgrowth

# Mine frequent itemsets with FP-Growth, using the same minimum support (0.4)
# as the Apriori cell and reporting itemsets by item name.
frequent_itemsets_fp = fpgrowth(
    df,
    min_support=0.4,
    use_colnames=True,
)

# Derive association rules, filtering on confidence with a threshold of 0.6.
rules_fp = association_rules(
    frequent_itemsets_fp,
    metric="confidence",
    min_threshold=0.6,
)

# Show only the headline columns for each rule.
fp_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
print("\nFP-Growth Rules:")
print(rules_fp[fp_columns])



FP-Growth Rules:
  antecedents consequents  support  confidence      lift
0    (Butter)     (Bread)      0.6    0.750000  0.937500
1     (Bread)    (Butter)      0.6    0.750000  0.937500
2      (Milk)     (Bread)      0.4    0.666667  0.833333
3      (Milk)    (Butter)      0.4    0.666667  0.833333


# **Implement the Apriori algorithm in Python using a real-world dataset**

To implement the Apriori algorithm in Python on a real-world dataset, you can use the Groceries dataset, which contains over 9,800 transactions from a German retailer. This dataset is widely used for market basket analysis and is available on Kaggle.

In [20]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

# Load the dataset from the CSV file.
df = pd.read_csv('Groceries_dataset.csv')

# A transaction is identified by member number plus date, so build a
# "<member>_<date>" key for every row.
member_key = df['Member_number'].astype(str)
df['Transaction'] = member_key + '_' + df['Date']

# Gather the item descriptions of each transaction into a list,
# producing a list of lists.
items_per_transaction = df.groupby('Transaction')['itemDescription']
transactions = items_per_transaction.apply(list).tolist()

# One-hot encode the transactions and label the columns with the item names.
te = TransactionEncoder()
te_ary = te.fit_transform(transactions)
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)


In [21]:
from mlxtend.frequent_patterns import apriori

# Mine frequent itemsets from the encoded groceries data with a minimum
# support of 0.01, reporting itemsets by item name instead of column index.
frequent_itemsets = apriori(
    df_encoded,
    min_support=0.01,
    use_colnames=True,
)

# Preview the first five itemsets.
print(frequent_itemsets.head(5))


    support        itemsets
0  0.021386      (UHT-milk)
1  0.033950          (beef)
2  0.021787       (berries)
3  0.016574     (beverages)
4  0.045312  (bottled beer)


In [25]:
from mlxtend.frequent_patterns import association_rules

# Generate association rules filtered on confidence. The threshold is 0.0,
# so no rule is discarded for having low confidence.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.0,
)

# Preview the first five rules, restricted to the headline columns.
summary_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
print(rules[summary_columns].head(5))


          antecedents         consequents   support  confidence      lift
0  (other vegetables)        (rolls/buns)  0.010559    0.086481  0.786154
1        (rolls/buns)  (other vegetables)  0.010559    0.095990  0.786154
2  (other vegetables)        (whole milk)  0.014837    0.121511  0.769430
3        (whole milk)  (other vegetables)  0.014837    0.093948  0.769430
4        (whole milk)        (rolls/buns)  0.013968    0.088447  0.804028
