<a href="https://colab.research.google.com/github/gowripreetham/SJSU_Pycaret/blob/main/Association_Rule_Mining.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# STEP 1 â€” confirm your Python version (should be 3.12)
import sys
print(sys.version)

# STEP 2 â€” upgrade pip and dependencies
!pip install -U pip setuptools wheel scikit-learn>=1.4

# STEP 3 â€” install PyCaret directly from GitHub (latest main/master branch supports 3.12)
!pip install -U git+https://github.com/pycaret/pycaret.git@master

In [None]:
# Pull the 'germany' sample dataset through PyCaret's dataset helper and keep
# it in `data`, the name every later cell reads from.
from pycaret.datasets import get_data

data = get_data(dataset='germany')

In [None]:
# Display the (rows, columns) dimensions of the loaded dataset.
data.shape

In [None]:
!pip install mlxtend

In [None]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# Nothing is loaded in this cell: `data` must already exist in the kernel
# (it is created by the get_data cell). This cell only reports its size
# and previews the first rows.
print("✅ Data Loaded:", data.shape)
data.head()


In [None]:
# Drop rows missing an invoice number or item description, then normalise the
# invoice number to a string (important for grouping).
# The column is rewritten with .assign() on the dropna() result instead of
# item-assignment into it, which can trigger pandas' SettingWithCopyWarning.
data = (
    data
    .dropna(subset=['InvoiceNo', 'Description'])
    .assign(InvoiceNo=lambda frame: frame['InvoiceNo'].astype(str))
)

# Keep only positive quantities
# NOTE(review): presumably non-positive quantities are returns/cancellations — confirm.
if 'Quantity' in data.columns:
    data = data[data['Quantity'] > 0]

print("🧹 Cleaned data:", data.shape)


In [None]:
# Create basket: each transaction (InvoiceNo) is a row, each item (Description) a column.
#   size()                 -> number of rows for every (invoice, item) pair
#   unstack(fill_value=0)  -> pivot items into columns, 0 where the invoice lacks the item
#   gt(0)                  -> boolean presence flags (True = item is in the invoice)
# Boolean flags replace the former per-cell applymap(lambda ...) conversion:
# DataFrame.applymap is deprecated in pandas 2.1+, and mlxtend's apriori warns
# about (and is slower on) non-boolean input.
basket = (
    data
    .groupby(['InvoiceNo', 'Description'])
    .size()
    .unstack(fill_value=0)
    .gt(0)
)
print("✅ Basket matrix ready:", basket.shape)
basket.head()


In [None]:
# Minimum share of transactions an itemset must appear in to be kept as "frequent".
MIN_SUPPORT = 0.03

# apriori expects one-hot data and warns that non-boolean frames are slower and
# may lose support, so cast explicitly (no effect if basket is already boolean).
frequent_itemsets = apriori(basket.astype(bool), min_support=MIN_SUPPORT, use_colnames=True)
print("✅ Frequent Itemsets found:", frequent_itemsets.shape)
frequent_itemsets.head()


In [None]:
# Derive association rules from the frequent itemsets, filtering on the lift
# metric with a minimum threshold of 1.
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
print("✅ Rules generated:", rules.shape)

# Preview the key columns of the first ten rules (in the order they were generated).
rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']].head(10)


In [None]:
import matplotlib.pyplot as plt

# One point per rule: support on the x-axis, confidence on the y-axis.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(rules['support'], rules['confidence'], alpha=0.7, color='teal')
ax.set_title('Support vs Confidence')
ax.set_xlabel('Support')
ax.set_ylabel('Confidence')
ax.grid(True, linestyle='--', alpha=0.6)
plt.show()


In [None]:
# Write the rules table to a CSV file in the current working directory,
# omitting the DataFrame's row index.
rules.to_csv("association_rules_output.csv", index=False)
print("💾 Rules saved to association_rules_output.csv")
