<a href="https://colab.research.google.com/github/akashgardas/Data-Science/blob/main/Association%20Rule%20Mining/Apriori.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Apriori
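This notebook mines frequent itemsets and association rules from a small, hand-made supermarket-basket sample dataset, using the Apriori implementation in `mlxtend`.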

In [None]:
# installing dependencies
!pip install mlxtend



In [None]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

In [None]:
# ------------------------------
# Step 1: Create a supermarket dataset
# ------------------------------

# Each inner list is one transaction (the items bought together in one basket).
dataset = [
    ['Milk', 'Bread', 'Eggs'],
    ['Milk', 'Bread'],
    ['Milk', 'Diaper', 'Beer', 'Eggs'],
    ['Bread', 'Diaper', 'Beer'],
    ['Milk', 'Bread', 'Diaper', 'Beer'],
    ['Bread', 'Eggs'],
]

# One-hot encode the transactions: one row per basket, one boolean column per item.
te = TransactionEncoder()
te_ary = te.fit_transform(dataset)
df = pd.DataFrame(data=te_ary, columns=te.columns_)
df

Unnamed: 0,Beer,Bread,Diaper,Eggs,Milk
0,False,True,False,True,True
1,False,True,False,False,True
2,True,False,True,True,True
3,True,True,True,False,False
4,True,True,True,False,True
5,False,True,False,True,False


In [None]:
# ------------------------------
# Step 2: Run Apriori Algorithm
# ------------------------------

# min_support=0.2 keeps itemsets present in at least 20% of the transactions;
# use_colnames=True reports itemsets by item name rather than column index.
apriori_options = {"min_support": 0.2, "use_colnames": True}
frequent_itemsets = apriori(df, **apriori_options)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.5,(Beer)
1,0.833333,(Bread)
2,0.5,(Diaper)
3,0.5,(Eggs)
4,0.666667,(Milk)
5,0.333333,"(Bread, Beer)"
6,0.5,"(Diaper, Beer)"
7,0.333333,"(Milk, Beer)"
8,0.333333,"(Bread, Diaper)"
9,0.333333,"(Eggs, Bread)"


In [None]:
# ------------------------------
# Step 3: Generate Association Rules
# ------------------------------

# Derive rules from the frequent itemsets, filtering on confidence with a 0.3 threshold.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.3,
)

# Rank the rules: highest confidence first, ties broken by lift, then by support.
ranking_keys = ['confidence', 'lift', 'support']
rules_sorted = rules.sort_values(
    by=ranking_keys,
    ascending=[False] * len(ranking_keys),
)

# Keep the ten best-ranked rules.
top10 = rules_sorted.iloc[:10]
top10

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
2,(Diaper),(Beer),0.5,0.5,0.5,1.0,2.0,1.0,0.25,inf,1.0,1.0,1.0,1.0
3,(Beer),(Diaper),0.5,0.5,0.5,1.0,2.0,1.0,0.25,inf,1.0,1.0,1.0,1.0
16,"(Bread, Diaper)",(Beer),0.333333,0.5,0.333333,1.0,2.0,1.0,0.166667,inf,0.75,0.666667,1.0,0.833333
18,"(Bread, Beer)",(Diaper),0.333333,0.5,0.333333,1.0,2.0,1.0,0.166667,inf,0.75,0.666667,1.0,0.833333
22,"(Milk, Diaper)",(Beer),0.333333,0.5,0.333333,1.0,2.0,1.0,0.166667,inf,0.75,0.666667,1.0,0.833333
24,"(Milk, Beer)",(Diaper),0.333333,0.5,0.333333,1.0,2.0,1.0,0.166667,inf,0.75,0.666667,1.0,0.833333
11,(Milk),(Bread),0.666667,0.833333,0.5,0.75,0.9,1.0,-0.055556,0.666667,-0.25,0.5,-0.5,0.675
19,(Diaper),"(Bread, Beer)",0.5,0.333333,0.333333,0.666667,2.0,1.0,0.166667,2.0,1.0,0.666667,0.5,0.833333
21,(Beer),"(Bread, Diaper)",0.5,0.333333,0.333333,0.666667,2.0,1.0,0.166667,2.0,1.0,0.666667,0.5,0.833333
25,(Diaper),"(Milk, Beer)",0.5,0.333333,0.333333,0.666667,2.0,1.0,0.166667,2.0,1.0,0.666667,0.5,0.833333


In [None]:
# ------------------------------
# Step 4: Display Results
# ------------------------------
# Show only the headline columns of the top-ranked rules.
summary_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
top10.loc[:, summary_columns]

Unnamed: 0,antecedents,consequents,support,confidence,lift
2,(Diaper),(Beer),0.5,1.0,2.0
3,(Beer),(Diaper),0.5,1.0,2.0
16,"(Bread, Diaper)",(Beer),0.333333,1.0,2.0
18,"(Bread, Beer)",(Diaper),0.333333,1.0,2.0
22,"(Milk, Diaper)",(Beer),0.333333,1.0,2.0
24,"(Milk, Beer)",(Diaper),0.333333,1.0,2.0
11,(Milk),(Bread),0.5,0.75,0.9
19,(Diaper),"(Bread, Beer)",0.333333,0.666667,2.0
21,(Beer),"(Bread, Diaper)",0.333333,0.666667,2.0
25,(Diaper),"(Milk, Beer)",0.333333,0.666667,2.0
