## How does it work?
- Bottom-up fashion: start from frequent single items and extend them step by step into larger itemsets.
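- You must set the **Minimum Support Threshold**: the minimum fraction of transactions an itemset must appear in to count as "frequent". E.g. with 6 transactions and a threshold of 0.3, an itemset must appear in at least 2 transactions.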
- Candidate rules are generated from the frequent itemsets and kept only if they meet the **Confidence threshold**.
- Example rule: {Diapers, Eggs} $\to$ {Bread}; Confidence = Support({Bread, Diapers, Eggs}) / Support({Diapers, Eggs})
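- **Lift**: Tells whether a rule is better than random chance. Lift(A $\to$ B) = Confidence(A $\to$ B) / Support(B); a lift above 1 means A and B occur together more often than expected if they were independent, while a lift of about 1 (or below) means the rule is no better than chance.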

In [3]:
# Dependency: run `%pip install mlxtend` once if the package is missing from the kernel's environment

In [5]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

In [15]:
# Toy market-basket dataset: each inner list is one transaction (the items
# bought together in a single basket).
data = [
    ['Milk', 'Bread', 'Eggs'],            # transaction 0
    ['Milk', 'Bread'],                    # transaction 1
    ['Milk', 'Diaper', 'Beer', 'Eggs'],   # transaction 2
    ['Bread', 'Diaper', 'Beer'],          # transaction 3
    ['Milk', 'Bread', 'Diaper', 'Beer'],  # transaction 4
    ['Bread', 'Eggs'],                    # transaction 5
]
# NOTE: do not build a DataFrame straight from `data` (pd.DataFrame(data)).
# The baskets have different lengths, so the columns would be item positions
# padded with None rather than one boolean column per item. The transactions
# are one-hot encoded with TransactionEncoder instead.

In [8]:
# Step 1: let the encoder learn the distinct items that occur in the baskets.
te = TransactionEncoder().fit(data)
# Step 2: one-hot encode the transactions -> boolean matrix with one row per
# transaction and one column per item.
te_ary = te.transform(data)
te_ary

array([[False,  True, False,  True,  True],
       [False,  True, False, False,  True],
       [ True, False,  True,  True,  True],
       [ True,  True,  True, False, False],
       [ True,  True,  True, False,  True],
       [False,  True, False,  True, False]])

In [17]:
# Wrap the boolean matrix in a DataFrame, labelling each column with the item
# name the encoder learned (te.columns_).
df = pd.DataFrame(data=te_ary, columns=te.columns_)
df

Unnamed: 0,Beer,Bread,Diaper,Eggs,Milk
0,False,True,False,True,True
1,False,True,False,False,True
2,True,False,True,True,True
3,True,True,True,False,False
4,True,True,True,False,True
5,False,True,False,True,False


In [18]:
# Mine the frequent itemsets. min_support=0.3 keeps itemsets present in at
# least 30% of the transactions (i.e. at least 2 of the 6 baskets here);
# use_colnames=True reports item names instead of column indices.
frequent_items = apriori(
    df,
    use_colnames=True,
    min_support=0.3,
)

In [21]:
# Derive association rules from the frequent itemsets, keeping only those
# whose confidence is at least 0.6.
rules = association_rules(
    frequent_items,
    min_threshold=0.6,
    metric="confidence",
)

In [23]:
# Rank the rules by confidence, breaking ties by lift (both descending), and
# keep the first ten. Confidence is the primary key, so a rule with a lift
# below 1 can still rank above lower-confidence rules that have a higher lift.
top_10_rules = (
    rules
    .sort_values(by=['confidence', 'lift'], ascending=False)
    .head(10)
)
top_10_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
1,(Diaper),(Beer),0.5,0.5,0.5,1.0,2.0,1.0,0.25,inf,1.0,1.0,1.0,1.0
2,(Beer),(Diaper),0.5,0.5,0.5,1.0,2.0,1.0,0.25,inf,1.0,1.0,1.0,1.0
10,"(Bread, Diaper)",(Beer),0.333333,0.5,0.333333,1.0,2.0,1.0,0.166667,inf,0.75,0.666667,1.0,0.833333
11,"(Bread, Beer)",(Diaper),0.333333,0.5,0.333333,1.0,2.0,1.0,0.166667,inf,0.75,0.666667,1.0,0.833333
15,"(Milk, Diaper)",(Beer),0.333333,0.5,0.333333,1.0,2.0,1.0,0.166667,inf,0.75,0.666667,1.0,0.833333
16,"(Milk, Beer)",(Diaper),0.333333,0.5,0.333333,1.0,2.0,1.0,0.166667,inf,0.75,0.666667,1.0,0.833333
6,(Milk),(Bread),0.666667,0.833333,0.5,0.75,0.9,1.0,-0.055556,0.666667,-0.25,0.5,-0.5,0.675
13,(Diaper),"(Bread, Beer)",0.5,0.333333,0.333333,0.666667,2.0,1.0,0.166667,2.0,1.0,0.666667,0.5,0.833333
14,(Beer),"(Bread, Diaper)",0.5,0.333333,0.333333,0.666667,2.0,1.0,0.166667,2.0,1.0,0.666667,0.5,0.833333
18,(Diaper),"(Milk, Beer)",0.5,0.333333,0.333333,0.666667,2.0,1.0,0.166667,2.0,1.0,0.666667,0.5,0.833333


In [25]:
# Compact view: show only the rule itself and its three headline metrics.
top_10_rules.loc[
    :,
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]

Unnamed: 0,antecedents,consequents,support,confidence,lift
1,(Diaper),(Beer),0.5,1.0,2.0
2,(Beer),(Diaper),0.5,1.0,2.0
10,"(Bread, Diaper)",(Beer),0.333333,1.0,2.0
11,"(Bread, Beer)",(Diaper),0.333333,1.0,2.0
15,"(Milk, Diaper)",(Beer),0.333333,1.0,2.0
16,"(Milk, Beer)",(Diaper),0.333333,1.0,2.0
6,(Milk),(Bread),0.5,0.75,0.9
13,(Diaper),"(Bread, Beer)",0.333333,0.666667,2.0
14,(Beer),"(Bread, Diaper)",0.333333,0.666667,2.0
18,(Diaper),"(Milk, Beer)",0.333333,0.666667,2.0
