## Installation via pip


In [None]:
!pip install mlxtend  



# Apriori in Python
The apriori function expects data in a one-hot encoded pandas DataFrame. Suppose we have the following transaction data:

In [None]:
# Sample transactions taken from the mlxtend documentation (see also Prof's slide):
# http://rasbt.github.io/mlxtend/user_guide/frequent_patterns/association_rules/#association-rules-generation-from-frequent-itemsets
# Each inner list is one transaction; each string is one purchased item.
dataset = [
    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    # 'Onion' is listed twice in this transaction on purpose (kept from the sample).
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
]


In [None]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

# One-hot encode the transactions: fit_transform learns the set of distinct items
# and returns the encoded array in a single call.
te = TransactionEncoder()
te_ary = te.fit_transform(dataset)

# Wrap the encoded array in a DataFrame, using the item labels stored on the
# fitted encoder as column names.
df = pd.DataFrame(data=te_ary, columns=te.columns_)
df

Unnamed: 0,Apple,Corn,Dill,Eggs,Ice cream,Kidney Beans,Milk,Nutmeg,Onion,Unicorn,Yogurt
0,False,False,False,True,False,True,True,True,True,False,True
1,False,False,True,True,False,True,False,True,True,False,True
2,True,False,False,True,False,True,True,False,False,False,False
3,False,True,False,False,False,True,True,False,False,True,True
4,False,True,False,True,True,True,False,False,True,False,False


In [None]:
from mlxtend.frequent_patterns import apriori, association_rules

# Frequent itemsets with a support of at least 0.6, labelled with the DataFrame's
# column names.
frequent_itemsets = apriori(df, use_colnames=True, min_support=0.6)

# Rules derived from those itemsets, keeping the ones whose confidence is at
# least 0.8. Left as the last expression so the notebook renders the table.
association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.8,
)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Eggs),(Kidney Beans),0.8,1.0,0.8,1.0,1.0,0.0,inf
1,(Kidney Beans),(Eggs),1.0,0.8,0.8,0.8,1.0,0.0,1.0
2,(Onion),(Eggs),0.6,0.8,0.6,1.0,1.25,0.12,inf
3,(Milk),(Kidney Beans),0.6,1.0,0.6,1.0,1.0,0.0,inf
4,(Onion),(Kidney Beans),0.6,1.0,0.6,1.0,1.0,0.0,inf
5,(Yogurt),(Kidney Beans),0.6,1.0,0.6,1.0,1.0,0.0,inf
6,"(Onion, Kidney Beans)",(Eggs),0.6,0.8,0.6,1.0,1.25,0.12,inf
7,"(Eggs, Onion)",(Kidney Beans),0.6,1.0,0.6,1.0,1.0,0.0,inf
8,(Onion),"(Eggs, Kidney Beans)",0.6,0.8,0.6,1.0,1.25,0.12,inf


In [None]:
# Dataset from Dropbox and Prof
df = pd.read_csv('MilkAssociationWeka.csv')

# Encode "yes"/"no" as 1/0 with an explicit mapping and an explicit integer dtype.
# Chained DataFrame.replace() calls only ended up with integer columns through
# pandas' implicit object-dtype downcasting, which recent pandas releases deprecate.
# Any cell that is neither "yes" nor "no" maps to NaN, so the astype call below
# fails loudly instead of letting an unexpected value slip through.
df = df.apply(lambda column: column.map({"yes": 1, "no": 0})).astype("int64")
df

Unnamed: 0,milk,bread,butter,beer,diapers
0,1,1,0,0,0
1,0,0,1,0,0
2,0,0,0,1,1
3,1,1,1,0,0
4,0,1,0,0,0
5,0,0,0,1,1
6,0,0,0,1,1
7,0,0,0,1,1
8,0,0,0,1,1


## Compare the result with the Weka Association Rules
- pay attention to the lift / support / confidence
- `association_rules`, `metric` parameter: the metric used to evaluate whether a rule is of interest. It is automatically set to 'support' if `support_only=True`; otherwise, supported metrics include 'support', 'confidence', 'lift', 'leverage' and 'conviction'.



In [None]:
# apriori is meant to be fed one-hot encoded boolean data, while df holds 0/1
# integers here. Check that nothing other than 0/1 is present before casting,
# because astype(bool) would silently turn any other value into True.
assert df.isin([0, 1]).to_numpy().all(), "df must contain only 0/1 values"

# Frequent itemsets with a support of at least 0.2, labelled with the column names.
frequent_itemsets = apriori(df.astype(bool), min_support=0.2, use_colnames=True)

# Rules whose confidence is at least 0.7; displayed as the cell output.
rules = association_rules(frequent_itemsets, metric='confidence', min_threshold=0.7)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(milk),(bread),0.222222,0.333333,0.222222,1.0,3.0,0.148148,inf
1,(beer),(diapers),0.555556,0.555556,0.555556,1.0,1.8,0.246914,inf
2,(diapers),(beer),0.555556,0.555556,0.555556,1.0,1.8,0.246914,inf


- Pandas DataFrames make it easy to filter the results further. Let's say we are only interested in rules that satisfy the following criteria:
    - at least 2 antecedents
    - a confidence > 0.75
    - a lift score > 1.2