# Apriori Algorithm

In [2]:
import random
import numpy as np
import string
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules, fpgrowth
from mlxtend.preprocessing import TransactionEncoder

In [23]:
# Toy market-basket data: each inner list is one transaction (one basket).
# The last basket lists 'Onion' twice; the one-hot encoding built from this
# data only records presence, so the repeat shows up as a single True.
results = [
    ['Milk', 'Onion', 'Nuts', 'Eggs', 'Ice cream', 'Yogurt'],
    ['Dill', 'Onion', 'Nuts', 'Eggs', 'Ice cream', 'Yogurt'],
    ['Milk', 'Apple', 'Eggs', 'Ice cream'],
    ['Milk', 'Unicorn', 'Corn', 'Ice cream', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Ice cream', 'Eggs', 'Cake'],
]
results

[['Milk', 'Onion', 'Nuts', 'Eggs', 'Ice cream', 'Yogurt'],
 ['Dill', 'Onion', 'Nuts', 'Eggs', 'Ice cream', 'Yogurt'],
 ['Milk', 'Apple', 'Eggs', 'Ice cream'],
 ['Milk', 'Unicorn', 'Corn', 'Ice cream', 'Yogurt'],
 ['Corn', 'Onion', 'Onion', 'Ice cream', 'Eggs', 'Cake']]

In [24]:
# One-hot encode the transactions: the encoder learns the set of distinct
# items and produces one boolean row per transaction in a single step.
# TE is kept around because its learned item labels (TE.columns_) are needed
# to name the DataFrame columns afterwards.
TE = TransactionEncoder()
res = TE.fit_transform(results)

In [25]:
# Wrap the encoded boolean matrix in a DataFrame, labelling each column with
# the item name the encoder learned for it.
df = pd.DataFrame(data=res, columns=TE.columns_)
df

Unnamed: 0,Apple,Cake,Corn,Dill,Eggs,Ice cream,Milk,Nuts,Onion,Unicorn,Yogurt
0,False,False,False,False,True,True,True,True,True,False,True
1,False,False,False,True,True,True,False,True,True,False,True
2,True,False,False,False,True,True,True,False,False,False,False
3,False,False,True,False,False,True,True,False,False,True,True
4,False,True,True,False,True,True,False,False,True,False,False


In [27]:
# Mine the frequent itemsets with Apriori: keep every item or combination of
# items that appears in at least 60% of the transactions.
frequent_items = apriori(
    df,
    min_support=0.6,
    use_colnames=True,  # show item names in the itemsets instead of column indices
)
frequent_items

Unnamed: 0,support,itemsets
0,0.8,(Eggs)
1,1.0,(Ice cream)
2,0.6,(Milk)
3,0.6,(Onion)
4,0.6,(Yogurt)
5,0.8,"(Ice cream, Eggs)"
6,0.6,"(Onion, Eggs)"
7,0.6,"(Ice cream, Milk)"
8,0.6,"(Onion, Ice cream)"
9,0.6,"(Ice cream, Yogurt)"


In [28]:
# Turn the frequent itemsets into association rules (antecedents -> consequents),
# keeping only the rules whose confidence is at least 0.7. The resulting table
# also reports support, lift, leverage and conviction for each rule.
result = association_rules(
    frequent_items,
    metric="confidence",
    min_threshold=0.7,
)
result

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Ice cream),(Eggs),1.0,0.8,0.8,0.8,1.0,0.0,1.0
1,(Eggs),(Ice cream),0.8,1.0,0.8,1.0,1.0,0.0,inf
2,(Onion),(Eggs),0.6,0.8,0.6,1.0,1.25,0.12,inf
3,(Eggs),(Onion),0.8,0.6,0.6,0.75,1.25,0.12,1.6
4,(Milk),(Ice cream),0.6,1.0,0.6,1.0,1.0,0.0,inf
5,(Onion),(Ice cream),0.6,1.0,0.6,1.0,1.0,0.0,inf
6,(Yogurt),(Ice cream),0.6,1.0,0.6,1.0,1.0,0.0,inf
7,"(Onion, Ice cream)",(Eggs),0.6,0.8,0.6,1.0,1.25,0.12,inf
8,"(Onion, Eggs)",(Ice cream),0.6,1.0,0.6,1.0,1.0,0.0,inf
9,"(Eggs, Ice cream)",(Onion),0.8,0.6,0.6,0.75,1.25,0.12,1.6


In [29]:
# Narrow the rules table down to the columns of interest: the rule itself
# (antecedents / consequents) and its support, confidence and lift.
new_result = result.loc[
    :, ['antecedents', 'consequents', 'support', 'confidence', 'lift']
]
new_result

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(Ice cream),(Eggs),0.8,0.8,1.0
1,(Eggs),(Ice cream),0.8,1.0,1.0
2,(Onion),(Eggs),0.6,1.0,1.25
3,(Eggs),(Onion),0.6,0.75,1.25
4,(Milk),(Ice cream),0.6,1.0,1.0
5,(Onion),(Ice cream),0.6,1.0,1.0
6,(Yogurt),(Ice cream),0.6,1.0,1.0
7,"(Onion, Ice cream)",(Eggs),0.6,1.0,1.25
8,"(Onion, Eggs)",(Ice cream),0.6,1.0,1.0
9,"(Eggs, Ice cream)",(Onion),0.6,0.75,1.25


In [30]:
# Keep only the rules with a confidence of 1 or more, i.e. rules whose
# consequent appears in every transaction that contains the antecedent.
# The original row labels are preserved, so the index may have gaps.
brand_new_result = new_result.loc[new_result['confidence'].ge(1)]
brand_new_result

Unnamed: 0,antecedents,consequents,support,confidence,lift
1,(Eggs),(Ice cream),0.8,1.0,1.0
2,(Onion),(Eggs),0.6,1.0,1.25
4,(Milk),(Ice cream),0.6,1.0,1.0
5,(Onion),(Ice cream),0.6,1.0,1.0
6,(Yogurt),(Ice cream),0.6,1.0,1.0
7,"(Onion, Ice cream)",(Eggs),0.6,1.0,1.25
8,"(Onion, Eggs)",(Ice cream),0.6,1.0,1.0
10,(Onion),"(Eggs, Ice cream)",0.6,1.0,1.25


# FP Growth Algorithm

In [33]:
# FP-Growth is generally faster than Apriori. Run it on the same one-hot
# encoded frame with the same 60% minimum support used for the Apriori run.
fp_growth = fpgrowth(
    df,
    min_support=0.6,
    use_colnames=True,  # show item names in the itemsets instead of column indices
)
fp_growth

Unnamed: 0,support,itemsets
0,1.0,(Ice cream)
1,0.8,(Eggs)
2,0.6,(Yogurt)
3,0.6,(Onion)
4,0.6,(Milk)
5,0.8,"(Ice cream, Eggs)"
6,0.6,"(Ice cream, Yogurt)"
7,0.6,"(Onion, Eggs)"
8,0.6,"(Onion, Ice cream)"
9,0.6,"(Onion, Ice cream, Eggs)"
