In [1]:
# Toy market-basket data: each inner list holds the items of one transaction.
# The last transaction lists 'Onion' twice, exactly as in the raw data.
data = [
    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice Cream', 'Eggs'],
]

In [2]:
# Show the raw transaction lists before any encoding is applied.
display(data)


[['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
 ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
 ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
 ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
 ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice Cream', 'Eggs']]

# Transform raw data into Machine Learning data format

# Preprocessing start

In [3]:
import pandas as pd
import numpy as np
from mlxtend.preprocessing import TransactionEncoder

In [4]:
# Build the encoder, fit it on the transactions, then transform the same
# transactions into a boolean array (one row per transaction, one column per item).
te = TransactionEncoder()
te.fit(data)

te_arry = te.transform(data)
te_arry

array([[False, False, False,  True, False,  True,  True,  True,  True,
        False,  True],
       [False, False,  True,  True, False,  True, False,  True,  True,
        False,  True],
       [ True, False, False,  True, False,  True,  True, False, False,
        False, False],
       [False,  True, False, False, False,  True,  True, False, False,
         True,  True],
       [False,  True, False,  True,  True,  True, False, False,  True,
        False, False]])

In [5]:
# Item names exposed by the fitted encoder; used below as the column labels
# for the encoded array.
te.columns_

['Apple',
 'Corn',
 'Dill',
 'Eggs',
 'Ice Cream',
 'Kidney Beans',
 'Milk',
 'Nutmeg',
 'Onion',
 'Unicorn',
 'Yogurt']

In [6]:
# Wrap the encoded boolean array in a DataFrame labelled with the encoder's item names.
# For reference, the first two raw transactions (rows 0 and 1) were:
#   ["Milk","Onion","Nutmeg","Kidney Beans","Eggs","Yogurt"]
#   ["Dill","Onion","Nutmeg","Kidney Beans","Eggs","Yogurt"]
df = pd.DataFrame(data=te_arry, columns=te.columns_)
df

Unnamed: 0,Apple,Corn,Dill,Eggs,Ice Cream,Kidney Beans,Milk,Nutmeg,Onion,Unicorn,Yogurt
0,False,False,False,True,False,True,True,True,True,False,True
1,False,False,True,True,False,True,False,True,True,False,True
2,True,False,False,True,False,True,True,False,False,False,False
3,False,True,False,False,False,True,True,False,False,True,True
4,False,True,False,True,True,True,False,False,True,False,False


# Preprocessing end


# Now apply FPGrowth

In [7]:
! pip install mlxtend



In [8]:
!pip install mlxtend --upgrade



In [9]:
from mlxtend.frequent_patterns import fpgrowth 

In [10]:
# Mine itemsets with support >= 0.6. With the default arguments the itemsets are
# reported as column positions of df (e.g. 5 is 'Kidney Beans'), not item names.
fpgrowth(df, min_support=0.6)


Unnamed: 0,support,itemsets
0,1.0,(5)
1,0.8,(3)
2,0.6,(10)
3,0.6,(8)
4,0.6,(6)
5,0.8,"(3, 5)"
6,0.6,"(10, 5)"
7,0.6,"(8, 3)"
8,0.6,"(8, 5)"
9,0.6,"(8, 3, 5)"


In [11]:
# Same mining run, but use_colnames=True reports each itemset with item names
# instead of column positions.
rec1 = fpgrowth(df, min_support=0.6, use_colnames=True)
rec1

Unnamed: 0,support,itemsets
0,1.0,(Kidney Beans)
1,0.8,(Eggs)
2,0.6,(Yogurt)
3,0.6,(Onion)
4,0.6,(Milk)
5,0.8,"(Eggs, Kidney Beans)"
6,0.6,"(Yogurt, Kidney Beans)"
7,0.6,"(Eggs, Onion)"
8,0.6,"(Onion, Kidney Beans)"
9,0.6,"(Eggs, Onion, Kidney Beans)"


In [12]:
purchase_item = "Onion"

rec1 = fpgrowth(df, min_support=0.6, use_colnames=True)

# Test real set membership on the frozensets *before* stringifying them.
# Substring-matching the string form with str.contains also hits items whose
# name merely contains the query (e.g. "Corn" would match "Unicorn") and
# interprets the query as a regular expression.
has_item = rec1['itemsets'].apply(lambda itemset: purchase_item in itemset).astype(bool)

# Keep the string rendering of the itemsets so the displayed table is unchanged.
rec1['itemsets'] = rec1['itemsets'].apply(str)
rec1[has_item]

Unnamed: 0,support,itemsets
3,0.6,frozenset({'Onion'})
7,0.6,"frozenset({'Eggs', 'Onion'})"
8,0.6,"frozenset({'Onion', 'Kidney Beans'})"
9,0.6,"frozenset({'Eggs', 'Onion', 'Kidney Beans'})"


# Apriori versus FP-Growth


In [13]:
# Re-display the one-hot encoded transactions that both algorithms are run on below.
df

Unnamed: 0,Apple,Corn,Dill,Eggs,Ice Cream,Kidney Beans,Milk,Nutmeg,Onion,Unicorn,Yogurt
0,False,False,False,True,False,True,True,True,True,False,True
1,False,False,True,True,False,True,False,True,True,False,True
2,True,False,False,True,False,True,True,False,False,False,False
3,False,True,False,False,False,True,True,False,False,True,True
4,False,True,False,True,True,True,False,False,True,False,False


In [14]:
from mlxtend.frequent_patterns import apriori, fpgrowth


purchase_item = "Milk"
ratio = 0.6


def _itemsets_containing(frequent_itemsets, item):
    """Return the rows of a frequent-itemset table whose itemset contains `item`.

    Parameters
    ----------
    frequent_itemsets : pandas.DataFrame
        Table with an 'itemsets' column of item collections, as produced here by
        fpgrowth / apriori with use_colnames=True.
    item : str
        Item that must be a member of the itemset.

    Returns
    -------
    pandas.DataFrame
        Copy of the matching rows with 'itemsets' rendered as strings, which is
        the format this notebook displays.

    Membership is tested on the collections themselves. Substring-matching their
    string form (str.contains) would also match items whose name merely contains
    the query (e.g. "Corn" inside "Unicorn") and would treat the query as a regex.
    """
    has_item = frequent_itemsets['itemsets'].apply(lambda itemset: item in itemset).astype(bool)
    # Copy so the string conversion below does not write into a slice of the input.
    matches = frequent_itemsets[has_item].copy()
    matches['itemsets'] = matches['itemsets'].apply(str)
    return matches


# Run both algorithms with the same support threshold and keep only the
# itemsets that contain the purchased item.
rec1 = _itemsets_containing(
    fpgrowth(df, min_support=ratio, use_colnames=True),
    purchase_item,
)
rec2 = _itemsets_containing(
    apriori(df, min_support=ratio, use_colnames=True, low_memory=True),
    purchase_item,
)

print("==============FpGrowth=============")
display(rec1)

print("=============Apriori================")
display(rec2)



Unnamed: 0,support,itemsets
4,0.6,frozenset({'Milk'})
10,0.6,"frozenset({'Milk', 'Kidney Beans'})"




Unnamed: 0,support,itemsets
2,0.6,frozenset({'Milk'})
7,0.6,"frozenset({'Milk', 'Kidney Beans'})"
