In [1]:
import numpy as np
import pandas as pd

In [15]:
# Toy market-basket data: each inner list holds the items bought in one transaction.
data = [
    ["Milk", "Onion", "Nutmeg", "Kidney Beans", "Eggs", "Yogurt"],
    ["Dill", "Onion", "Nutmeg", "Kidney Beans", "Eggs", "Yogurt"],
    ["Milk", "Apple", "Kidney Beans", "Eggs"],
    ["Milk", "Unicorn", "Corn", "Kidney Beans", "Yogurt"],
    ["Corn", "Onion", "Kidney Beans", "Ice Cream", "Eggs"],
]

# Transform the raw transactions into a machine-learning-ready format
## Preprocessing start

In [4]:
from mlxtend.preprocessing import TransactionEncoder

In [16]:
# Fit the encoder on the transactions, then encode the same transactions
# into a boolean array (one row per transaction, one column per distinct item).
te = TransactionEncoder()
te.fit(data)
te_array = te.transform(data)
te_array

array([[False, False, False,  True, False,  True,  True,  True,  True,
        False,  True],
       [False, False,  True,  True, False,  True, False,  True,  True,
        False,  True],
       [ True, False, False,  True, False,  True,  True, False, False,
        False, False],
       [False,  True, False, False, False,  True,  True, False, False,
         True,  True],
       [False,  True, False,  True,  True,  True, False, False,  True,
        False, False]])

In [17]:
# Item labels learned by the encoder; used below as the column names for te_array.
te.columns_

['Apple',
 'Corn',
 'Dill',
 'Eggs',
 'Ice Cream',
 'Kidney Beans',
 'Milk',
 'Nutmeg',
 'Onion',
 'Unicorn',
 'Yogurt']

In [18]:
# Item label -> column index lookup held by the fitted encoder.
te.columns_mapping_

{'Apple': 0,
 'Corn': 1,
 'Dill': 2,
 'Eggs': 3,
 'Ice Cream': 4,
 'Kidney Beans': 5,
 'Milk': 6,
 'Nutmeg': 7,
 'Onion': 8,
 'Unicorn': 9,
 'Yogurt': 10}

In [21]:
# Wrap the encoded boolean array in a DataFrame whose columns are the item names,
# which is the input the frequent-pattern miners below are called with.
df = pd.DataFrame(data=te_array, columns=te.columns_)
df

Unnamed: 0,Apple,Corn,Dill,Eggs,Ice Cream,Kidney Beans,Milk,Nutmeg,Onion,Unicorn,Yogurt
0,False,False,False,True,False,True,True,True,True,False,True
1,False,False,True,True,False,True,False,True,True,False,True
2,True,False,False,True,False,True,True,False,False,False,False
3,False,True,False,False,False,True,True,False,False,True,True
4,False,True,False,True,True,True,False,False,True,False,False


# End of preprocessing

# Now apply the Apriori and FP-Growth algorithms

In [23]:
from mlxtend.frequent_patterns import fpgrowth

In [33]:
# Minimum support threshold passed to every mining call in this notebook.
support = 0.6

# use_colnames is left off here, so itemsets are reported as column indices of df.
fpgrowth(df, min_support=support, use_colnames=False)

Unnamed: 0,support,itemsets
0,1.0,(5)
1,0.8,(3)
2,0.6,(10)
3,0.6,(8)
4,0.6,(6)
5,0.8,"(3, 5)"
6,0.6,"(10, 5)"
7,0.6,"(8, 3)"
8,0.6,"(8, 5)"
9,0.6,"(8, 3, 5)"


In [34]:
# Same FP-Growth run, but with item names in the itemsets instead of column indices.
rec1 = fpgrowth(df, use_colnames=True, min_support=support)
rec1

Unnamed: 0,support,itemsets
0,1.0,(Kidney Beans)
1,0.8,(Eggs)
2,0.6,(Yogurt)
3,0.6,(Onion)
4,0.6,(Milk)
5,0.8,"(Eggs, Kidney Beans)"
6,0.6,"(Kidney Beans, Yogurt)"
7,0.6,"(Eggs, Onion)"
8,0.6,"(Kidney Beans, Onion)"
9,0.6,"(Eggs, Kidney Beans, Onion)"


# Now pick one purchased product and list the frequent itemsets (support ≥ 0.6) that contain it

In [55]:
%%time
# Given one purchased item, list the frequent itemsets (mined with FP-Growth at
# `support`) that contain it.
purchase_items = "Milk"
predict_items = fpgrowth(df, min_support=support, use_colnames=True)

# Build the row filter while the itemsets are still frozensets, using exact
# membership. The previous approach stringified the itemsets and used
# `.str.contains`, which is a regex substring search on the repr: "Corn" also
# matched "Unicorn", "set" matched every row via the "frozenset" prefix, and
# regex metacharacters in an item name were interpreted as a pattern.
contains_purchase = (
    predict_items["itemsets"]
    .apply(lambda itemset: purchase_items in itemset)
    .astype(bool)  # keep a boolean mask even when no itemsets were found
)

# Stringify only after the mask is computed, so the displayed form of the
# itemsets column stays the same as before.
predict_items["itemsets"] = predict_items["itemsets"].apply(str)
print(predict_items.itemsets.values[0])

predict1 = predict_items[contains_purchase]
display(predict1)

frozenset({'Kidney Beans'})


Unnamed: 0,support,itemsets
4,0.6,frozenset({'Milk'})
10,0.6,"frozenset({'Milk', 'Kidney Beans'})"


Wall time: 11.4 ms


# Now using the Apriori algorithm

In [50]:
from mlxtend.frequent_patterns import fpgrowth,apriori

In [53]:
%%time
# Given one purchased item, list the frequent itemsets (mined with Apriori at
# `support`) that contain it.
purchase_items = "Milk"
predict_items = apriori(df, min_support=support, use_colnames=True)

# Build the row filter while the itemsets are still frozensets, using exact
# membership. The previous approach stringified the itemsets and used
# `.str.contains`, which is a regex substring search on the repr: "Corn" also
# matched "Unicorn", "set" matched every row via the "frozenset" prefix, and
# regex metacharacters in an item name were interpreted as a pattern.
contains_purchase = (
    predict_items["itemsets"]
    .apply(lambda itemset: purchase_items in itemset)
    .astype(bool)  # keep a boolean mask even when no itemsets were found
)

# Stringify only after the mask is computed, so the displayed form of the
# itemsets column stays the same as before.
predict_items["itemsets"] = predict_items["itemsets"].apply(str)
print(predict_items.itemsets.values[0])

predict2 = predict_items[contains_purchase]
display(predict2)

frozenset({'Eggs'})


Unnamed: 0,support,itemsets
2,0.6,frozenset({'Milk'})
7,0.6,"frozenset({'Milk', 'Kidney Beans'})"


Wall time: 27.5 ms
