## Dataset Load

In [4]:
# Five example shopping baskets; each inner list is one transaction.
# Note: 'Onion' is listed twice in the last basket.
dataset = [
    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
]

## Load required packages

In [5]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

## Transform the data

In [8]:
# Fit the encoder on the transactions to learn the item labels, then
# encode every transaction as a row of True/False flags (one per item).
te = TransactionEncoder()
te.fit(dataset)
te_try = te.transform(dataset)

## Generate Dataframe

In [10]:
# Wrap the encoded array in a DataFrame, labelling each column with the
# item name the encoder learned.
df = pd.DataFrame(data=te_try, columns=te.columns_)

In [11]:
# Display the one-hot encoded transactions (one row per basket, one column per item).
df

Unnamed: 0,Apple,Corn,Dill,Eggs,Ice cream,Kidney Beans,Milk,Nutmeg,Onion,Unicorn,Yogurt
0,False,False,False,True,False,True,True,True,True,False,True
1,False,False,True,True,False,True,False,True,True,False,True
2,True,False,False,True,False,True,True,False,False,False,False
3,False,True,False,False,False,True,True,False,False,True,True
4,False,True,False,True,True,True,False,False,True,False,False


## Model Training

In [12]:
from mlxtend.frequent_patterns import apriori

In [13]:
# Mine itemsets present in at least half of the transactions.
# With use_colnames left at its default (False), itemsets are reported
# as column indices of `df` rather than item names.
apriori(df, min_support=0.5, use_colnames=False)

Unnamed: 0,support,itemsets
0,0.8,(3)
1,1.0,(5)
2,0.6,(6)
3,0.6,(8)
4,0.6,(10)
5,0.8,"(3, 5)"
6,0.6,"(8, 3)"
7,0.6,"(5, 6)"
8,0.6,"(8, 5)"
9,0.6,"(10, 5)"


## Model Training with item names in the result

In [14]:
# Same mining run, but report itemsets by item name instead of column index.
apriori(
    df,
    min_support=0.5,
    use_colnames=True,
)

Unnamed: 0,support,itemsets
0,0.8,(Eggs)
1,1.0,(Kidney Beans)
2,0.6,(Milk)
3,0.6,(Onion)
4,0.6,(Yogurt)
5,0.8,"(Eggs, Kidney Beans)"
6,0.6,"(Eggs, Onion)"
7,0.6,"(Kidney Beans, Milk)"
8,0.6,"(Kidney Beans, Onion)"
9,0.6,"(Yogurt, Kidney Beans)"


## Calculate the length of each itemset

In [15]:
# Mine the frequent itemsets, then record how many items each one contains
# so the results can be filtered by size later.
frequent_itemsets = apriori(df, min_support=0.6, use_colnames=True)
itemset_sizes = frequent_itemsets['itemsets'].apply(len)
frequent_itemsets['length'] = itemset_sizes
frequent_itemsets

Unnamed: 0,support,itemsets,length
0,0.8,(Eggs),1
1,1.0,(Kidney Beans),1
2,0.6,(Milk),1
3,0.6,(Onion),1
4,0.6,(Yogurt),1
5,0.8,"(Eggs, Kidney Beans)",2
6,0.6,"(Eggs, Onion)",2
7,0.6,"(Kidney Beans, Milk)",2
8,0.6,"(Kidney Beans, Onion)",2
9,0.6,"(Yogurt, Kidney Beans)",2


## Itemsets of length 2 with support >= 0.8

In [16]:
# Keep only the two-item itemsets whose support is at least 0.8.
is_pair = frequent_itemsets['length'] == 2
is_well_supported = frequent_itemsets['support'] >= 0.8
frequent_itemsets[is_pair & is_well_supported]

Unnamed: 0,support,itemsets,length
5,0.8,"(Eggs, Kidney Beans)",2


In [17]:
# Look up one specific itemset; the order of the items in the set is irrelevant.
wanted_items = {'Onion', 'Eggs'}
frequent_itemsets[frequent_itemsets['itemsets'] == wanted_items]

Unnamed: 0,support,itemsets,length
6,0.6,"(Eggs, Onion)",2


In [18]:
# Encode the transactions again, this time asking the encoder for a
# sparse matrix instead of a dense array.
oht_ary = te.fit(dataset).transform(dataset, sparse=True)

# Build the sparse DataFrame from the sparse matrix itself. The previous
# version passed the dense `te_try` array (leaving `oht_ary` unused) and
# relied on pd.SparseDataFrame, which was removed in pandas 1.0.
sparse_df = pd.DataFrame.sparse.from_spmatrix(oht_ary, columns=te.columns_)
sparse_df

Unnamed: 0,Apple,Corn,Dill,Eggs,Ice cream,Kidney Beans,Milk,Nutmeg,Onion,Unicorn,Yogurt
0,False,False,False,True,False,True,True,True,True,False,True
1,False,False,True,True,False,True,False,True,True,False,True
2,True,False,False,True,False,True,True,False,False,False,False
3,False,True,False,False,False,True,True,False,False,True,True
4,False,True,False,True,True,True,False,False,True,False,False


## Verbose mode reports the iteration number and the itemset size being sampled

In [21]:
# Run apriori on the sparse frame; verbose=1 prints progress for each
# iteration along with the itemset size currently being sampled.
apriori(
    sparse_df,
    min_support=0.6,
    use_colnames=True,
    verbose=1,
)

Iteration: 1 | Sampling itemset size 2Iteration: 2 | Sampling itemset size 2Iteration: 3 | Sampling itemset size 2Iteration: 4 | Sampling itemset size 2Iteration: 5 | Sampling itemset size 2Iteration: 6 | Sampling itemset size 2Iteration: 7 | Sampling itemset size 2Iteration: 8 | Sampling itemset size 2Iteration: 9 | Sampling itemset size 2Iteration: 10 | Sampling itemset size 2Iteration: 11 | Sampling itemset size 3Iteration: 12 | Sampling itemset size 3Iteration: 13 | Sampling itemset size 3Iteration: 14 | Sampling itemset size 3Iteration: 15 | Sampling itemset size 3Iteration: 16 | Sampling itemset size 3Iteration: 17 | Sampling itemset size 3


Unnamed: 0,support,itemsets
0,0.8,(Eggs)
1,1.0,(Kidney Beans)
2,0.6,(Milk)
3,0.6,(Onion)
4,0.6,(Yogurt)
5,0.8,"(Eggs, Kidney Beans)"
6,0.6,"(Eggs, Onion)"
7,0.6,"(Kidney Beans, Milk)"
8,0.6,"(Kidney Beans, Onion)"
9,0.6,"(Yogurt, Kidney Beans)"


## Using max_len to limit the maximum itemset size

In [30]:
# Cap the itemset size at three items. For this data the result matches
# the uncapped run, since no frequent itemset is longer than two items.
apriori(
    sparse_df,
    min_support=0.6,
    use_colnames=True,
    verbose=1,
    max_len=3,
)

Iteration: 1 | Sampling itemset size 2Iteration: 2 | Sampling itemset size 2Iteration: 3 | Sampling itemset size 2Iteration: 4 | Sampling itemset size 2Iteration: 5 | Sampling itemset size 2Iteration: 6 | Sampling itemset size 2Iteration: 7 | Sampling itemset size 2Iteration: 8 | Sampling itemset size 2Iteration: 9 | Sampling itemset size 2Iteration: 10 | Sampling itemset size 2Iteration: 11 | Sampling itemset size 3Iteration: 12 | Sampling itemset size 3Iteration: 13 | Sampling itemset size 3Iteration: 14 | Sampling itemset size 3Iteration: 15 | Sampling itemset size 3Iteration: 16 | Sampling itemset size 3Iteration: 17 | Sampling itemset size 3


Unnamed: 0,support,itemsets
0,0.8,(Eggs)
1,1.0,(Kidney Beans)
2,0.6,(Milk)
3,0.6,(Onion)
4,0.6,(Yogurt)
5,0.8,"(Eggs, Kidney Beans)"
6,0.6,"(Eggs, Onion)"
7,0.6,"(Kidney Beans, Milk)"
8,0.6,"(Kidney Beans, Onion)"
9,0.6,"(Yogurt, Kidney Beans)"
