(video link)
[https://www.youtube.com/watch?v=Cryve9ZWbYk&t=161s]

## Frequent Pattern Growth Algo
#### Imports

In [17]:
import pandas as pd
import random 
import numpy as np
!pip install mlxtend
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

Defaulting to user installation because normal site-packages is not writeable


#### Make random dataset

In [18]:
groceries = ['Apples', 'Bananas', 'Milk', 'Bread', 'Eggs', 'Cheese', 'Tomatoes', 'Potatoes', 'Onions', 'Chicken']

# Generating 6 arrays with shuffled orders and 70% chance for 1, 30% chance for 0 for each item
arrays = []
for _ in range(6):
    random.shuffle(groceries)  # Shuffle the order of groceries
    random_frequency = [random.choices([0, 1], weights=[0.3, 0.7])[0] for _ in range(len(groceries))]
    arrays.append(dict(zip(groceries, random_frequency)))

# Displaying the generated arrays
for index, arr in enumerate(arrays, 1):
    print(f"Array {index}: {arr}")



Array 1: {'Potatoes': 1, 'Bread': 1, 'Tomatoes': 1, 'Onions': 1, 'Milk': 1, 'Eggs': 1, 'Bananas': 1, 'Chicken': 1, 'Cheese': 1, 'Apples': 1}
Array 2: {'Apples': 1, 'Bananas': 0, 'Onions': 1, 'Tomatoes': 1, 'Potatoes': 1, 'Milk': 1, 'Chicken': 1, 'Eggs': 1, 'Bread': 1, 'Cheese': 0}
Array 3: {'Bananas': 1, 'Eggs': 1, 'Bread': 1, 'Apples': 1, 'Chicken': 1, 'Onions': 1, 'Tomatoes': 1, 'Cheese': 1, 'Milk': 1, 'Potatoes': 1}
Array 4: {'Cheese': 0, 'Eggs': 0, 'Bananas': 1, 'Milk': 1, 'Chicken': 1, 'Bread': 1, 'Apples': 1, 'Potatoes': 1, 'Tomatoes': 1, 'Onions': 0}
Array 5: {'Apples': 1, 'Milk': 1, 'Cheese': 1, 'Tomatoes': 1, 'Potatoes': 1, 'Chicken': 1, 'Bananas': 1, 'Onions': 1, 'Eggs': 1, 'Bread': 1}
Array 6: {'Bread': 1, 'Eggs': 1, 'Cheese': 1, 'Milk': 1, 'Potatoes': 1, 'Apples': 1, 'Bananas': 1, 'Tomatoes': 0, 'Onions': 1, 'Chicken': 1}


#### Clean up dataset

In [19]:
dataset = [
    ['Milk', 'Cheese', 'Eggs', 'Potatoes', 'Bananas', 'Onions'],
    ['Eggs', 'Bread', 'Cheese', 'Potatoes', 'Milk', 'Bananas'],
    ['Potatoes', 'Bread', 'Milk', 'Onions', 'Cheese', 'Chicken', 'Tomatoes'],
    ['Bananas', 'Bread', 'Apples', 'Onions', 'Potatoes', 'Tomatoes'],
    ['Potatoes', 'Tomatoes', 'Onions', 'Apples', 'Eggs', 'Cheese', 'Bread'],
    ['Chicken', 'Potatoes', 'Apples', 'Bread', 'Cheese', 'Milk']
]

print(dataset)

[['Milk', 'Cheese', 'Eggs', 'Potatoes', 'Bananas', 'Onions'], ['Eggs', 'Bread', 'Cheese', 'Potatoes', 'Milk', 'Bananas'], ['Potatoes', 'Bread', 'Milk', 'Onions', 'Cheese', 'Chicken', 'Tomatoes'], ['Bananas', 'Bread', 'Apples', 'Onions', 'Potatoes', 'Tomatoes'], ['Potatoes', 'Tomatoes', 'Onions', 'Apples', 'Eggs', 'Cheese', 'Bread'], ['Chicken', 'Potatoes', 'Apples', 'Bread', 'Cheese', 'Milk']]


#### Convert the dataset to true or false values

In [20]:
te = TransactionEncoder()
te_array = te.fit(dataset).transform(dataset)
te_array

array([[False,  True, False,  True, False,  True,  True,  True,  True,
        False],
       [False,  True,  True,  True, False,  True,  True, False,  True,
        False],
       [False, False,  True,  True,  True, False,  True,  True,  True,
         True],
       [ True,  True,  True, False, False, False, False,  True,  True,
         True],
       [ True, False,  True,  True, False,  True, False,  True,  True,
         True],
       [ True, False,  True,  True,  True, False,  True, False,  True,
        False]])

#### Convert to dataframe

In [21]:
df = pd.DataFrame(te_array, columns = te.columns_)
df

Unnamed: 0,Apples,Bananas,Bread,Cheese,Chicken,Eggs,Milk,Onions,Potatoes,Tomatoes
0,False,True,False,True,False,True,True,True,True,False
1,False,True,True,True,False,True,True,False,True,False
2,False,False,True,True,True,False,True,True,True,True
3,True,True,True,False,False,False,False,True,True,True
4,True,False,True,True,False,True,False,True,True,True
5,True,False,True,True,True,False,True,False,True,False


## FPG Algorith

In [24]:
fpgrowth(df, min_support=0.6)

Unnamed: 0,support,itemsets
0,1.0,(8)
1,0.833333,(3)
2,0.666667,(7)
3,0.666667,(6)
4,0.833333,(2)
5,0.833333,"(8, 3)"
6,0.666667,"(8, 7)"
7,0.666667,"(3, 6)"
8,0.666667,"(8, 6)"
9,0.666667,"(8, 3, 6)"


## Apriori Algo

In [22]:
from mlxtend.frequent_patterns import apriori
frequent_itemsets=apriori(df, min_support=0.6, use_colnames=True)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.833333,(Bread)
1,0.833333,(Cheese)
2,0.666667,(Milk)
3,0.666667,(Onions)
4,1.0,(Potatoes)
5,0.666667,"(Bread, Cheese)"
6,0.833333,"(Potatoes, Bread)"
7,0.666667,"(Cheese, Milk)"
8,0.833333,"(Potatoes, Cheese)"
9,0.666667,"(Potatoes, Milk)"


In [23]:
rules = association_rules(frequent_itemsets, metric = "confidence", min_threshold = 0.8)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Potatoes),(Bread),1.0,0.833333,0.833333,0.833333,1.0,0.0,1.0,0.0
1,(Bread),(Potatoes),0.833333,1.0,0.833333,1.0,1.0,0.0,inf,0.0
2,(Milk),(Cheese),0.666667,0.833333,0.666667,1.0,1.2,0.111111,inf,0.5
3,(Potatoes),(Cheese),1.0,0.833333,0.833333,0.833333,1.0,0.0,1.0,0.0
4,(Cheese),(Potatoes),0.833333,1.0,0.833333,1.0,1.0,0.0,inf,0.0
5,(Milk),(Potatoes),0.666667,1.0,0.666667,1.0,1.0,0.0,inf,0.0
6,(Onions),(Potatoes),0.666667,1.0,0.666667,1.0,1.0,0.0,inf,0.0
7,"(Bread, Cheese)",(Potatoes),0.666667,1.0,0.666667,1.0,1.0,0.0,inf,0.0
8,"(Potatoes, Milk)",(Cheese),0.666667,0.833333,0.666667,1.0,1.2,0.111111,inf,0.5
9,"(Cheese, Milk)",(Potatoes),0.666667,1.0,0.666667,1.0,1.0,0.0,inf,0.0


In [25]:
from mlxtend.frequent_patterns import apriori
%timeit apriori(df, min_support = 0.6)

1.56 ms ± 84.6 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [26]:
%timeit fpgrowth(df, min_support = 0.6)

534 µs ± 12.6 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
