In [6]:
import warnings
# Install a blanket "ignore" filter so no warning of any category is shown by
# the cells that follow. NOTE(review): this also hides deprecation notices
# raised by the libraries used below, so problems may go unnoticed.
warnings.filterwarnings('ignore')

In [8]:
import numpy as np 
import pandas as pd 
from mlxtend.frequent_patterns import apriori, association_rules 

In [36]:
# Load the shopping-cart file. Because explicit column names are supplied, the
# first line of the file is read as data rather than as a header row.
df = pd.read_csv(
    'shopping_cart.csv',
    sep=',',
    names=['groceryProducts'],
)
# Preview the first few transactions.
df.head()


Unnamed: 0,groceryProducts
0,"Lays,Garlic,Honey"
1,"Garlic,Lays,Honey,Cheese"
2,"Garlic,Tea,Biscuits"
3,"Jam,Noodles,Garlic,Lays"
4,"Noodles,Tea,Honey"


In [37]:
# Size of the loaded frame as (rows, columns); each row is one transaction.
df.shape

(24, 1)

In [38]:
# Turn each comma-joined basket string into a list of product names, giving a
# list of transactions (one inner list per row of the frame).
data = [basket.split(",") for basket in df["groceryProducts"]]
data

[['Lays', 'Garlic', 'Honey'],
 ['Garlic', 'Lays', 'Honey', 'Cheese'],
 ['Garlic', 'Tea', 'Biscuits'],
 ['Jam', 'Noodles', 'Garlic', 'Lays'],
 ['Noodles', 'Tea', 'Honey'],
 ['Garlic', 'Tea', 'Biscuits'],
 ['Noodles', 'Tea', 'Cheese'],
 ['Noodles', 'Garlic', 'Tea', 'Honey'],
 ['Jam', 'Noodles', 'Garlic', 'Tea'],
 ['Garlic', 'Lays'],
 ['Coffee', 'Chips', 'Honey', 'Cheese'],
 ['Coffee', 'Chips', 'Honey', 'Cheese'],
 ['Coffee', 'Sugar', 'Biscuits'],
 ['Garlic', 'Coffee', 'Chips'],
 ['Garlic', 'Sugar', 'Honey'],
 ['Coffee', 'Sugar', 'Cheese'],
 ['Garlic', 'Sugar', 'Biscuits'],
 ['Garlic', 'Coffee', 'Sugar'],
 ['Garlic', 'Coffee', 'Sugar'],
 ['Tea', 'Lays', 'Coffee', 'Cheese'],
 ['Garlic', 'Sushi', 'Dairy Milk', 'Lays'],
 ['Noodles', 'Tea', 'Sushi', 'Honey'],
 ['Dairy Milk', 'Tea', 'Chips', 'Sugar'],
 ['Garlic', 'Sushi', 'Lays', 'Biscuits']]

In [39]:
from mlxtend.preprocessing import TransactionEncoder

# One-hot encode the transactions: the encoder learns the set of distinct
# products from `data` and returns a boolean matrix with one row per
# transaction and one column per product.
x = TransactionEncoder()
x_data = x.fit(data).transform(data)

# Convert the boolean matrix to 0/1 integers with a single explicit cast.
# The previous pair of `replace(False, 0)` / `replace(True, 1)` calls depended
# on pandas silently downcasting the replaced columns back to integers; that
# downcasting is deprecated, and without it the columns are left as object
# dtype. `astype(int)` produces the same 0/1 frame deterministically.
#
# NOTE: this rebinds `df`, so the raw single-column frame is no longer
# available afterwards; re-running the cell that reads
# df["groceryProducts"] requires re-running the CSV load first.
df = pd.DataFrame(x_data, columns=x.columns_).astype(int)
df

Unnamed: 0,Biscuits,Cheese,Chips,Coffee,Dairy Milk,Garlic,Honey,Jam,Lays,Noodles,Sugar,Sushi,Tea
0,0,0,0,0,0,1,1,0,1,0,0,0,0
1,0,1,0,0,0,1,1,0,1,0,0,0,0
2,1,0,0,0,0,1,0,0,0,0,0,0,1
3,0,0,0,0,0,1,0,1,1,1,0,0,0
4,0,0,0,0,0,0,1,0,0,1,0,0,1
5,1,0,0,0,0,1,0,0,0,0,0,0,1
6,0,1,0,0,0,0,0,0,0,1,0,0,1
7,0,0,0,0,0,1,1,0,0,1,0,0,1
8,0,0,0,0,0,1,0,1,0,1,0,0,1
9,0,0,0,0,0,1,0,0,1,0,0,0,0


In [40]:
# Mine frequent itemsets from the one-hot frame. Only itemsets with a support
# of at least 0.2 are kept, itemsets are labelled with the column (product)
# names, and verbose=1 prints progress while the search runs.
df_apr = apriori(
    df,
    min_support=0.2,
    use_colnames=True,
    verbose=1,
)
df_apr

Processing 6 combinations | Sampling itemset size 32


Unnamed: 0,support,itemsets
0,0.208333,(Biscuits)
1,0.25,(Cheese)
2,0.333333,(Coffee)
3,0.625,(Garlic)
4,0.333333,(Honey)
5,0.291667,(Lays)
6,0.25,(Noodles)
7,0.291667,(Sugar)
8,0.375,(Tea)
9,0.25,"(Lays, Garlic)"


In [41]:
# Derive association rules from the frequent itemsets, keeping only the rules
# whose confidence reaches the 0.6 threshold.
df_ar = association_rules(
    df_apr,
    metric="confidence",
    min_threshold=0.6,
)
df_ar

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Lays),(Garlic),0.291667,0.625,0.25,0.857143,1.371429,0.067708,2.625
1,(Noodles),(Tea),0.25,0.375,0.208333,0.833333,2.222222,0.114583,3.75


In [42]:
# Experiment with different min_support values
for min_sup in [ 0.02, 0.05,0.06]:
    frq_items = apriori(df, min_support=min_sup, use_colnames=True)
    rules = association_rules(frq_items, metric="lift", min_threshold=1)
    print(f"Number of rules with min_support {min_sup}: {len(rules)}")

Number of rules with min_support 0.02: 416
Number of rules with min_support 0.05: 112
Number of rules with min_support 0.06: 112
