In [1]:
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules, fpgrowth

In [2]:
# Load the raw purchase records: one row per item bought by a member on a date.
groceries_csv = "./Groceries_dataset.csv"
df = pd.read_csv(groceries_csv)

In [3]:
# Size of the raw dataset as (rows, columns).
df.shape

(38765, 3)

In [4]:
# Preview the first rows to check the column layout.
df.head()

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,tropical fruit
1,2552,05-01-2015,whole milk
2,2300,19-09-2015,pip fruit
3,1187,12-12-2015,other vegetables
4,3037,01-02-2015,whole milk


In [5]:
# Count missing values per column before building transactions.
df.isna().sum()

Member_number      0
Date               0
itemDescription    0
dtype: int64

In [6]:
# How often each distinct item was purchased across all rows.
df['itemDescription'].value_counts()

itemDescription
whole milk               2502
other vegetables         1898
rolls/buns               1716
soda                     1514
yogurt                   1334
                         ... 
rubbing alcohol             5
bags                        4
baby cosmetics              3
kitchen utensil             1
preservation products       1
Name: count, Length: 167, dtype: int64

In [7]:
# Treat every (member, date) pair as one shopping basket.
basket_keys = ['Member_number', 'Date']
sell_customer = df.groupby(by=basket_keys)

In [8]:
# Build one transaction (list of item names) per (member, date) basket.
# Iterating a GroupBy yields (group_key, sub_frame) pairs in group order, so
# there is no need to materialise every group into an intermediate list first,
# and unpacking makes it explicit that only the sub-frame is used.
sell = [basket['itemDescription'].to_list() for _, basket in sell_customer]

In [9]:
# Number of baskets, i.e. distinct (member, date) groups.
len(sell)

14963

In [10]:
# Inspect the first ten baskets.
sell[0:10]

[['sausage', 'whole milk', 'semi-finished bread', 'yogurt'],
 ['whole milk', 'pastry', 'salty snack'],
 ['canned beer', 'misc. beverages'],
 ['sausage', 'hygiene articles'],
 ['soda', 'pickled vegetables'],
 ['frankfurter', 'curd'],
 ['sausage', 'whole milk', 'rolls/buns'],
 ['whole milk', 'soda'],
 ['beef', 'white bread'],
 ['frankfurter', 'soda', 'whipped/sour cream']]

In [11]:
# Encoder used to turn the lists of item names into a boolean basket/item matrix.
enc = TransactionEncoder()

In [12]:
# Learn the item vocabulary from the baskets, then encode every basket as a
# boolean row with one column per distinct item.
enc.fit(sell)
basket_matrix = enc.transform(sell)
encoded_df = pd.DataFrame(basket_matrix, columns=enc.columns_)

In [13]:
# Preview the encoded baskets: one boolean column per item.
encoded_df.head()

Unnamed: 0,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,bags,baking powder,bathroom cleaner,beef,berries,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,True,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [14]:
# Mine frequent itemsets with Apriori. An itemset is kept when it occurs in at
# least 0.1% of the baskets; itemsets are reported with item names.
apr_rule_items = apriori(encoded_df, min_support=0.001, use_colnames=True)

# Record how many items each frequent itemset contains.
apr_rule_items['items_no'] = apr_rule_items['itemsets'].apply(len)

# Derive association rules, filtered on lift with a threshold of 1.
rules_apr = association_rules(apr_rule_items, metric='lift', min_threshold=1)

In [15]:
# Mine frequent itemsets with FP-Growth, using the same 0.1% support threshold
# as the Apriori run; itemsets are reported with item names.
fp_rule_items = fpgrowth(encoded_df, min_support=0.001, use_colnames=True)

# Record how many items each frequent itemset contains.
fp_rule_items['items_no'] = fp_rule_items['itemsets'].apply(len)

# Derive association rules, filtered on lift with a threshold of 1.
rules_fp = association_rules(fp_rule_items, metric='lift', min_threshold=1)

In [41]:
# First frequent itemsets returned by FP-Growth.
fp_rule_items.head()

Unnamed: 0,support,itemsets,items_no
0,0.157923,(whole milk),1
1,0.085879,(yogurt),1
2,0.060349,(sausage),1
3,0.00949,(semi-finished bread),1
4,0.051728,(pastry),1


In [42]:
# Last frequent itemsets returned by FP-Growth.
fp_rule_items.tail()

Unnamed: 0,support,itemsets,items_no
745,0.001403,"(chewing gum, yogurt)",2
746,0.001069,"(other vegetables, chewing gum)",2
747,0.001002,"(soda, chewing gum)",2
748,0.001069,"(pasta, whole milk)",2
749,0.001002,"(rolls/buns, seasonal products)",2


In [43]:
# First association rules derived from the FP-Growth itemsets.
rules_fp.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,"(rolls/buns, yogurt)",(whole milk),0.007819,0.157923,0.001337,0.17094,1.082428,0.000102,1.015701,0.076751
1,"(rolls/buns, whole milk)",(yogurt),0.013968,0.085879,0.001337,0.095694,1.114293,0.000137,1.010854,0.104023
2,"(yogurt, whole milk)",(rolls/buns),0.011161,0.110005,0.001337,0.11976,1.088685,0.000109,1.011083,0.08238
3,(rolls/buns),"(yogurt, whole milk)",0.110005,0.011161,0.001337,0.012151,1.088685,0.000109,1.001002,0.09153
4,(yogurt),"(rolls/buns, whole milk)",0.085879,0.013968,0.001337,0.015564,1.114293,0.000137,1.001622,0.112206


In [45]:
# First association rules derived from the Apriori itemsets.
rules_apr.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(UHT-milk),(tropical fruit),0.021386,0.067767,0.001537,0.071875,1.060617,8.785064e-05,1.004426,0.058402
1,(tropical fruit),(UHT-milk),0.067767,0.021386,0.001537,0.022682,1.060617,8.785064e-05,1.001326,0.061307
2,(beef),(brown bread),0.03395,0.037626,0.001537,0.045276,1.203301,0.0002597018,1.008012,0.174891
3,(brown bread),(beef),0.037626,0.03395,0.001537,0.040853,1.203301,0.0002597018,1.007196,0.175559
4,(beef),(citrus fruit),0.03395,0.053131,0.001804,0.05315,1.000349,6.297697e-07,1.00002,0.000361


In [46]:
# First frequent itemsets returned by Apriori.
apr_rule_items.head()

Unnamed: 0,support,itemsets,items_no
0,0.00401,(Instant food products),1
1,0.021386,(UHT-milk),1
2,0.00147,(abrasive cleaner),1
3,0.001938,(artif. sweetener),1
4,0.008087,(baking powder),1


In [47]:
# Last frequent itemsets returned by Apriori.
apr_rule_items.tail()

Unnamed: 0,support,itemsets,items_no
745,0.001136,"(rolls/buns, sausage, whole milk)",3
746,0.001002,"(rolls/buns, soda, whole milk)",3
747,0.001337,"(rolls/buns, yogurt, whole milk)",3
748,0.001069,"(soda, whole milk, sausage)",3
749,0.00147,"(whole milk, yogurt, sausage)",3
