# Installing Libraries

In [13]:
pip install mlxtend

Collecting mlxtendNote: you may need to restart the kernel to use updated packages.

  Downloading mlxtend-0.23.0-py3-none-any.whl (1.4 MB)
     ---------------------------------------- 1.4/1.4 MB 440.7 kB/s eta 0:00:00
Installing collected packages: mlxtend
Successfully installed mlxtend-0.23.0


# Loading Libraries

In [1]:
import pandas as pd
import os
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [2]:
data=pd.read_csv('Groceries_dataset.csv')

In [3]:
data.head(10)

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,tropical fruit
1,2552,05-01-2015,whole milk
2,2300,19-09-2015,pip fruit
3,1187,12-12-2015,other vegetables
4,3037,01-02-2015,whole milk
5,4941,14-02-2015,rolls/buns
6,4501,08-05-2015,other vegetables
7,3803,23-12-2015,pot plants
8,2762,20-03-2015,whole milk
9,4119,12-02-2015,tropical fruit


# Applying the association Rule

In [4]:

def encoder(x):
    if x <= 0:
        return 0
    if x >= 1:
        return 1

apriori_data = data.groupby(['Member_number','itemDescription'])['itemDescription'].count().reset_index(name ='Count')
apriori_basket = apriori_data.pivot_table(index = 'Member_number', columns = 'itemDescription', values = 'Count', aggfunc = 'sum').fillna(0)
apriori_basket_set = apriori_basket.applymap(encoder)
apriori_basket_set.head()

itemDescription,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,bags,baking powder,bathroom cleaner,beef,berries,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
Member_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1000,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,1,0
1001,0,0,0,0,0,0,0,0,1,0,...,0,0,0,1,0,1,0,1,0,0
1002,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1003,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1004,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [5]:
frequent_itemset=apriori(apriori_basket_set, min_support=0.1, use_colnames=True)
frequent_itemset.head(7)



Unnamed: 0,support,itemsets
0,0.119548,(beef)
1,0.158799,(bottled beer)
2,0.213699,(bottled water)
3,0.135967,(brown bread)
4,0.126475,(butter)
5,0.165213,(canned beer)
6,0.100564,(chicken)


In [11]:
rules = association_rules(frequent_itemset, metric = 'lift', min_threshold = 0.8)
rules.sort_values('confidence', ascending = False, inplace = True)
rules.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
25,(yogurt),(whole milk),0.282966,0.458184,0.15059,0.532185,1.16151,0.02094,1.158185,0.193926
1,(bottled water),(whole milk),0.213699,0.458184,0.112365,0.52581,1.147597,0.014452,1.142615,0.163569
19,(sausage),(whole milk),0.206003,0.458184,0.106978,0.519303,1.133394,0.012591,1.127146,0.14823
12,(rolls/buns),(whole milk),0.349666,0.458184,0.178553,0.510638,1.114484,0.018342,1.10719,0.157955
7,(other vegetables),(whole milk),0.376603,0.458184,0.19138,0.508174,1.109106,0.018827,1.101643,0.157802
22,(tropical fruit),(whole milk),0.23371,0.458184,0.11647,0.498353,1.087672,0.009388,1.080076,0.105189
16,(root vegetables),(whole milk),0.230631,0.458184,0.113135,0.490545,1.07063,0.007464,1.063522,0.085746
21,(soda),(whole milk),0.313494,0.458184,0.151103,0.481997,1.051973,0.007465,1.045971,0.071966
9,(yogurt),(other vegetables),0.282966,0.376603,0.120318,0.425204,1.12905,0.013752,1.084553,0.159406
2,(rolls/buns),(other vegetables),0.349666,0.376603,0.146742,0.419663,1.114335,0.015056,1.074197,0.157772


In [12]:
# Filter rules with more than 2 itemsets
rules_with_more_than_2_itemsets = rules[rules['antecedents'].apply(lambda x: len(x) > 2) | rules['consequents'].apply(lambda x: len(x) > 2)]

# Sort the filtered rules based on confidence in descending order
rules_with_more_than_2_itemsets.sort_values('confidence', ascending=False, inplace=True)

# Display the filtered rules
rules_with_more_than_2_itemsets.head(10)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rules_with_more_than_2_itemsets.sort_values('confidence', ascending=False, inplace=True)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
