## Installing mlxtend

In [1]:
pip install mlxtend


Note: you may need to restart the kernel to use updated packages.


## Importing Required libraries

In [2]:
import pandas as pd
import numpy as np
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [3]:
# Load the transaction data; the path is resolved relative to the notebook's working directory.
food = pd.read_csv(
    "Association_Rule_Mining_Data.csv"
)

In [4]:
# Preview the first five rows of the raw data to check that it loaded as expected.
food.head()

Unnamed: 0,Row Labels,Almonds,Poha,Banana,Beans,Besan,Boiled Rice,Bread,Brinjals,Buns & Pavs,...,Sooji & Rava,Soya Products,Sugar,Sugar Cubes,Sunflower Oils,Toor Dal,Urad Dal,Utensil Scrub Pads,Washing Bars,Whole Spices
0,6468572,,,1.0,,,,,,,...,,,1.0,1.0,,,,,,
1,6486475,,,,,,1.0,,,1.0,...,,,,,,1.0,,1.0,,
2,6504964,1.0,,,,,1.0,,,,...,,,,,,,1.0,,,
3,6529569,,,1.0,,,,,,,...,,,1.0,,,1.0,1.0,,,
4,6549521,,,1.0,,,,,,,...,,,,1.0,,,,,,


In [5]:
# 'Row Labels' appears to be a per-row identifier rather than an item, so remove it
# before mining. Reassigning (instead of inplace=True) together with errors="ignore"
# keeps this cell safe to re-run: a second run no longer raises KeyError.
food = food.drop(columns="Row Labels", errors="ignore")

## Replacing 1 with True and NaN values with False, because the apriori implementation expects a boolean (one-hot encoded) DataFrame

In [6]:
# Vectorised comparison: cells equal to 1 become True, everything else (including NaN)
# becomes False. This replaces DataFrame.applymap, which is deprecated since pandas 2.1,
# and avoids calling a Python lambda once per cell.
food = food.eq(1)


In [7]:
# Preview the first five rows again to confirm that every cell is now a boolean.
food.head()

Unnamed: 0,Almonds,Poha,Banana,Beans,Besan,Boiled Rice,Bread,Brinjals,Buns & Pavs,Cakes,...,Sooji & Rava,Soya Products,Sugar,Sugar Cubes,Sunflower Oils,Toor Dal,Urad Dal,Utensil Scrub Pads,Washing Bars,Whole Spices
0,False,False,True,False,False,False,False,False,False,False,...,False,False,True,True,False,False,False,False,False,False
1,False,False,False,False,False,True,False,False,True,True,...,False,False,False,False,False,True,False,True,False,False
2,True,False,False,False,False,True,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False
3,False,False,True,False,False,False,False,False,False,True,...,False,False,True,False,False,True,True,False,False,False
4,False,False,True,False,False,False,False,False,False,False,...,False,False,False,True,False,False,False,False,False,False


In [8]:
# (number of rows, number of columns) of the encoded frame.
food.shape


(50, 53)

## The Apriori algorithm uses parameters such as support, confidence, and lift to generate rules for product recommendation.

## We will provide values to these parameters and come out with various sets of products that are frequently bought together. This can help to increase revenue.

## Support measures how frequently an item (or combination of items) is purchased relative to total purchases. It is the number of orders containing the item or combination divided by the total number of orders, and it allows us to filter out food items that have been purchased less frequently.

In [9]:
# Keep only itemsets whose support is at least 0.22, i.e. that occur in at least 22% of
# the rows; use_colnames=True reports item names instead of column positions.
frequent_food = apriori(
    food,
    min_support=0.22,
    use_colnames=True,
)

In [10]:
# Display the frequent itemsets together with their support.
frequent_food

Unnamed: 0,support,itemsets
0,0.72,(Banana)
1,0.48,(Beans)
2,0.40,(Boiled Rice)
3,0.22,(Liquid Soaps & Bars)
4,0.40,(Namkeen)
...,...,...
107,0.22,"(Sugar, Other Vegetables, Other Dals)"
108,0.22,"(Sugar, Other Dals, Root Vegetables)"
109,0.26,"(Sugar, Other Dals, Urad Dal)"
110,0.22,"(Sugar, Other Vegetables, Urad Dal)"


## finding length of each itemset

In [11]:
# Number of items in each frequent itemset; len is passed directly instead of
# being wrapped in a lambda.
frequent_food["itemsets_length"] = frequent_food["itemsets"].map(len)

In [12]:
# Display the itemsets again, now including the itemsets_length column.
frequent_food


Unnamed: 0,support,itemsets,itemsets_length
0,0.72,(Banana),1
1,0.48,(Beans),1
2,0.40,(Boiled Rice),1
3,0.22,(Liquid Soaps & Bars),1
4,0.40,(Namkeen),1
...,...,...,...
107,0.22,"(Sugar, Other Vegetables, Other Dals)",3
108,0.22,"(Sugar, Other Dals, Root Vegetables)",3
109,0.26,"(Sugar, Other Dals, Urad Dal)",3
110,0.22,"(Sugar, Other Vegetables, Urad Dal)",3


## generate rules from frequent itemsets by specifying threshold for confidence or lift

In [13]:
# Derive association rules from the frequent itemsets, keeping those whose
# confidence reaches the 0.8 threshold.
rules = association_rules(
    frequent_food,
    metric="confidence",
    min_threshold=0.8,
)

In [14]:
# Display the generated rules and their metrics.
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Namkeen),(Banana),0.4,0.72,0.36,0.9,1.25,0.072,2.8
1,(Other Vegetables),(Banana),0.38,0.72,0.32,0.842105,1.169591,0.0464,1.773333
2,(Root Vegetables),(Banana),0.44,0.72,0.4,0.909091,1.262626,0.0832,3.08
3,(Snacky Nuts),(Banana),0.4,0.72,0.36,0.9,1.25,0.072,2.8
4,(Other Vegetables),(Beans),0.38,0.48,0.32,0.842105,1.754386,0.1376,3.293333
5,(Raisins),(Beans),0.3,0.48,0.24,0.8,1.666667,0.096,2.6
6,(Toor Dal),(Other Dals),0.24,0.48,0.22,0.916667,1.909722,0.1048,6.24
7,(Urad Dal),(Sugar),0.36,0.5,0.34,0.944444,1.888889,0.16,9.0
8,"(Beans, Namkeen)",(Banana),0.26,0.72,0.24,0.923077,1.282051,0.0528,3.64
9,"(Beans, Other Dals)",(Banana),0.3,0.72,0.26,0.866667,1.203704,0.044,2.1


## Now, we will make three columns to get length of antecedent, consequent and total length of rule

In [15]:
# Size of each rule's antecedent set and consequent set, and the combined
# number of items in the rule.
rules["antecedent_len"] = rules["antecedents"].map(len)
rules["consequents_len"] = rules["consequents"].map(len)
rules["total_len"] = rules[["antecedent_len", "consequents_len"]].sum(axis=1)

In [16]:
# Display the rules again, now including the three length columns.
rules


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len,consequents_len,total_len
0,(Namkeen),(Banana),0.4,0.72,0.36,0.9,1.25,0.072,2.8,1,1,2
1,(Other Vegetables),(Banana),0.38,0.72,0.32,0.842105,1.169591,0.0464,1.773333,1,1,2
2,(Root Vegetables),(Banana),0.44,0.72,0.4,0.909091,1.262626,0.0832,3.08,1,1,2
3,(Snacky Nuts),(Banana),0.4,0.72,0.36,0.9,1.25,0.072,2.8,1,1,2
4,(Other Vegetables),(Beans),0.38,0.48,0.32,0.842105,1.754386,0.1376,3.293333,1,1,2
5,(Raisins),(Beans),0.3,0.48,0.24,0.8,1.666667,0.096,2.6,1,1,2
6,(Toor Dal),(Other Dals),0.24,0.48,0.22,0.916667,1.909722,0.1048,6.24,1,1,2
7,(Urad Dal),(Sugar),0.36,0.5,0.34,0.944444,1.888889,0.16,9.0,1,1,2
8,"(Beans, Namkeen)",(Banana),0.26,0.72,0.24,0.923077,1.282051,0.0528,3.64,2,1,3
9,"(Beans, Other Dals)",(Banana),0.3,0.72,0.26,0.866667,1.203704,0.044,2.1,2,1,3


## Using rules to get recommendations of length 3 (two antecedent items and one consequent item) with lift greater than 1.4

In [17]:
# Rules of the form {A, B} -> {C} whose lift exceeds 1.4.
rules.loc[
    rules["antecedent_len"].eq(2)
    & rules["lift"].gt(1.4)
    & rules["consequents_len"].eq(1)
]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len,consequents_len,total_len
11,"(Other Vegetables, Banana)",(Beans),0.32,0.48,0.26,0.8125,1.692708,0.1064,2.773333,2,1,3
28,"(Banana, Urad Dal)",(Sugar),0.24,0.5,0.24,1.0,2.0,0.12,inf,2,1,3
29,"(Other Vegetables, Other Dals)",(Beans),0.26,0.48,0.22,0.846154,1.762821,0.0952,3.38,2,1,3
30,"(Beans, Other Dals)",(Root Vegetables),0.3,0.44,0.26,0.866667,1.969697,0.128,4.2,2,1,3
31,"(Other Dals, Root Vegetables)",(Beans),0.3,0.48,0.26,0.866667,1.805556,0.116,3.9,2,1,3
32,"(Other Vegetables, Root Vegetables)",(Beans),0.24,0.48,0.22,0.916667,1.909722,0.1048,6.24,2,1,3
33,"(Sugar, Other Vegetables)",(Beans),0.28,0.48,0.24,0.857143,1.785714,0.1056,3.64,2,1,3
34,"(Beans, Snacky Nuts)",(Root Vegetables),0.26,0.44,0.22,0.846154,1.923077,0.1056,3.64,2,1,3
35,"(Sugar, Root Vegetables)",(Beans),0.3,0.48,0.24,0.8,1.666667,0.096,2.6,2,1,3
36,"(Beans, Urad Dal)",(Sugar),0.26,0.5,0.26,1.0,2.0,0.13,inf,2,1,3


## Based upon the sets obtained, different actions can be taken to increase revenue.

## Food items which belong to the same category and are frequently purchased together can be placed side by side, for example banana, other vegetables and beans.

## Items which frequently appear as the consequent, like sugar and beans, can be kept at a central location to make it more likely that customers buy them.


## Getting frequently bought together products in groups of 4

In [18]:
# Rules of the form {A, B, C} -> {D} whose lift exceeds 1.4.
rules.loc[
    rules["antecedent_len"].eq(3)
    & rules["lift"].gt(1.4)
    & rules["consequents_len"].eq(1)
]


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len,consequents_len,total_len
46,"(Beans, Banana, Other Dals)",(Root Vegetables),0.26,0.44,0.24,0.923077,2.097902,0.1256,7.28,3,1,4
47,"(Beans, Banana, Root Vegetables)",(Other Dals),0.3,0.48,0.24,0.8,1.666667,0.096,2.6,3,1,4
49,"(Banana, Other Dals, Root Vegetables)",(Beans),0.28,0.48,0.24,0.857143,1.785714,0.1056,3.64,3,1,4


## Similarly, we can get other product recommendations, by putting a filter on values of required lift and Itemset length.

## While determining itemsets, it is essential that the lift is greater than 1 and that support and confidence also have good enough values to maximise returns from our strategy.