# Market Basket Optimisation

In [1]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
from csv import reader
import mlxtend
from mlxtend.preprocessing import TransactionEncoder

with open('Market_Basket_Optimisation.csv', 'r') as read_obj:
    
    csv_reader = reader(read_obj)
    l = list(csv_reader)
    
te=TransactionEncoder()
te_ary=te.fit(l).transform(l)    
df=pd.DataFrame(te_ary, columns=te.columns_)  
df

Unnamed: 0,asparagus,almonds,antioxydant juice,asparagus.1,avocado,babies food,bacon,barbecue sauce,black tea,blueberries,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
0,False,True,True,False,True,False,False,False,False,False,...,False,True,False,False,True,False,False,True,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,True,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7496,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7497,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7498,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7499,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [2]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7501 entries, 0 to 7500
Columns: 120 entries,  asparagus to zucchini
dtypes: bool(120)
memory usage: 879.1 KB


In [3]:
df.isnull().sum().sum()

0

In [4]:
df['asparagus'].value_counts()

False    7466
True       35
Name: asparagus, dtype: int64

In [5]:
from mlxtend.frequent_patterns import apriori
topfrequent_itemsets = apriori(df, min_support=0.1, use_colnames = True)
topfrequent_itemsets

Unnamed: 0,support,itemsets
0,0.163845,(chocolate)
1,0.179709,(eggs)
2,0.170911,(french fries)
3,0.132116,(green tea)
4,0.129583,(milk)
5,0.238368,(mineral water)
6,0.17411,(spaghetti)


# Most buyed items such as mineral water or eggs should be place in the back of the store so that customer will walk more distance and stay longer in the store thus enhancing the chances of seeing and buying another pruduct they didn't think of

In [6]:
frequent_itemsets = apriori(df, min_support=0.01, use_colnames = True)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.020397,(almonds)
1,0.033329,(avocado)
2,0.010799,(barbecue sauce)
3,0.014265,(black tea)
4,0.011465,(body spray)
...,...,...
252,0.011065,"(milk, mineral water, ground beef)"
253,0.017064,"(mineral water, spaghetti, ground beef)"
254,0.015731,"(milk, mineral water, spaghetti)"
255,0.010265,"(olive oil, mineral water, spaghetti)"


# All items with high support scores should be place at the back store in the low shelfs while rarely purchased items should be placed at the beginning in the eye-level shelf to enhance their chances of being bought

In [7]:
from mlxtend.frequent_patterns import association_rules 
topconf = association_rules(frequent_itemsets,metric="confidence",min_threshold=0.5) 
topconf

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,"(eggs, ground beef)",(mineral water),0.019997,0.238368,0.010132,0.506667,2.125563,0.005365,1.543848
1,"(milk, ground beef)",(mineral water),0.021997,0.238368,0.011065,0.50303,2.110308,0.005822,1.532552


# Nearly half of the people who buy  ground beef and eggs, buy mineral water. So we should place those items at opposite parts of the store to maximize the time the customer spents in it and therefore enhacing the chances of him buying unplanned products

In [8]:
conf = association_rules(frequent_itemsets,metric="confidence",min_threshold=0.2)
conf

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(avocado),(mineral water),0.033329,0.238368,0.011598,0.348000,1.459926,0.003654,1.168147
1,(burgers),(eggs),0.087188,0.179709,0.028796,0.330275,1.837830,0.013128,1.224818
2,(burgers),(french fries),0.087188,0.170911,0.021997,0.252294,1.476173,0.007096,1.108844
3,(burgers),(green tea),0.087188,0.132116,0.017464,0.200306,1.516139,0.005945,1.085270
4,(burgers),(milk),0.087188,0.129583,0.017864,0.204893,1.581175,0.006566,1.094717
...,...,...,...,...,...,...,...,...,...
157,"(mineral water, spaghetti)",(milk),0.059725,0.129583,0.015731,0.263393,2.032623,0.007992,1.181657
158,"(olive oil, mineral water)",(spaghetti),0.027596,0.174110,0.010265,0.371981,2.136468,0.005460,1.315071
159,"(olive oil, spaghetti)",(mineral water),0.022930,0.238368,0.010265,0.447674,1.878079,0.004799,1.378954
160,"(pancakes, mineral water)",(spaghetti),0.033729,0.174110,0.011465,0.339921,1.952333,0.005593,1.251198


In [9]:
conf = association_rules(frequent_itemsets,metric="confidence",min_threshold=0.05)
lowconf = conf[(conf['confidence'] < 0.1)]
lowconf

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
26,(chocolate),(cake),0.163845,0.081056,0.013598,0.082994,1.023915,0.000318,1.002114
45,(chocolate),(champagne),0.163845,0.046794,0.011598,0.070789,1.512793,0.003932,1.025824
47,(chocolate),(chicken),0.163845,0.059992,0.014665,0.089504,1.491927,0.004835,1.032413
49,(eggs),(chicken),0.179709,0.059992,0.014398,0.080119,1.335490,0.003617,1.021880
50,(french fries),(chicken),0.170911,0.059992,0.011065,0.064743,1.079187,0.000812,1.005079
...,...,...,...,...,...,...,...,...,...
404,(spaghetti),"(mineral water, ground beef)",0.174110,0.040928,0.017064,0.098009,2.394681,0.009938,1.063284
410,(mineral water),"(milk, spaghetti)",0.238368,0.035462,0.015731,0.065996,1.861024,0.007278,1.032691
411,(spaghetti),"(milk, mineral water)",0.174110,0.047994,0.015731,0.090352,1.882589,0.007375,1.046566
416,(spaghetti),"(olive oil, mineral water)",0.174110,0.027596,0.010265,0.058959,2.136468,0.005460,1.033327


# On the contrary , related items with a low confidence score, suchs as french fries and chicken, should be placed nearby to each others to increase the likelihood of them being purchased together, therefore increasing profits

In [10]:
toplift = association_rules(frequent_itemsets,metric="lift",min_threshold=2.5)
toplift

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(herb & pepper),(ground beef),0.04946,0.098254,0.015998,0.32345,3.291994,0.011138,1.33286
1,(ground beef),(herb & pepper),0.098254,0.04946,0.015998,0.162822,3.291994,0.011138,1.13541
2,"(mineral water, spaghetti)",(ground beef),0.059725,0.098254,0.017064,0.285714,2.907928,0.011196,1.262445
3,(ground beef),"(mineral water, spaghetti)",0.098254,0.059725,0.017064,0.173677,2.907928,0.011196,1.137902
4,"(mineral water, spaghetti)",(olive oil),0.059725,0.065858,0.010265,0.171875,2.609786,0.006332,1.128021
5,(olive oil),"(mineral water, spaghetti)",0.065858,0.059725,0.010265,0.15587,2.609786,0.006332,1.113898


# Item combinations with a top lift score >2.5 are very more likely to be bought together than alone. Therefore they should be placed at opposite places in the store to increase the time spent in the store and the chances of impulsive purchases. For example, ground beef and herb/pepper sections should be placed as far as possible from each other.

In [11]:
lift = association_rules(frequent_itemsets,metric="lift",min_threshold=0.05)
lowlift = conf[(conf['lift'] < 0.1)]
lowconf

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
26,(chocolate),(cake),0.163845,0.081056,0.013598,0.082994,1.023915,0.000318,1.002114
45,(chocolate),(champagne),0.163845,0.046794,0.011598,0.070789,1.512793,0.003932,1.025824
47,(chocolate),(chicken),0.163845,0.059992,0.014665,0.089504,1.491927,0.004835,1.032413
49,(eggs),(chicken),0.179709,0.059992,0.014398,0.080119,1.335490,0.003617,1.021880
50,(french fries),(chicken),0.170911,0.059992,0.011065,0.064743,1.079187,0.000812,1.005079
...,...,...,...,...,...,...,...,...,...
404,(spaghetti),"(mineral water, ground beef)",0.174110,0.040928,0.017064,0.098009,2.394681,0.009938,1.063284
410,(mineral water),"(milk, spaghetti)",0.238368,0.035462,0.015731,0.065996,1.861024,0.007278,1.032691
411,(spaghetti),"(milk, mineral water)",0.174110,0.047994,0.015731,0.090352,1.882589,0.007375,1.046566
416,(spaghetti),"(olive oil, mineral water)",0.174110,0.027596,0.010265,0.058959,2.136468,0.005460,1.033327


# On the opposite, combinations of items with a low confidence score and somewhat related should be placed side by side to enhance the possibility of them being bought with each other, such as chocolate and cake for example