* Project - Market Basket Analysis 
*  Dataset - Retail Grocery 
* Problem Statement - Identify items that are frequently purchased together and use them to recommend products for cross-selling

In [9]:
# Libraries

import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [39]:
# Load the groceries CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific Windows path — the notebook
# only runs where the file exists at exactly this location.
data = pd.read_csv(r'F:\Portfolio\Market Basket Analysis\Groceries_dataset.csv')

In [40]:
# Preview the first rows of the freshly loaded frame
data.head()

Unnamed: 0,Member_number,Date,itemDescription,Quantity
0,1808,21-07-2015,tropical fruit,1032
1,2552,05-01-2015,whole milk,2502
2,2300,19-09-2015,pip fruit,744
3,1187,12-12-2015,other vegetables,1898
4,3037,01-02-2015,whole milk,2501


# Data Cleaning

In [12]:
# Strip leading and trailing whitespace from the item names
data['itemDescription'] = data['itemDescription'].str.strip()

In [13]:
# Drop rows whose Member_number is missing (mutates `data` in place)
data.dropna(axis=0, subset=['Member_number'], inplace=True)

In [14]:
# Convert the Member_number values to strings
data['Member_number'] = data['Member_number'].astype('str')

In [19]:
# List the distinct item names.
# At this point in the notebook the column is still called 'itemDescription';
# it is only renamed to 'Product Description' by a later cell, so indexing by
# the new name here raises KeyError on Restart & Run All.
data['itemDescription'].unique()


array(['tropical fruit', 'whole milk', 'pip fruit', 'other vegetables',
       'rolls/buns', 'pot plants', 'citrus fruit', 'beef', 'frankfurter',
       'chicken', 'butter', 'fruit/vegetable juice',
       'packaged fruit/vegetables', 'chocolate', 'specialty bar',
       'butter milk', 'bottled water', 'yogurt', 'sausage', 'brown bread',
       'hamburger meat', 'root vegetables', 'pork', 'pastry',
       'canned beer', 'berries', 'coffee', 'misc. beverages', 'ham',
       'turkey', 'curd cheese', 'red/blush wine',
       'frozen potato products', 'flour', 'sugar', 'frozen meals',
       'herbs', 'soda', 'detergent', 'grapes', 'processed cheese', 'fish',
       'sparkling wine', 'newspapers', 'curd', 'pasta', 'popcorn',
       'finished products', 'beverages', 'bottled beer', 'dessert',
       'dog food', 'specialty chocolate', 'condensed milk', 'cleaner',
       'white wine', 'meat', 'ice cream', 'hard cheese', 'cream cheese',
       'liquor', 'pickled vegetables', 'liquor (appetizer)

In [41]:
# Summarise column dtypes and non-null counts
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38765 entries, 0 to 38764
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Member_number    38765 non-null  int64 
 1   Date             38765 non-null  object
 2   itemDescription  38765 non-null  object
 3   Quantity         38765 non-null  int64 
dtypes: int64(2), object(2)
memory usage: 1.2+ MB


In [42]:
# Rename columns to the names used by the rest of the notebook (mutates `data` in place).
# NOTE(review): 'Product ID' now holds what was Member_number — presumably a
# customer/member identifier rather than a product identifier; confirm the
# naming is intended.
data.rename(columns = {'Member_number' : 'Product ID', 'itemDescription':'Product Description'}, inplace=True)

In [43]:
# Count how many rows each product appears in, most frequent first
data_1 = data['Product Description'].value_counts()
data_1

whole milk               2502
other vegetables         1898
rolls/buns               1716
soda                     1514
yogurt                   1334
                         ... 
frozen chicken              5
bags                        4
baby cosmetics              3
kitchen utensil             1
preservation products       1
Name: Product Description, Length: 167, dtype: int64

In [44]:
# Confirm the renamed columns
data.head()

Unnamed: 0,Product ID,Date,Product Description,Quantity
0,1808,21-07-2015,tropical fruit,1032
1,2552,05-01-2015,whole milk,2502
2,2300,19-09-2015,pip fruit,744
3,1187,12-12-2015,other vegetables,1898
4,3037,01-02-2015,whole milk,2501


In [56]:
# Build the basket matrix: one row per 'Product ID', one column per
# 'Product Description', each cell holding the summed 'Quantity'
# (0 where the pair never occurs).
basket = (
    data
    .groupby(['Product ID', 'Product Description'])['Quantity']
    .sum()
    .unstack()
    .reset_index()
    .fillna(0)
    .set_index('Product ID')
)
basket

Product Description,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,bags,baking powder,bathroom cleaner,beef,berries,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
Product ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3389.0,500.0,0.0
1001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,332.0,0.0,...,0.0,0.0,0.0,139.0,0.0,217.0,0.0,2176.0,0.0,0.0
1002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,375.0,0.0,0.0
1003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1004,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2247.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4996,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,157.0,1349.0,0.0,0.0
4998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4999,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,495.0,...,0.0,0.0,0.0,453.0,0.0,0.0,0.0,0.0,544.0,0.0


In [57]:
def encode(x):
    """Map a basket cell to a 0/1 purchase flag.

    Parameters
    ----------
    x : int or float
        A single cell value from the basket matrix.

    Returns
    -------
    int
        1 if ``x`` is strictly positive, otherwise 0.
    """
    # A single strict comparison replaces the overlapping `x <= 0` / `x >= 0`
    # tests; NaN (which fails every comparison) now yields 0 instead of
    # falling through both branches and returning None.
    return 1 if x > 0 else 0

# Apply encode() to every cell of the basket matrix to get a 0/1 table.
# NOTE(review): DataFrame.applymap is deprecated in pandas >= 2.1 in favour of
# DataFrame.map — switch when the environment's pandas version allows it.
basket_set = basket.applymap(encode)

In [61]:
# Inspect the encoded 0/1 basket table
basket_set.head()

Product Description,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,bags,baking powder,bathroom cleaner,beef,berries,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
Product ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1000,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,1,0
1001,0,0,0,0,0,0,0,0,1,0,...,0,0,0,1,0,1,0,1,0,0
1002,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1003,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1004,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


# Training Model

In [67]:
# Mine the itemsets whose support is at least 0.07, reporting them by
# column name rather than by column index.
frequent = apriori(
    basket_set,
    min_support=0.07,
    use_colnames=True,
)

In [68]:
# Derive association rules from the frequent itemsets, keeping those whose
# lift is at least 1.
rules = association_rules(
    frequent,
    metric='lift',
    min_threshold=1,
)

In [69]:
# Show up to the first 100 rules
rules.head(100)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(whole milk),(bottled beer),0.458184,0.158799,0.085428,0.186450,1.174124,0.012669,1.033988
1,(bottled beer),(whole milk),0.158799,0.458184,0.085428,0.537964,1.174124,0.012669,1.172672
2,(other vegetables),(bottled water),0.376603,0.213699,0.093894,0.249319,1.166680,0.013414,1.047450
3,(bottled water),(other vegetables),0.213699,0.376603,0.093894,0.439376,1.166680,0.013414,1.111969
4,(rolls/buns),(bottled water),0.349666,0.213699,0.079271,0.226706,1.060863,0.004548,1.016820
...,...,...,...,...,...,...,...,...,...
93,"(whole milk, yogurt)",(other vegetables),0.150590,0.376603,0.071832,0.477002,1.266589,0.015119,1.191967
94,"(other vegetables, yogurt)",(whole milk),0.120318,0.458184,0.071832,0.597015,1.303003,0.016704,1.344507
95,(whole milk),"(other vegetables, yogurt)",0.458184,0.120318,0.071832,0.156775,1.303003,0.016704,1.043235
96,(other vegetables),"(whole milk, yogurt)",0.376603,0.150590,0.071832,0.190736,1.266589,0.015119,1.049608


# Recommendations

In [70]:
# Number of rows in basket_set flagged as containing 'whole milk'
basket_set['whole milk'].sum()

1786

In [71]:
# Number of rows in basket_set flagged as containing 'bottled beer'
basket_set['bottled beer'].sum()

619

In [85]:
# Keep only the rules with lift of at least 1 and confidence of at least 0.5
rules.loc[rules['lift'].ge(1) & rules['confidence'].ge(0.5)]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
1,(bottled beer),(whole milk),0.158799,0.458184,0.085428,0.537964,1.174124,0.012669,1.172672
9,(bottled water),(whole milk),0.213699,0.458184,0.112365,0.52581,1.147597,0.014452,1.142615
11,(canned beer),(whole milk),0.165213,0.458184,0.087224,0.52795,1.152268,0.011526,1.147795
19,(domestic eggs),(whole milk),0.133145,0.458184,0.070292,0.527938,1.152242,0.009287,1.147766
21,(newspapers),(whole milk),0.139815,0.458184,0.072345,0.517431,1.12931,0.008284,1.122775
39,(other vegetables),(whole milk),0.376603,0.458184,0.19138,0.508174,1.109106,0.018827,1.101643
43,(pastry),(whole milk),0.177527,0.458184,0.091072,0.513006,1.119651,0.009732,1.112572
45,(pip fruit),(whole milk),0.1706,0.458184,0.086968,0.509774,1.112598,0.008801,1.105239
55,(rolls/buns),(whole milk),0.349666,0.458184,0.178553,0.510638,1.114484,0.018342,1.10719
67,(sausage),(whole milk),0.206003,0.458184,0.106978,0.519303,1.133394,0.012591,1.127146
