#### Importing necessary libraries

In [1]:
import numpy as np
import pandas as pd 
import os
import zipfile
from mlxtend.frequent_patterns import apriori, association_rules
import matplotlib.pyplot as plt
import seaborn as sns

### Association Rules

In [2]:
order_final = pd.read_csv('order_final.csv')

In [3]:
order_final.head()

Unnamed: 0.1,Unnamed: 0,order_id,product_id,add_to_cart_order,reordered,product_name,aisle_id,department_id,aisle,department,...,product_ordered_count,product_reordered_count,product_reorder_ratio,time_btwn_next_order,department_ordered_count,department_reordered_count,department_reorder_ratio,aisle_ordered_count,aisle_reordered_count,aisle_reorder_ratio
0,0,1,49302,1,1,Bulgarian Yogurt,120,16,yogurt,dairy eggs,...,8,7,0.875,9.0,217051,146502,0.674966,55240,37939,0.686803
1,1,1,11109,2,1,Organic 4% Milk Fat Whole Milk Cottage Cheese,108,16,other creams cheeses,dairy eggs,...,144,107,0.743056,9.0,217051,146502,0.674966,12820,7493,0.584477
2,2,1,10246,3,0,Organic Celery Hearts,83,4,fresh vegetables,produce,...,1062,569,0.535782,9.0,409087,271886,0.664617,150609,91368,0.606657
3,3,1,49683,4,0,Cucumber Kirby,83,4,fresh vegetables,produce,...,2413,1709,0.708247,9.0,409087,271886,0.664617,150609,91368,0.606657
4,4,1,43633,5,1,Lightly Smoked Sardines in Olive Oil,95,15,canned meat seafood,canned goods,...,24,10,0.416667,9.0,46799,22782,0.486805,3241,1644,0.507251


In [4]:
order_final.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1384617 entries, 0 to 1384616
Data columns (total 27 columns):
 #   Column                      Non-Null Count    Dtype  
---  ------                      --------------    -----  
 0   Unnamed: 0                  1384617 non-null  int64  
 1   order_id                    1384617 non-null  int64  
 2   product_id                  1384617 non-null  int64  
 3   add_to_cart_order           1384617 non-null  int64  
 4   reordered                   1384617 non-null  int64  
 5   product_name                1384617 non-null  object 
 6   aisle_id                    1384617 non-null  int64  
 7   department_id               1384617 non-null  int64  
 8   aisle                       1384617 non-null  object 
 9   department                  1384617 non-null  object 
 10  user_id                     1384617 non-null  int64  
 11  eval_set                    1384617 non-null  object 
 12  order_number                1384617 non-null  int64  
 1

In [5]:
order_final.shape

(1384617, 27)

## Creating Basket for the products

In [6]:
order_final = order_final.drop(columns = ['Unnamed: 0'])

In [7]:
# Reducing the data set due to constant memory issues
order_final = order_final[:10000]

In [8]:
#Taking the count of product ordered
product_orderd = order_final.groupby(by=['product_id'],as_index=False).agg({'order_id':'count'})
product_orderd.head()

Unnamed: 0,product_id,order_id
0,1,1
1,10,2
2,23,1
3,25,1
4,34,2


In [9]:
#Renaming and sorting the df
product_orderd = product_orderd.rename(columns={'order_id': 'times_ordered'})
product_orderd = product_orderd.sort_values('times_ordered', ascending = False)
product_orderd.head(2)

Unnamed: 0,product_id,times_ordered
2316,24852,124
1962,21137,98


In [10]:
#Combining the products ordered with data in order_final data based on product_id
product_orderd = list(product_orderd.product_id)
product_orderd = order_final[order_final.product_id.isin(product_orderd)]
product_orderd.head()

Unnamed: 0,order_id,product_id,add_to_cart_order,reordered,product_name,aisle_id,department_id,aisle,department,user_id,...,product_ordered_count,product_reordered_count,product_reorder_ratio,time_btwn_next_order,department_ordered_count,department_reordered_count,department_reorder_ratio,aisle_ordered_count,aisle_reordered_count,aisle_reorder_ratio
0,1,49302,1,1,Bulgarian Yogurt,120,16,yogurt,dairy eggs,112108,...,8,7,0.875,9.0,217051,146502,0.674966,55240,37939,0.686803
1,1,11109,2,1,Organic 4% Milk Fat Whole Milk Cottage Cheese,108,16,other creams cheeses,dairy eggs,112108,...,144,107,0.743056,9.0,217051,146502,0.674966,12820,7493,0.584477
2,1,10246,3,0,Organic Celery Hearts,83,4,fresh vegetables,produce,112108,...,1062,569,0.535782,9.0,409087,271886,0.664617,150609,91368,0.606657
3,1,49683,4,0,Cucumber Kirby,83,4,fresh vegetables,produce,112108,...,2413,1709,0.708247,9.0,409087,271886,0.664617,150609,91368,0.606657
4,1,43633,5,1,Lightly Smoked Sardines in Olive Oil,95,15,canned meat seafood,canned goods,112108,...,24,10,0.416667,9.0,46799,22782,0.486805,3241,1644,0.507251


In [11]:
product_orderd.shape

(10000, 26)

In [12]:
#Creating a pivot table using order_id and product_name
basket_product = product_orderd.pivot_table(columns='product_name', values='reordered', index='order_id').fillna(0)

In [13]:
basket_product.shape

(939, 4660)

In [14]:
basket_product.head()

product_name,0% Fat Organic Greek Vanilla Yogurt,0% Fat Superfruits Greek Yogurt,0% Greek Strained Yogurt,1 Apple + 1 Pear Fruit Bar,1 Liter,1 Ply Paper Towels,1% Chocolate Milk,1% Low Fat Milk,1% Lowfat Milk,"1,000 mg Vitamin C Lemon-Lime Flavored Fizzy Drink Mix - 30 PK",...,Zero Coke Soda Mini Bottles,Zero Cola,Zero Go-Go Mixed Berry Vitamin Water,Zero Soda,Zero Strawberry Lemon,Zucchini Squash,of Norwich Original English Mustard Powder Double Superfine,smartwater® Electrolyte Enhanced Water,vitaminwater® XXX Acai Blueberry Pomegranate,with Crispy Almonds Cereal
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
36,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
38,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
96,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
98,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
## Performing One Hot encoding on the data

def one_hot_encode(x):
    if(x<= 0):
        return 0
    if(x>= 1):
        return 1
    
basket_encoded = basket_product.applymap(one_hot_encode)
basket_product = basket_encoded


In [16]:
basket_product.head()

product_name,0% Fat Organic Greek Vanilla Yogurt,0% Fat Superfruits Greek Yogurt,0% Greek Strained Yogurt,1 Apple + 1 Pear Fruit Bar,1 Liter,1 Ply Paper Towels,1% Chocolate Milk,1% Low Fat Milk,1% Lowfat Milk,"1,000 mg Vitamin C Lemon-Lime Flavored Fizzy Drink Mix - 30 PK",...,Zero Coke Soda Mini Bottles,Zero Cola,Zero Go-Go Mixed Berry Vitamin Water,Zero Soda,Zero Strawberry Lemon,Zucchini Squash,of Norwich Original English Mustard Powder Double Superfine,smartwater® Electrolyte Enhanced Water,vitaminwater® XXX Acai Blueberry Pomegranate,with Crispy Almonds Cereal
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
36,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
38,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
96,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
98,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Applying Association Rules on the basket

In [17]:
# Building the model using apriori with min_support=0.0075
freq_basket = apriori(basket_product, min_support = 0.008, use_colnames = True, verbose = 1)
freq_basket.shape

Processing 8010 combinations | Sampling itemset size 2Processing 327 combinations | Sampling itemset size 3


(112, 2)

In [18]:
freq_basket.head()

Unnamed: 0,support,itemsets
0,0.009585,(100% Lactose Free Fat Free Milk)
1,0.015974,(100% Whole Wheat Bread)
2,0.014909,(Apple Honeycrisp Organic)
3,0.013845,(Asparagus)
4,0.087327,(Bag of Organic Bananas)


In [19]:
freq_basket.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 112 entries, 0 to 111
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   support   112 non-null    float64
 1   itemsets  112 non-null    object 
dtypes: float64(1), object(1)
memory usage: 1.9+ KB


In [20]:

product_ar = association_rules(freq_basket, metric ="lift", min_threshold = 1)

In [21]:
product_ar

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Large Lemon),(Bag of Organic Bananas),0.046858,0.087327,0.00852,0.181818,2.08204,0.004428,1.115489
1,(Bag of Organic Bananas),(Large Lemon),0.087327,0.046858,0.00852,0.097561,2.08204,0.004428,1.056184
2,(Organic Baby Spinach),(Bag of Organic Bananas),0.059638,0.087327,0.01278,0.214286,2.453833,0.007572,1.161584
3,(Bag of Organic Bananas),(Organic Baby Spinach),0.087327,0.059638,0.01278,0.146341,2.453833,0.007572,1.101567
4,(Organic Hass Avocado),(Bag of Organic Bananas),0.045793,0.087327,0.014909,0.325581,3.728304,0.01091,1.353274
5,(Bag of Organic Bananas),(Organic Hass Avocado),0.087327,0.045793,0.014909,0.170732,3.728304,0.01091,1.150661
6,(Organic Lemon),(Bag of Organic Bananas),0.017039,0.087327,0.00852,0.5,5.72561,0.007032,1.825346
7,(Bag of Organic Bananas),(Organic Lemon),0.087327,0.017039,0.00852,0.097561,5.72561,0.007032,1.089227
8,(Organic Raspberries),(Bag of Organic Bananas),0.033014,0.087327,0.009585,0.290323,3.324548,0.006702,1.286039
9,(Bag of Organic Bananas),(Organic Raspberries),0.087327,0.033014,0.009585,0.109756,3.324548,0.006702,1.086204


### References:
- https://www.geeksforgeeks.org/implementing-apriori-algorithm-in-python/
- https://www.kaggle.com/code/jboros/market-basket-analysis-with-association-rules  
- https://www.kaggle.com/code/jboros/market-basket-analysis-with-association-rules