In [1]:
! pip install mlxtend



In [2]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from mlxtend.frequent_patterns import apriori, association_rules




# Association Rule for Store Dataset

In this case study, we will explore how association rules can be used to analyze the items that are usually purchased together.

## Load Data

We will use a dataset of transactions from a retail store. You can get the dataset here: 
https://gist.githubusercontent.com/Harsh-Git-Hub/2979ec48043928ad9033d8469928e751/raw/72de943e040b8bd0d087624b154d41b2ba9d9b60/retail_dataset.csv

In [3]:
# Fetch the retail transactions from the public gist and preview
# the first five transactions.
csv_url = "https://gist.githubusercontent.com/Harsh-Git-Hub/2979ec48043928ad9033d8469928e751/raw/72de943e040b8bd0d087624b154d41b2ba9d9b60/retail_dataset.csv"
df = pd.read_csv(csv_url)
df.head()

  and should_run_async(code)


Unnamed: 0,0,1,2,3,4,5,6
0,Bread,Wine,Eggs,Meat,Cheese,Pencil,Diaper
1,Bread,Cheese,Meat,Diaper,Wine,Milk,Pencil
2,Cheese,Meat,Eggs,Milk,Wine,,
3,Cheese,Meat,Eggs,Milk,Wine,,
4,Meat,Pencil,Wine,,,,


# Get the set of products that have been purchased


In [4]:
# Flatten every cell of the transaction table into a single 1-D array,
# then keep only the distinct values found in it.
all_cells = df.values.ravel()
unique = set(pd.unique(all_cells))
print(unique)

{nan, 'Meat', 'Eggs', 'Pencil', 'Milk', 'Bread', 'Wine', 'Cheese', 'Bagel', 'Diaper'}


  and should_run_async(code)


## Preprocess Data

In this step, we will transform our dataset into a one-hot encoding, with one column per purchased product.

In [5]:
# Build one list of item labels per transaction (one per row of df).
# Iterating over the rows themselves, rather than hard-coded
# range(0, 315) / range(0, 7), keeps the cell correct when the CSV has a
# different number of rows or columns, and materialises df.values only once.
# str() is applied to every cell, so the NaN padding of short transactions
# becomes the label 'nan' (it shows up as a 'nan' column after encoding).
itemset = [[str(item) for item in row] for row in df.values]

# One-hot encode the transactions: one boolean column per distinct label
from mlxtend.preprocessing import TransactionEncoder
a = TransactionEncoder()
a_data = a.fit(itemset).transform(itemset)

  and should_run_async(code)


In [6]:
# Wrap the encoded matrix in a DataFrame, one column per item label
# learned by the encoder (a.columns_).
# astype(int) maps False -> 0 and True -> 1 in a single pass and yields a
# well-defined integer dtype, instead of two chained replace() calls whose
# resulting dtype depends on pandas' implicit downcasting.
dfencoded = pd.DataFrame(a_data, columns=a.columns_).astype(int)

# show the new dataframe
dfencoded.head()

  and should_run_async(code)


Unnamed: 0,Bagel,Bread,Cheese,Diaper,Eggs,Meat,Milk,Pencil,Wine,nan
0,0,1,1,1,1,1,0,1,1,0
1,0,1,1,1,0,1,1,1,1,0
2,0,0,1,0,1,1,1,0,1,1
3,0,0,1,0,1,1,1,0,1,1
4,0,0,0,0,0,1,0,1,1,1


Since the encoded dataframe contains a `nan` column, which comes from the empty cells of transactions with fewer than seven items, we will drop it (equivalently, we could select every column except the last one, `nan`).

In [7]:
# Remove the placeholder 'nan' column produced by empty cells.
# errors='ignore' makes the cell safe to re-run: once the column is gone,
# a second execution is a no-op instead of raising KeyError.
dfencoded = dfencoded.drop(columns=['nan'], errors='ignore')

  and should_run_async(code)


In [9]:
# Display the encoded frame to confirm the 'nan' column is no longer present
dfencoded

  and should_run_async(code)


Unnamed: 0,Bagel,Bread,Cheese,Diaper,Eggs,Meat,Milk,Pencil,Wine
0,0,1,1,1,1,1,0,1,1
1,0,1,1,1,0,1,1,1,1
2,0,0,1,0,1,1,1,0,1
3,0,0,1,0,1,1,1,0,1
4,0,0,0,0,0,1,0,1,1
...,...,...,...,...,...,...,...,...,...
310,0,1,1,0,1,0,0,0,0
311,0,0,0,0,0,1,1,1,0
312,0,1,1,1,1,1,0,1,1
313,0,0,1,0,0,1,0,0,0


In [12]:
## Apriori Algorithm: find the frequent itemsets in the encoded transactions

  and should_run_async(code)


In [14]:
# Mine every itemset that appears in at least 20% of the transactions.
# use_colnames=True reports the itemsets by product name rather than by
# column index.
frequent_itemsets = apriori(
    dfencoded,
    min_support=0.2,
    use_colnames=True,
)
frequent_itemsets

  and should_run_async(code)


Unnamed: 0,support,itemsets
0,0.425397,(Bagel)
1,0.504762,(Bread)
2,0.501587,(Cheese)
3,0.406349,(Diaper)
4,0.438095,(Eggs)
5,0.47619,(Meat)
6,0.501587,(Milk)
7,0.361905,(Pencil)
8,0.438095,(Wine)
9,0.279365,"(Bread, Bagel)"


Then we will generate association rules from the frequent itemsets, using confidence as the metric with a threshold of 0.6.

In [16]:
# Derive association rules from the frequent itemsets, keeping only the
# rules whose confidence is at least 0.6.
ass_rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.6,
)
ass_rules

  and should_run_async(code)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Bagel),(Bread),0.425397,0.504762,0.279365,0.656716,1.301042,0.064641,1.44265
1,(Eggs),(Cheese),0.438095,0.501587,0.298413,0.681159,1.358008,0.07867,1.563203
2,(Cheese),(Meat),0.501587,0.47619,0.32381,0.64557,1.355696,0.084958,1.477891
3,(Meat),(Cheese),0.47619,0.501587,0.32381,0.68,1.355696,0.084958,1.55754
4,(Milk),(Cheese),0.501587,0.501587,0.304762,0.607595,1.211344,0.053172,1.270148
5,(Cheese),(Milk),0.501587,0.501587,0.304762,0.607595,1.211344,0.053172,1.270148
6,(Wine),(Cheese),0.438095,0.501587,0.269841,0.615942,1.227986,0.050098,1.297754
7,(Eggs),(Meat),0.438095,0.47619,0.266667,0.608696,1.278261,0.05805,1.338624
8,"(Eggs, Cheese)",(Meat),0.298413,0.47619,0.215873,0.723404,1.519149,0.073772,1.893773
9,"(Eggs, Meat)",(Cheese),0.266667,0.501587,0.215873,0.809524,1.613924,0.082116,2.616667
