# Implementing Apriori Algorithm for Association Rule Mining

In [12]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [13]:
#Importing the data sets
data = pd.read_csv("Apriori _ Market_Basket_Optimisation.csv")

In [14]:
data.head(3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,nut,lemon,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,iced tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,


In [15]:
#checking the the rows and columns of the datasets
data.shape

(7501, 20)

# Install apyori library

In [16]:
!pip install apyori



# Data Pre-Proccessing

##### The input data for Apriori should be the list, not the pandas dataframe. So we need to convert our dataframe into list which contains sublists. For this we need to create a loop which will go through all rows and all columns.

At first we need to create an empty list. After creating an empty list we need to append the list with the elements in our dataset converted into string using loop. We have 7501 rows and 20 columns. So variable i should start from 0 and go to 7501. Then, for each row we need to look at 20 columns. Thats why we are using the second for cycle inside the loop which starts from 0 and goes to 20

In [17]:
#Let's create an empty list here
list_of_transactions = []
#Append the list
for i in range(0, 7501):
    list_of_transactions.append([str(data.values[i,j]) for j in range(0, 20)])

In [18]:
#Let's see the first element from our list of transactions. We should indicate 0 here because index in Pythn starts with 0
list_of_transactions[0]

['shrimp',
 'nut',
 'lemon',
 'vegetables mix',
 'green grapes',
 'whole weat flour',
 'yams',
 'cottage cheese',
 'energy drink',
 'tomato juice',
 'low fat yogurt',
 'iced tea',
 'honey',
 'salad',
 'mineral water',
 'salmon',
 'antioxydant juice',
 'frozen smoothie',
 'spinach',
 'olive oil']

# Training Apriori Algorithm

In [19]:
# Training apiori algorithm on our list_of_transactions
from apyori import apriori
rules = apriori(list_of_transactions, min_support = 0.004, min_confidence = 0.2, min_lift = 3, min_length = 2)
#So we will train apriori algorithm on our list_of_transactions and
#get the rules where items appear together

In [20]:
# Create a list of rules and print the results
results = list(rules)

In [21]:
#Here is the first rule in list or results
results[0]

RelationRecord(items=frozenset({'light cream', 'chicken'}), support=0.004532728969470737, ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)])

##### Let's discuss the first rule -> {'chicken', 'light cream'} with support=0.0045, confidence=0.291 and lift=4.84. Please pay attention to that: items_base is {'light cream'} and items_add is {'chicken'}. This means that there is 29% chance (confidence) that user will buy chicken if he has already bought light cream. So left hand side is light cream and right hand side is chicken.

# Putting the results into a Pandas DataFrame

In [22]:
#In order to visualize our rules better we need to extract elements from our
#results list, convert it to pd.data frame and sort strong rules by lift value.
#Here is the code for this. We have extracted left hand side and right hand 
#side items from our rules above, also their support, confidence and lift value
def inspect(results):
    lhs     =  [tuple(result [2] [0] [0]) [0] for result in results]
    rhs     =  [tuple(result [2] [0] [1]) [0] for result in results]
    supports = [result [1] for result in results]
    confidences = [result [2] [0] [2]   for result in results]
    lifts = [result [2] [0] [3]   for result in results]
    return list(zip(lhs,rhs,supports,confidences, lifts))
resultsinDataFrame = pd.DataFrame(inspect(results),columns = 
    ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'] )
resultsinDataFrame.head(3)

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
0,light cream,chicken,0.004533,0.290598,4.843951
1,mushroom cream sauce,escalope,0.005733,0.300699,3.790833
2,pasta,escalope,0.005866,0.372881,4.700812


In [23]:
#As we have our rules in pd.dataframe we can sort it by lift value using 
#nlargest command. Here we are saying that we need top 6 rule by lift value
resultsinDataFrame.nlargest(n=6, columns='Lift')

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
0,light cream,chicken,0.004533,0.290598,4.843951
7,light cream,,0.004533,0.290598,4.843951
2,pasta,escalope,0.005866,0.372881,4.700812
12,pasta,,0.005866,0.372881,4.700812
30,pasta,,0.005066,0.322034,4.515096
6,pasta,shrimp,0.005066,0.322034,4.506672
