In [1]:
# The Apriori algorithm works by iteratively finding frequent itemsets in a dataset and generating association rules based on these itemsets.
# Here's a step-by-step explanation of how Apriori works:
# Initialization:
# Single Itemsets: The algorithm begins by identifying all unique single items (itemsets of size 1) in the dataset.
# Counting Support: It counts the support (frequency of occurrence) of each single itemset by scanning the entire dataset

In [None]:
# Generating Candidate Itemsets:

# Join Step: Based on the frequent itemsets from the previous iteration, the algorithm generates candidate itemsets of size
# k+1 by joining k-sized frequent itemsets.
# For example, if {A, B} and {A, C} are frequent itemsets of size 2, then a candidate itemset of size 3 like {A, B, C} is generated.
# Pruning: After generating candidate itemsets, the algorithm prunes any candidate itemset that contains a subset of size
# k that is not frequent. This is based on the Apriori principle that if an itemset is infrequent, all its supersets will also be infrequent.

In [None]:
# Counting Support of Candidate Itemsets:
# The algorithm scans the dataset again to count the support of each candidate itemset (how many transactions contain the itemset).

In [None]:
# Filtering Frequent Itemsets:
# The candidate itemsets whose support meets or exceeds a minimum support threshold are retained as frequent itemsets.

In [None]:
# Repeat:

# The candidate-generation, support-counting and filtering steps are repeated iteratively to find frequent itemsets of larger sizes until no more frequent itemsets can be found.

In [None]:
# Generating Association Rules:

# Once all frequent itemsets are found, association rules are generated from them. An association rule typically has the form
# X → Y, where X and Y are disjoint itemsets and X ∪ Y is a frequent itemset.
# The confidence of each association rule is calculated to measure the strength of the rule.
# Confidence measures how often the rule has been found to be true: confidence(X → Y) = support(X ∪ Y) / support(X).

In [None]:
# Evaluation and Selection:
# Association rules are evaluated based on metrics like confidence and support. 
# Rules that meet specified criteria (e.g., a minimum confidence threshold) are selected as meaningful associations.

In [2]:
# Apriori is efficient for mining frequent itemsets because it reduces the number of candidate itemsets that need to be examined in each iteration,
# using the Apriori principle to prune the search space. However,
# it may still be computationally intensive for large datasets or datasets with many unique items.

In [11]:
# Install apyori into the environment that backs this kernel.
# The explicit %pip magic is used because the bare `pip ...` spelling depends
# on IPython's automagic, which is only applied to single-line cells and can be
# switched off or shadowed by a variable named `pip`.
%pip install apyori

Note: you may need to restart the kernel to use updated packages.


In [12]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [13]:
# Load the raw basket file. It is read without a header row, so every line of
# the CSV becomes one row (one transaction) of the frame.
dataset = pd.read_csv('Market_Basket_Optimisation.csv', header = None)

# Build the list-of-lists of item names that is passed to apriori.
# Iterating over the frame's own rows (rather than hard-coded 7501 x 20 bounds)
# keeps this correct if the file has a different number of rows or columns.
# Missing cells (NaN) are skipped so that the literal string 'nan' is not mined
# as if it were a product; this also makes the baskets much shorter.
transactions = [
    [str(item) for item in basket if pd.notna(item)]
    for basket in dataset.itertuples(index = False, name = None)
]

In [14]:
from apyori import apriori
# Mine association rules from the baskets with the thresholds below.
# NOTE(review): apyori appears to read only min_support, min_confidence,
# min_lift and max_length; `min_length` looks like it is silently ignored —
# confirm against the apyori source before relying on it.
rules = apriori(
    transactions = transactions,
    min_support = 0.003,
    min_confidence = 0.2,
    min_lift = 3,
    min_length = 2,
    max_length = 2,
)

In [15]:
# Collect everything apriori produced into a list so the rules can be
# displayed and traversed more than once.
results = [*rules]

In [16]:
# Show the raw records collected from apriori (one RelationRecord per itemset).
results

[RelationRecord(items=frozenset({'light cream', 'chicken'}), support=0.004532728969470737, ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)]),
 RelationRecord(items=frozenset({'escalope', 'mushroom cream sauce'}), support=0.005732568990801226, ordered_statistics=[OrderedStatistic(items_base=frozenset({'mushroom cream sauce'}), items_add=frozenset({'escalope'}), confidence=0.3006993006993007, lift=3.790832696715049)]),
 RelationRecord(items=frozenset({'escalope', 'pasta'}), support=0.005865884548726837, ordered_statistics=[OrderedStatistic(items_base=frozenset({'pasta'}), items_add=frozenset({'escalope'}), confidence=0.3728813559322034, lift=4.700811850163794)]),
 RelationRecord(items=frozenset({'honey', 'fromage blanc'}), support=0.003332888948140248, ordered_statistics=[OrderedStatistic(items_base=frozenset({'fromage blanc'}), items_add=frozenset({'honey'}), confidence=0

In [17]:
def _format_rule_side(items):
    """Render one side of a rule (a collection of items) as a single table cell."""
    # Sets are unordered; sort so the rendered text is stable between runs.
    members = sorted(items, key = str)
    if len(members) == 1:
        # Single item: hand it back unchanged.
        return members[0]
    # Several items are joined; an empty side becomes ''.
    return ', '.join(str(member) for member in members)


def inspect(results):
    """Flatten apriori records into plain rows suitable for a DataFrame.

    Parameters
    ----------
    results : iterable
        Records indexable as ``(items, support, ordered_statistics)``, where
        each ordered statistic is indexable as
        ``(items_base, items_add, confidence, lift)``.

    Returns
    -------
    list of tuple
        One ``(lhs, rhs, support, confidence, lift)`` row per record, built
        from the record's first ordered statistic only.

    Notes
    -----
    A rule side with several items is rendered as a sorted, comma-separated
    string instead of being cut down to one arbitrary member, and an empty
    side is rendered as ``''`` instead of raising ``IndexError``.
    NOTE(review): this function name shadows the standard-library ``inspect``
    module if that module is ever imported in the same namespace.
    """
    rows = []
    for record in results:
        support = record[1]
        first_rule = record[2][0]  # only the first ordered statistic is reported
        rows.append((
            _format_rule_side(first_rule[0]),
            _format_rule_side(first_rule[1]),
            support,
            first_rule[2],
            first_rule[3],
        ))
    return rows
# Tabulate the flattened rules, one row per rule, with readable column names.
resultsinDataFrame = pd.DataFrame(
    data = inspect(results),
    columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'],
)

In [18]:
# Show the rules table in the order the rules were collected.
resultsinDataFrame

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
0,light cream,chicken,0.004533,0.290598,4.843951
1,mushroom cream sauce,escalope,0.005733,0.300699,3.790833
2,pasta,escalope,0.005866,0.372881,4.700812
3,fromage blanc,honey,0.003333,0.245098,5.164271
4,herb & pepper,ground beef,0.015998,0.32345,3.291994
5,tomato sauce,ground beef,0.005333,0.377358,3.840659
6,light cream,olive oil,0.0032,0.205128,3.11471
7,whole wheat pasta,olive oil,0.007999,0.271493,4.12241
8,pasta,shrimp,0.005066,0.322034,4.506672


In [19]:
# Show up to ten rules with the highest lift, strongest first.
resultsinDataFrame.nlargest(10, 'Lift')

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
3,fromage blanc,honey,0.003333,0.245098,5.164271
0,light cream,chicken,0.004533,0.290598,4.843951
2,pasta,escalope,0.005866,0.372881,4.700812
8,pasta,shrimp,0.005066,0.322034,4.506672
7,whole wheat pasta,olive oil,0.007999,0.271493,4.12241
5,tomato sauce,ground beef,0.005333,0.377358,3.840659
1,mushroom cream sauce,escalope,0.005733,0.300699,3.790833
4,herb & pepper,ground beef,0.015998,0.32345,3.291994
6,light cream,olive oil,0.0032,0.205128,3.11471
