In [None]:
## Apriori Association Rule Learning

# business problem - buy one product + get one product for free

In [13]:
# Importing the libraries

!pip install apyori

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd



In [23]:
# Importing the dataset

# header = None: the first line of the file is read as data, not as column names.
dataset = pd.read_csv('Market_Basket_Optimisation.csv', header = None)

# apriori() needs the data as a list of transactions (a list of lists), not a
# data frame, and every item has to be a string -> str.
#
# The rows and columns are taken from the frame itself rather than from the
# hard-coded 7501 x 20, so the cell keeps working when the file changes size.
#
# Rows with fewer filled cells than the widest row hold NaN in the remaining
# columns; str(NaN) would turn into a fake 'nan' product, so missing cells are
# skipped instead of being converted.
#
# dataset.values.tolist() is deliberately not used: it keeps the raw cell
# values (including NaN floats) instead of producing clean strings.
transactions = [
    [str(item) for item in basket if pd.notna(item)]
    for basket in dataset.values
]

In [24]:
# Fitting (training) the model to the dataset

# min_support    - lower bound on support, so only the relevant rules are computed instead of all of them
#                  support of 2 products = (no of transactions containing both products) / (total no of transactions)
#                  (3x7)/7501 ~ 0.003, which reads as 3 purchases a day over a 7-day week
# min_confidence - 0.8 would require a rule to be correct 80% of the time, which is too high; 0.2 is used
# min_lift       - lift is the most valuable metric for measuring the strength of a rule
# min_length,
# max_length     - number of elements in the rule; 2 = 1 product on each side
# NOTE(review): apyori's documented options look like min_support, min_confidence,
# min_lift and max_length only - confirm that min_length actually has an effect.

from apyori import apriori

apriori_options = dict(
    min_support = 0.003,
    min_confidence = 0.2,
    min_lift = 3,
    min_length = 2,
    max_length = 2,
)
rules = apriori(transactions = transactions, **apriori_options)

In [25]:
# Visualisation of the results

# Apriori results
# Unpack the rules into a list so they can be indexed and iterated more than once.
# NOTE(review): if apriori() returns a one-shot generator, re-running only this
# cell would give an empty list - confirm, and re-run the fitting cell first.
results = [*rules]
results

# e.g. if someone buys items_base=frozenset({'light cream'}), there is a 29% chance they will also buy items_add=frozenset({'chicken'}) (confidence=0.29059)

# Apriori results -> Pandas DataFrame
# tuples - ordered collections of values held in a single variable

# result[0] - element of index 0: [RelationRecord(items=frozenset({'chicken', 'light cream'}),
# result[1] - element of index 1: support=0.004532728969470737,
# result[2] - element of index 2: ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)]),

# result[2][0] - OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'})

# result[2][0][1] - items_add=frozenset({'chicken'})

# tuple(result[2][0][1])[0] - first item of that items_add frozenset once it is converted to a tuple: 'chicken'

def inspect(results):
    """Flatten apriori records into plain rows that can be loaded into a table.

    Parameters
    ----------
    results : iterable
        Records that can be indexed like the tuples described above:
        ``record[1]`` is the support and ``record[2]`` is a sequence of
        statistics, each indexable as
        ``(items_base, items_add, confidence, lift)``.

    Returns
    -------
    list of tuple
        One ``(left, right, support, confidence, lift)`` tuple per record,
        in input order. ``left`` / ``right`` are the first item of
        ``items_base`` / ``items_add``.

    Raises
    ------
    IndexError
        If a record has no statistics, or if ``items_base`` / ``items_add``
        is empty.

    Notes
    -----
    Only the first statistic of each record (``record[2][0]``) is reported;
    any further statistics on the same record are ignored.
    """
    rows = []
    # A single pass also works when `results` is a one-shot iterator.
    for result in results:
        first_rule = result[2][0]
        # Item sets are unordered; with one product per side, element 0 of the
        # tuple is that single product.
        left = tuple(first_rule[0])[0]
        right = tuple(first_rule[1])[0]
        rows.append((left, right, result[1], first_rule[2], first_rule[3]))
    return rows

# Turn the flattened rules into a table: one row per rule, one column per measure.
rule_columns = ['Left side of the rule', 'Right side of the rule', 'Support', 'Confidence', 'Lifts']
results_as_data_frame = pd.DataFrame(inspect(results), columns = rule_columns)

# Visualisation of the results - nonsorted

# Visualisation of the results - sorted by desc lifts

[RelationRecord(items=frozenset({'chicken', 'light cream'}), support=0.004532728969470737, ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)]),
 RelationRecord(items=frozenset({'escalope', 'mushroom cream sauce'}), support=0.005732568990801226, ordered_statistics=[OrderedStatistic(items_base=frozenset({'mushroom cream sauce'}), items_add=frozenset({'escalope'}), confidence=0.3006993006993007, lift=3.790832696715049)]),
 RelationRecord(items=frozenset({'pasta', 'escalope'}), support=0.005865884548726837, ordered_statistics=[OrderedStatistic(items_base=frozenset({'pasta'}), items_add=frozenset({'escalope'}), confidence=0.3728813559322034, lift=4.700811850163794)]),
 RelationRecord(items=frozenset({'honey', 'fromage blanc'}), support=0.003332888948140248, ordered_statistics=[OrderedStatistic(items_base=frozenset({'fromage blanc'}), items_add=frozenset({'honey'}), confidence=0