### Apriori Learning Background

In [2]:
# Background:
# Given some information, we can predict some new information
# E.g. people who buy bread also tend to buy milk
# E.g. people who watch Star Wars 1, tend to watch Star Wars 2

# Definitions:
# Support(M) = # of users who watched movie M / Total # of users
# Confidence(M1 -> M2) = # users who watched M1 & M2 / Total # who watched M1
# Lift(M1 -> M2) = Confidence(M1 -> M2) / Support(M2)

# Algorithm
# 1) Set up minimum support and confidence
# 2) Take all subsets in transactions with support higher than minimum
# 3) Take all the rules of these subsets having higher confidence than minimum
# 4) Sort the rules by decreasing lift


### Importing libraries

In [3]:
!pip install apyori



In [4]:
import numpy as np
import pandas as pd

### Data Preprocessing

In [7]:
dataset = pd.read_csv('Market_Basket_Optimisation.csv', header = None)
transactions = [] #initialize empty list
observations = len(dataset)
columns = len(dataset.columns)
for i in range(0, observations):
  transactions.append([str(dataset.values[i,j]) for j in range(0, columns)])
# we store our transactions as a python list because 
# our training function will expect listed input

### Training the apriori model on the dataset

In [11]:
from apyori import apriori
rules = apriori(transactions=transactions, min_support=0.003, 
                min_confidence=0.2, min_lift=3, max_length=2, min_length=2)
# Min_support parameter establishes a minimum frequency of
# items in the same transaction

### Visualizing results

In [12]:
results = list(rules)
print(results)

[RelationRecord(items=frozenset({'light cream', 'chicken'}), support=0.004532728969470737, ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)]), RelationRecord(items=frozenset({'mushroom cream sauce', 'escalope'}), support=0.005732568990801226, ordered_statistics=[OrderedStatistic(items_base=frozenset({'mushroom cream sauce'}), items_add=frozenset({'escalope'}), confidence=0.3006993006993007, lift=3.790832696715049)]), RelationRecord(items=frozenset({'pasta', 'escalope'}), support=0.005865884548726837, ordered_statistics=[OrderedStatistic(items_base=frozenset({'pasta'}), items_add=frozenset({'escalope'}), confidence=0.3728813559322034, lift=4.700811850163794)]), RelationRecord(items=frozenset({'fromage blanc', 'honey'}), support=0.003332888948140248, ordered_statistics=[OrderedStatistic(items_base=frozenset({'fromage blanc'}), items_add=frozenset({'honey'}), confidence=0.24

### Displaying results in a Pandas DataFrame

In [14]:
def inspect(results):
    lhs         = [tuple(result[2][0][0])[0] for result in results]
    rhs         = [tuple(result[2][0][1])[0] for result in results]
    supports    = [result[1] for result in results]
    confidences = [result[2][0][2] for result in results]
    lifts       = [result[2][0][3] for result in results]
    return list(zip(lhs, rhs, supports, confidences, lifts))
resultsinDataFrame = pd.DataFrame(inspect(results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])
resultsinDataFrame.head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
0,light cream,chicken,0.004533,0.290598,4.843951
1,mushroom cream sauce,escalope,0.005733,0.300699,3.790833
2,pasta,escalope,0.005866,0.372881,4.700812
3,fromage blanc,honey,0.003333,0.245098,5.164271
4,herb & pepper,ground beef,0.015998,0.32345,3.291994


In [15]:
resultsinDataFrame.nlargest(n=10, keep="first", columns="Lift")

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
3,fromage blanc,honey,0.003333,0.245098,5.164271
0,light cream,chicken,0.004533,0.290598,4.843951
2,pasta,escalope,0.005866,0.372881,4.700812
8,pasta,shrimp,0.005066,0.322034,4.506672
7,whole wheat pasta,olive oil,0.007999,0.271493,4.12241
5,tomato sauce,ground beef,0.005333,0.377358,3.840659
1,mushroom cream sauce,escalope,0.005733,0.300699,3.790833
4,herb & pepper,ground beef,0.015998,0.32345,3.291994
6,light cream,olive oil,0.0032,0.205128,3.11471
