# Apriori

## Importing the libraries

In [2]:
%pip install apyori


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Data Preprocessing

In [4]:
dataset = pd.read_csv('Market_Basket_Optimisation.csv', header = None) # first row contains data, so no header is needed

order_count = dataset.shape[0] # number of rows, i.e. number of orders in the dataset
largest_order = dataset.shape[1] # number of columns, i.e. number of items in the largest order

# Orders shorter than the widest row are padded with NaN by read_csv. Those cells are
# dropped here rather than stringified; otherwise a fake 'nan' product would be added
# to almost every transaction and take part in the rule mining.
order_matrix = dataset.values # fetched once instead of once per row

# One list of item-name strings per order, in file order.
transactions = [
    [str(item) for item in order_row if not pd.isna(item)]
    for order_row in order_matrix
]

## Training the Apriori model on the dataset

In [5]:
from apyori import apriori

# Support threshold, as a fraction of all orders: an itemset must occur in at least
# 3 orders a day, i.e. 3 * 7 = 21 orders (assumes the data covers 7 days — TODO confirm).
minimum_support = (3 * 7) / order_count

# Confidence threshold: keep a rule only when at least 20% of the orders that contain
# its left-hand side also contain its right-hand side.
minimum_confidence = 0.2

rules = apriori(
    transactions = transactions,
    min_support = minimum_support,
    min_confidence = minimum_confidence,
    min_lift = 3, # discard rules whose lift is below 3
    # Restrict the search to pairs of items (one item on each side of a rule).
    # NOTE(review): apyori documents max_length but no min_length option — confirm
    # that min_length has any effect.
    min_length = 2,
    max_length = 2,
)

## Visualising the results

### Displaying the first results coming directly from the output of the apriori function

In [6]:
# Materialise the rules into a list so they can be displayed and then iterated again
# by later cells (presumably `rules` is a one-shot generator — verify against apyori).
results = list(rules) # convert the rules into a list

In [7]:
results

[RelationRecord(items=frozenset({'extra dark chocolate', 'chicken'}), support=0.0027996267164378083, ordered_statistics=[OrderedStatistic(items_base=frozenset({'extra dark chocolate'}), items_add=frozenset({'chicken'}), confidence=0.23333333333333334, lift=3.8894074074074076)]),
 RelationRecord(items=frozenset({'light cream', 'chicken'}), support=0.004532728969470737, ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)]),
 RelationRecord(items=frozenset({'mushroom cream sauce', 'escalope'}), support=0.005732568990801226, ordered_statistics=[OrderedStatistic(items_base=frozenset({'mushroom cream sauce'}), items_add=frozenset({'escalope'}), confidence=0.3006993006993007, lift=3.790832696715049)]),
 RelationRecord(items=frozenset({'pasta', 'escalope'}), support=0.005865884548726837, ordered_statistics=[OrderedStatistic(items_base=frozenset({'pasta'}), items_add=frozenset({'esca

### Organising the results into a Pandas DataFrame

In [8]:
def inspect(results):
    """Flatten apriori relation records into plain tuples.

    Parameters
    ----------
    results : iterable
        Records indexable as ``(items, support, ordered_statistics)``, where each
        ordered statistic is indexable as ``(items_base, items_add, confidence, lift)``.
        Any iterable is accepted, including a one-shot generator: the input is
        traversed exactly once.

    Returns
    -------
    list of tuple
        One ``(lhs, rhs, support, confidence, lift)`` tuple per record, in input order.

    Raises
    ------
    IndexError
        If a record has no ordered statistics, or its first statistic has an empty
        base or add set.

    Notes
    -----
    Only the first ordered statistic of each record is used, and only one item is
    taken from each side of that rule, so this is meant for rules between single items.
    """
    rows = []
    # Single pass: iterating `results` once per output column would exhaust a
    # generator on the first column and silently produce an empty result.
    for record in results:
        first_rule = record[2][0]
        lhs = tuple(first_rule[0])[0]
        rhs = tuple(first_rule[1])[0]
        rows.append((lhs, rhs, record[1], first_rule[2], first_rule[3]))
    return rows

# Tabulate the rules: one row per rule, kept in the order of `results` (not sorted).
resultsinDataFrame = pd.DataFrame(
    data = inspect(results),
    columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'],
)

### Displaying the results unsorted

In [9]:
resultsinDataFrame

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
0,extra dark chocolate,chicken,0.0028,0.233333,3.889407
1,light cream,chicken,0.004533,0.290598,4.843951
2,mushroom cream sauce,escalope,0.005733,0.300699,3.790833
3,pasta,escalope,0.005866,0.372881,4.700812
4,fromage blanc,honey,0.003333,0.245098,5.164271
5,herb & pepper,ground beef,0.015998,0.32345,3.291994
6,tomato sauce,ground beef,0.005333,0.377358,3.840659
7,light cream,olive oil,0.0032,0.205128,3.11471
8,whole wheat pasta,olive oil,0.007999,0.271493,4.12241
9,pasta,shrimp,0.005066,0.322034,4.506672


### Displaying the results sorted by descending lifts

In [10]:
resultsinDataFrame.nlargest(n = 10, columns = 'Lift') # Display the 10 rules with the highest lift, strongest first.

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
4,fromage blanc,honey,0.003333,0.245098,5.164271
1,light cream,chicken,0.004533,0.290598,4.843951
3,pasta,escalope,0.005866,0.372881,4.700812
9,pasta,shrimp,0.005066,0.322034,4.506672
8,whole wheat pasta,olive oil,0.007999,0.271493,4.12241
0,extra dark chocolate,chicken,0.0028,0.233333,3.889407
6,tomato sauce,ground beef,0.005333,0.377358,3.840659
2,mushroom cream sauce,escalope,0.005733,0.300699,3.790833
5,herb & pepper,ground beef,0.015998,0.32345,3.291994
7,light cream,olive oil,0.0032,0.205128,3.11471
