# Apriori

## Importing the libraries

In [5]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import apyori  #to install apyori I need to have the apyori.py file into the folder where I'm working

## Data Preprocessing

In [17]:
dataset = pd.read_csv('Market_Basket_Optimisation.csv', header = None) 

# header = None because our dataset doesn't have name of columns. All rows represent transactions

# Instead of having the data as a data frame, we want a list of transactions

#1) initialize an empty list
transactions = []

#2) Populate the list
for i in range(0, 7501):
    transactions.append([str(dataset.values[i,j]) for j in range(0, 20)])
    
    
# i in range(0,7501) because we have 7501 transactions
# we just use the append method (== add) that will simply add the transactions one by one
# use the [] to make the transactions a list of products -- we are creating a list of list
# the second for loop will iterate over the columns

## Training the Apriori model on the dataset

In [30]:
# WARNING: Make sure to upload the apyori.py file into this Colab notebook before running this cell
from apyori import apriori

# We want to consider products that appear at least in 3 transactions a day. Since we record 7501 transactions in a week
# 3 times a day * 1 week = 3*7 = 21 minimum amount of times we saw a product during the week
# min_support = number of times / total transactions = 21/7501
# enter the rules with min-support higher than 0.003

# min_length/max = 2 ==> minimum/max elements we want to have in our rule ("Buy 2 get 1 for free")

rules = apriori(transactions = transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3,
                min_length = 2, max_lenght = 2)

### Display the first results coming directly from the output of the apriori function

In [31]:
results = list(rules)

In [32]:
results

[RelationRecord(items=frozenset({'light cream', 'chicken'}), support=0.004532728969470737, ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)]),
 RelationRecord(items=frozenset({'escalope', 'mushroom cream sauce'}), support=0.005732568990801226, ordered_statistics=[OrderedStatistic(items_base=frozenset({'mushroom cream sauce'}), items_add=frozenset({'escalope'}), confidence=0.3006993006993007, lift=3.790832696715049)]),
 RelationRecord(items=frozenset({'escalope', 'pasta'}), support=0.005865884548726837, ordered_statistics=[OrderedStatistic(items_base=frozenset({'pasta'}), items_add=frozenset({'escalope'}), confidence=0.3728813559322034, lift=4.700811850163794)]),
 RelationRecord(items=frozenset({'fromage blanc', 'honey'}), support=0.003332888948140248, ordered_statistics=[OrderedStatistic(items_base=frozenset({'fromage blanc'}), items_add=frozenset({'honey'}), confidence=0

### Printing the results well organised into a Pandas DataFrame

In [33]:
# acess index[2]; in that element we acess index[0]; in that we acess the first element index[0]; the final [0] is to cacth what is inside

def inspect(results):
    lhs         = [tuple(result[2][0][0])[0] for result in results]
    rhs         = [tuple(result[2][0][1])[0] for result in results]
    supports    = [result[1] for result in results]
    confidences = [result[2][0][2] for result in results]
    lifts       = [result[2][0][3] for result in results]
    return list(zip(lhs, rhs, supports, confidences, lifts))
resultsinDataFrame = pd.DataFrame(inspect(results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

### Display the results non sorted

In [34]:
resultsinDataFrame

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
0,light cream,chicken,0.004533,0.290598,4.843951
1,mushroom cream sauce,escalope,0.005733,0.300699,3.790833
2,pasta,escalope,0.005866,0.372881,4.700812
3,fromage blanc,honey,0.003333,0.245098,5.164271
4,herb & pepper,ground beef,0.015998,0.323450,3.291994
...,...,...,...,...,...
149,,tomatoes,0.003066,0.255556,3.736690
150,,ground beef,0.003066,0.298701,3.040106
151,ground beef,spaghetti,0.003066,0.560976,3.221959
152,,olive oil,0.003333,0.211864,3.216994


### Display the results sorted by descending lifts

In [35]:
resultsinDataFrame.nlargest(n = 10, columns = 'Lift')

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
58,mineral water,olive oil,0.003866,0.402778,6.115863
128,,olive oil,0.003866,0.402778,6.115863
96,frozen vegetables,soup,0.003066,0.277108,5.484407
146,,soup,0.003066,0.277108,5.484407
3,fromage blanc,honey,0.003333,0.245098,5.164271
28,fromage blanc,honey,0.003333,0.245098,5.164271
0,light cream,chicken,0.004533,0.290598,4.843951
16,light cream,chicken,0.004533,0.290598,4.843951
2,pasta,escalope,0.005866,0.372881,4.700812
26,pasta,escalope,0.005866,0.372881,4.700812
