# Apriori

In [22]:
import pandas as pd 
import numpy as np
from apyori import apriori

## Loading Dataset

In [23]:
dataset = pd.read_csv(
    "../Datasets/market_basket_optimisation.csv",
    header=None,  # treat the first line as data, not as column names
)

## Data Preprocessing

In [24]:
# Convert the frame into the list-of-lists shape passed to apriori(): one inner
# list of product names per transaction (one CSV row each).
# Cells that are missing in the CSV come back from read_csv as NaN; they are
# skipped here so that a fake "nan" product never ends up in a basket.
# Iterating over dataset.values (evaluated once) also removes the hard-coded
# 7501 x 20 dimensions, so the cell keeps working if the file changes size.
transactions = [
    [str(item) for item in row if pd.notna(item)]
    for row in dataset.values
]

## Training the apriori model on the dataset
- **support**(A) = number of transactions containing product A / total number of transactions (choose the threshold according to the situation)
- **confidence**(A → B) = number of transactions containing both A and B / number of transactions containing A
- **lift**(A → B) = confidence(A → B) / support(B)

- **For this dataset:**

- minimum support = (number of times a product should be bought per day × days in a week) / total number of transactions = (3 × 7) / 7501 ≈ 0.0028, rounded to 0.003

  - number of times per day = 3
  - days in a week = 7
  - total transactions = 7501
    
- minimum confidence = 0.2 (Recommended)
- minimum lift = 3 (Recommended)

In [25]:
# Mine association rules from the baskets, keeping only those that clear the
# support / confidence / lift thresholds chosen above; max_length=2 caps the
# itemset size at two products.
# NOTE(review): confirm that apyori actually honours min_length — it may be ignored.
rules = apriori(
    transactions=transactions,
    min_support=0.003,
    min_confidence=0.2,
    min_lift=3,
    min_length=2,
    max_length=2,
)

In [26]:
# Materialise the apriori result into a list so it can be indexed and
# iterated again later, then peek at the first record.
rules = [record for record in rules]
rules[0]

RelationRecord(items=frozenset({'light cream', 'chicken'}), support=0.004532728969470737, ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)])

# Visualizing the Rules

In [27]:
def _format_itemset(items):
    """Render one side of a rule as a single table cell.

    A one-element itemset is returned as that element itself; larger (or empty)
    itemsets become a sorted, comma-separated string so that no item is dropped.
    """
    members = tuple(items)
    if len(members) == 1:
        return members[0]
    return ", ".join(sorted(str(member) for member in members))


def inspect(rules):
    """Flatten apriori relation records into rows suitable for a DataFrame.

    Parameters
    ----------
    rules : iterable
        Records shaped like ``(items, support, ordered_statistics)``, where each
        ordered statistic is ``(items_base, items_add, confidence, lift)``.

    Returns
    -------
    list of tuple
        One ``(lhs, rhs, support, confidence, lift)`` row per ordered statistic.
    """
    rows = []
    for record in rules:
        support = record[1]
        # A record can carry several directed rules (e.g. A -> B and B -> A);
        # emit every one of them rather than only the first.
        for stat in record[2]:
            rows.append((
                _format_itemset(stat[0]),
                _format_itemset(stat[1]),
                support,
                stat[2],
                stat[3],
            ))
    return rows
# Tabulate the flattened rules. This rebinds `rules` from the list of apriori
# records to a DataFrame, so re-run the apriori cells before re-running this one.
rules = pd.DataFrame(
    inspect(rules),
    columns=['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'],
)

### Displaying the rules (unsorted)
- If a customer buys "light cream", they are likely to buy "chicken" as well.
- If a customer buys "mushroom cream sauce", they are likely to buy "escalope" as well.
- If a customer buys "pasta", they are likely to buy "escalope" as well.

...and so on.

In [31]:
# All mined rules, in the order they were produced (no sorting applied).
rules

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
0,light cream,chicken,0.004533,0.290598,4.843951
1,mushroom cream sauce,escalope,0.005733,0.300699,3.790833
2,pasta,escalope,0.005866,0.372881,4.700812
3,fromage blanc,honey,0.003333,0.245098,5.164271
4,herb & pepper,ground beef,0.015998,0.32345,3.291994
5,tomato sauce,ground beef,0.005333,0.377358,3.840659
6,light cream,olive oil,0.0032,0.205128,3.11471
7,whole wheat pasta,olive oil,0.007999,0.271493,4.12241
8,pasta,shrimp,0.005066,0.322034,4.506672


### Displaying the sorted rules 

In [32]:
# Top five rules ranked by the Lift column, highest first.
rules.nlargest(5, 'Lift')

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
3,fromage blanc,honey,0.003333,0.245098,5.164271
0,light cream,chicken,0.004533,0.290598,4.843951
2,pasta,escalope,0.005866,0.372881,4.700812
8,pasta,shrimp,0.005066,0.322034,4.506672
7,whole wheat pasta,olive oil,0.007999,0.271493,4.12241
