### Mounting Drive

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive; the dataset path used when loading the
# CSV later in this notebook lives under that mount point.
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Importing Dataset


In [None]:
import numpy as np
import pandas as pd

# Location of the market-basket CSV on the mounted Drive.
DATASET_PATH = "/content/drive/MyDrive/Dataset/MLL/Market_Basket_Optimisation.csv"

# header=None: the first line of the file is already a basket, not column names,
# so pandas assigns integer column labels instead.
dataset = pd.read_csv(DATASET_PATH, header=None)
dataset.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,


In [None]:
# (rows, columns) of the raw table as loaded from the CSV.
dataset.shape

(7501, 20)

### Converting the Pandas DataFrame into a List of Transactions

In [None]:
# Build one list of item names per basket (one basket per DataFrame row).
#
# Rows shorter than the widest basket are padded by pandas with NaN. Those
# cells must be dropped with pd.notna(): comparing str(cell) against "None"
# never matches, because a missing cell stringifies to "nan", and every short
# basket would then carry a spurious 'nan' item into the rule mining.
#
# Iterating the value array directly (instead of indexing with fixed row and
# column counts) keeps this cell correct if the dataset size changes.
transactions = [
    [str(item) for item in row if pd.notna(item)]
    for row in dataset.values
]

# Preview the first few baskets; slicing is safe even with fewer than 5 rows.
for basket in transactions[:5]:
    print(basket)

['shrimp', 'almonds', 'avocado', 'vegetables mix', 'green grapes', 'whole weat flour', 'yams', 'cottage cheese', 'energy drink', 'tomato juice', 'low fat yogurt', 'green tea', 'honey', 'salad', 'mineral water', 'salmon', 'antioxydant juice', 'frozen smoothie', 'spinach', 'olive oil']
['burgers', 'meatballs', 'eggs', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan']
['chutney', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan']
['turkey', 'avocado', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan']
['mineral water', 'milk', 'energy bar', 'whole wheat rice', 'green tea', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan']


### Training the Apriori Model on the Dataset

In [None]:
!pip install apyori

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from apyori import apriori

# Mine association rules over the baskets.
#   min_support=0.002  -> itemset must appear in at least 0.2% of transactions
#   min_confidence=0.2 -> rule must hold in at least 20% of the baskets containing its base
#   min_lift=3         -> rule must be at least 3x more likely than chance
#   max_length=2       -> itemsets are capped at two items (one item on each side of a rule)
#
# `min_length` is intentionally not passed: apyori's apriori() only reads
# min_support, min_confidence, min_lift and max_length from its keyword
# arguments and silently ignores any other keyword, so it had no effect.
#
# The (misspelled) variable name is kept because a later cell reads it.
assotiation_rules = apriori(
    transactions=transactions,
    min_support=0.002,
    min_confidence=0.2,
    min_lift=3,
    max_length=2,
)

In [None]:
# Materialise the mined records into a list: len() and positional indexing are
# applied to `results` in the cells that follow.
results = list(assotiation_rules)
results

[RelationRecord(items=frozenset({'turkey', 'barbecue sauce'}), support=0.0025329956005865884, ordered_statistics=[OrderedStatistic(items_base=frozenset({'barbecue sauce'}), items_add=frozenset({'turkey'}), confidence=0.2345679012345679, lift=3.751585985416831)]),
 RelationRecord(items=frozenset({'chicken', 'extra dark chocolate'}), support=0.0027996267164378083, ordered_statistics=[OrderedStatistic(items_base=frozenset({'extra dark chocolate'}), items_add=frozenset({'chicken'}), confidence=0.23333333333333334, lift=3.8894074074074076)]),
 RelationRecord(items=frozenset({'light cream', 'chicken'}), support=0.004532728969470737, ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)]),
 RelationRecord(items=frozenset({'escalope', 'mushroom cream sauce'}), support=0.005732568990801226, ordered_statistics=[OrderedStatistic(items_base=frozenset({'mushroom cream sauce'}), items_add=f

In [None]:
# Number of relation records (itemsets) returned by the miner. Each record
# carries its own list of ordered statistics (the directed rules), so this
# counts itemsets rather than individual rules.
print(len(results))

13


### Organising the Results into a Pandas DataFrame

In [None]:
def inspect(results):
    """Flatten relation records into ``(lhs, rhs, support, confidence, lift)`` rows.

    Records are read positionally: index 1 is the itemset support and index 2
    is the list of ordered statistics, each laid out as
    ``(items_base, items_add, confidence, lift)``.

    Only the first ordered statistic of every record is reported, and only the
    first item of its base and add sets. That is lossless for itemsets capped
    at two items; longer itemsets or records with several ordered statistics
    would be truncated.

    Args:
        results: Iterable of relation records. It is traversed exactly once,
            so a one-shot iterator or generator works as well as a list.

    Returns:
        A list with one ``(lhs, rhs, support, confidence, lift)`` tuple per
        record, in input order.

    Raises:
        IndexError: If a record has no ordered statistics, or the first
            statistic has an empty base or add set.
    """
    rows = []
    for record in results:
        first_rule = record[2][0]
        rows.append(
            (
                tuple(first_rule[0])[0],  # left-hand side (items_base)
                tuple(first_rule[1])[0],  # right-hand side (items_add)
                record[1],                # support of the whole itemset
                first_rule[2],            # confidence
                first_rule[3],            # lift
            )
        )
    return rows


# Tabulate the flattened rules, one rule per row, so they can be sorted and
# displayed with readable column names.
rule_columns = ["Left Hand Side", "Right Hand Side", "Support", "Confidence", "Lift"]
results_DataFrame = pd.DataFrame(data=inspect(results), columns=rule_columns)

In [None]:
# Render the rule table (one row per relation record).
results_DataFrame

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
0,barbecue sauce,turkey,0.002533,0.234568,3.751586
1,extra dark chocolate,chicken,0.0028,0.233333,3.889407
2,light cream,chicken,0.004533,0.290598,4.843951
3,mushroom cream sauce,escalope,0.005733,0.300699,3.790833
4,pasta,escalope,0.005866,0.372881,4.700812
5,extra dark chocolate,olive oil,0.002666,0.222222,3.374269
6,fromage blanc,honey,0.003333,0.245098,5.164271
7,gluten free bar,pancakes,0.002133,0.307692,3.237027
8,herb & pepper,ground beef,0.015998,0.32345,3.291994
9,tomato sauce,ground beef,0.005333,0.377358,3.840659


### Displaying the Results Sorted by Support

In [None]:
# Order the rules from the most to the least frequent itemset; the original
# row labels are kept so each rule can be traced back to the unsorted table.
results_DataFrame = results_DataFrame.sort_values(by="Support", ascending=False)
results_DataFrame

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
8,herb & pepper,ground beef,0.015998,0.32345,3.291994
11,whole wheat pasta,olive oil,0.007999,0.271493,4.12241
4,pasta,escalope,0.005866,0.372881,4.700812
3,mushroom cream sauce,escalope,0.005733,0.300699,3.790833
9,tomato sauce,ground beef,0.005333,0.377358,3.840659
12,pasta,shrimp,0.005066,0.322034,4.506672
2,light cream,chicken,0.004533,0.290598,4.843951
6,fromage blanc,honey,0.003333,0.245098,5.164271
10,light cream,olive oil,0.0032,0.205128,3.11471
1,extra dark chocolate,chicken,0.0028,0.233333,3.889407
