### Mounting Drive

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive so the dataset stored under MyDrive can
# be read by path later on. `drive` comes from google.colab, so this cell is
# Colab-specific.
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Importing Dataset


In [None]:
import numpy as np
import pandas as pd

# Each line of groceries.csv is one transaction: a comma-separated list of
# items, with a different number of items per line. The lines are read
# directly rather than through pd.read_csv(sep="\n"), because newer pandas
# releases refuse "\n" as a separator and raise ValueError.
GROCERIES_CSV = "/content/drive/MyDrive/Dataset/MLL/groceries.csv"

# utf-8-sig decodes plain UTF-8 and also drops a leading BOM if one is present.
with open(GROCERIES_CSV, encoding="utf-8-sig") as csv_file:
    baskets = [
        line.rstrip("\r\n").split(",")
        for line in csv_file
        if line.strip()  # skip blank lines, matching read_csv's default
    ]

# Building the frame from ragged rows pads the shorter baskets with missing
# values, giving one column per item position.
dataset = pd.DataFrame(baskets)
dataset.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
0,citrus fruit,semi-finished bread,margarine,ready soups,,,,,,,...,,,,,,,,,,
1,tropical fruit,yogurt,coffee,,,,,,,,...,,,,,,,,,,
2,whole milk,,,,,,,,,,...,,,,,,,,,,
3,pip fruit,yogurt,cream cheese,meat spreads,,,,,,,...,,,,,,,,,,
4,other vegetables,whole milk,condensed milk,long life bakery product,,,,,,,...,,,,,,,,,,


In [None]:
# (rows, columns) of the split frame: one row per transaction, one column per
# item position.
dataset.shape

(9835, 32)

### Converting the Pandas DataFrame into a List of Transactions

In [None]:
# Turn every row of `dataset` into a list holding only the items actually
# present in that basket.
# - Rows are iterated directly, so nothing depends on hard-coded dimensions.
# - pd.notna() filters the padding whether pandas stored it as None or NaN,
#   instead of comparing against the string "None".
# NOTE(review): some labels carry trailing whitespace (e.g. 'cream cheese ');
# consider stripping items if such variants should count as the same product.
transactions = [
    [str(item) for item in row if pd.notna(item)]
    for row in dataset.itertuples(index=False, name=None)
]

# Preview the first few baskets; slicing tolerates frames with fewer than 5 rows.
for basket in transactions[:5]:
    print(basket)

['citrus fruit', 'semi-finished bread', 'margarine', 'ready soups']
['tropical fruit', 'yogurt', 'coffee']
['whole milk']
['pip fruit', 'yogurt', 'cream cheese ', 'meat spreads']
['other vegetables', 'whole milk', 'condensed milk', 'long life bakery product']


In [None]:
# Re-pack the ragged transaction lists into a rectangular frame (shorter
# baskets are padded with missing values). This frame is what gets handed to
# ECLAT further down. The bare name on the last line displays it.
data = pd.DataFrame(data=transactions)
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
0,citrus fruit,semi-finished bread,margarine,ready soups,,,,,,,...,,,,,,,,,,
1,tropical fruit,yogurt,coffee,,,,,,,,...,,,,,,,,,,
2,whole milk,,,,,,,,,,...,,,,,,,,,,
3,pip fruit,yogurt,cream cheese,meat spreads,,,,,,,...,,,,,,,,,,
4,other vegetables,whole milk,condensed milk,long life bakery product,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9830,sausage,chicken,beef,hamburger meat,citrus fruit,grapes,root vegetables,whole milk,butter,whipped/sour cream,...,,,,,,,,,,
9831,cooking chocolate,,,,,,,,,,...,,,,,,,,,,
9832,chicken,citrus fruit,other vegetables,butter,yogurt,frozen dessert,domestic eggs,rolls/buns,rum,cling film/bags,...,,,,,,,,,,
9833,semi-finished bread,bottled water,soda,bottled beer,,,,,,,...,,,,,,,,,,


### Training the Eclat Model on the Dataset

In [None]:
!pip install pyECLAT

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from pyECLAT import ECLAT

# Mining thresholds handed to ECLAT.fit() below.
min_support = 0.01  # minimum support threshold
min_n_products = 2  # forwarded as min_combination
max_length = 2  # forwarded as max_combination

# Build the miner on the transaction frame; verbose=True turns on progress output.
my_eclat = ECLAT(data=data, verbose=True)

# Run the search. fit() returns two objects; the second maps each
# "item & item" combination to its support.
fit_result = my_eclat.fit(
    min_support=min_support,
    min_combination=min_n_products,
    max_combination=max_length,
)
rule_indices, rule_supports = fit_result

100%|██████████| 170/170 [00:04<00:00, 39.73it/s]
100%|██████████| 170/170 [00:00<00:00, 1806.18it/s]
100%|██████████| 170/170 [00:00<00:00, 2606.39it/s]


Combination 2 by 2


3828it [00:47, 80.72it/s]


In [None]:
# Dump the whole combination -> support mapping as plain text (every entry is
# printed on a single line).
print(rule_supports)

{'beef & whole milk': 0.02125063548551093, 'beef & rolls/buns': 0.01362480935434672, 'beef & other vegetables': 0.019725470259278087, 'beef & root vegetables': 0.017386883579054397, 'beef & yogurt': 0.011692933401118455, 'butter milk & whole milk': 0.011591255719369599, 'butter milk & other vegetables': 0.010371123538383325, 'sausage & bottled water': 0.011997966446365024, 'sausage & soda': 0.024300965937976614, 'sausage & pip fruit': 0.010777834265378749, 'sausage & fruit/vegetable juice': 0.010066090493136757, 'sausage & whole milk': 0.0298932384341637, 'sausage & pastry': 0.012506354855109304, 'sausage & brown bread': 0.010676156583629894, 'sausage & tropical fruit': 0.013929842399593289, 'sausage & rolls/buns': 0.030604982206405694, 'sausage & shopping bags': 0.015658362989323844, 'sausage & other vegetables': 0.026944585663446874, 'sausage & frankfurter': 0.010066090493136757, 'sausage & root vegetables': 0.01494661921708185, 'sausage & yogurt': 0.019623792577529234, 'sausage & ci

In [None]:
# `results` is just another name for the same dict object (no copy is made).
# Leaving it as the last expression shows the mapping one entry per line.
results = rule_supports
results

{'beef & whole milk': 0.02125063548551093,
 'beef & rolls/buns': 0.01362480935434672,
 'beef & other vegetables': 0.019725470259278087,
 'beef & root vegetables': 0.017386883579054397,
 'beef & yogurt': 0.011692933401118455,
 'butter milk & whole milk': 0.011591255719369599,
 'butter milk & other vegetables': 0.010371123538383325,
 'sausage & bottled water': 0.011997966446365024,
 'sausage & soda': 0.024300965937976614,
 'sausage & pip fruit': 0.010777834265378749,
 'sausage & fruit/vegetable juice': 0.010066090493136757,
 'sausage & whole milk': 0.0298932384341637,
 'sausage & pastry': 0.012506354855109304,
 'sausage & brown bread': 0.010676156583629894,
 'sausage & tropical fruit': 0.013929842399593289,
 'sausage & rolls/buns': 0.030604982206405694,
 'sausage & shopping bags': 0.015658362989323844,
 'sausage & other vegetables': 0.026944585663446874,
 'sausage & frankfurter': 0.010066090493136757,
 'sausage & root vegetables': 0.01494661921708185,
 'sausage & yogurt': 0.0196237925775

In [None]:
# Number of item combinations that met the support threshold.
print(len(results))

213


### Organising the Results into a Pandas DataFrame

In [None]:
# Flatten the combination -> support dict into a two-column table: one row per
# combination, with its label in "Products" and its support in "Support".
support_pairs = rule_supports.items()
results_DataFrame = pd.DataFrame(
    support_pairs,
    columns=["Products", "Support"],
)

In [None]:
# Display the table in the order the entries had in the dict (not yet sorted).
results_DataFrame

Unnamed: 0,Products,Support
0,beef & whole milk,0.021251
1,beef & rolls/buns,0.013625
2,beef & other vegetables,0.019725
3,beef & root vegetables,0.017387
4,beef & yogurt,0.011693
...,...,...
208,frankfurter & yogurt,0.011185
209,root vegetables & yogurt,0.025826
210,root vegetables & citrus fruit,0.017692
211,yogurt & cream cheese,0.012405


### Displaying the Results Sorted by Support

In [None]:
# Rank the combinations with the highest support first. The original row labels
# are kept, so the index column shows each row's position before sorting.
# The name is rebound to the sorted frame, which is then displayed.
results_DataFrame = results_DataFrame.sort_values(
    by="Support",
    ascending=False,
)
results_DataFrame

Unnamed: 0,Products,Support
120,whole milk & other vegetables,0.074835
103,whole milk & rolls/buns,0.056634
125,whole milk & yogurt,0.056024
124,whole milk & root vegetables,0.048907
201,other vegetables & root vegetables,0.047382
...,...,...
153,tropical fruit & napkins,0.010066
86,waffles & other vegetables,0.010066
18,sausage & frankfurter,0.010066
10,sausage & fruit/vegetable juice,0.010066


In [None]:
from google.colab import data_table

# Wrap the sorted frame in Colab's DataTable display object and show it.
# `data_table` comes from google.colab, so this cell is Colab-specific.
sorted_pairs_table = data_table.DataTable(results_DataFrame)
sorted_pairs_table

Unnamed: 0,Products,Support
120,whole milk & other vegetables,0.074835
103,whole milk & rolls/buns,0.056634
125,whole milk & yogurt,0.056024
124,whole milk & root vegetables,0.048907
201,other vegetables & root vegetables,0.047382
...,...,...
153,tropical fruit & napkins,0.010066
86,waffles & other vegetables,0.010066
18,sausage & frankfurter,0.010066
10,sausage & fruit/vegetable juice,0.010066
