### Mounting Drive

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive so files under MyDrive can be read
# by path from the cells below (Colab-only; prompts for authorization on
# first use).
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Importing Dataset


In [None]:
import numpy as np
import pandas as pd

# Each line of groceries.csv is one basket with a variable number of
# comma-separated items. Recent pandas versions reject sep="\n" in read_csv
# (the line terminator cannot be used as a separator), so the file is read
# line by line instead and the ragged rows are handed to DataFrame.
with open(
    "/content/drive/MyDrive/Dataset/MLL/groceries.csv", encoding="utf-8"
) as csv_file:
    basket_rows = [
        line.rstrip("\n").split(",")
        for line in csv_file
        if line.strip()  # skip blank lines, as read_csv did by default
    ]

# Rows shorter than the longest basket are padded with None, giving the same
# wide layout (integer column labels 0..k-1) that str.split(expand=True) made.
dataset = pd.DataFrame(basket_rows)
dataset.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
0,citrus fruit,semi-finished bread,margarine,ready soups,,,,,,,...,,,,,,,,,,
1,tropical fruit,yogurt,coffee,,,,,,,,...,,,,,,,,,,
2,whole milk,,,,,,,,,,...,,,,,,,,,,
3,pip fruit,yogurt,cream cheese,meat spreads,,,,,,,...,,,,,,,,,,
4,other vegetables,whole milk,condensed milk,long life bakery product,,,,,,,...,,,,,,,,,,


In [None]:
dataset.shape

(9835, 32)

### Converting the Pandas DataFrame into a List of Transactions

In [None]:
# Turn the wide, padded DataFrame into one list of item names per basket.
# The frame is materialised once and iterated row by row, so the code no
# longer depends on the hard-coded 9835 x 32 shape.
basket_matrix = dataset.to_numpy()
transactions = [
    # pd.notna drops the padding cells (None or NaN) without discarding a
    # genuine item that happens to be spelled "None".
    [str(item) for item in basket_row if pd.notna(item)]
    for basket_row in basket_matrix
]

# Preview the first few baskets; slicing is safe even with fewer than five.
for basket in transactions[:5]:
    print(basket)

['citrus fruit', 'semi-finished bread', 'margarine', 'ready soups']
['tropical fruit', 'yogurt', 'coffee']
['whole milk']
['pip fruit', 'yogurt', 'cream cheese ', 'meat spreads']
['other vegetables', 'whole milk', 'condensed milk', 'long life bakery product']


### Training the Apriori Model on the Dataset

In [None]:
!pip install apyori

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from apyori import apriori

# Mine association rules from the baskets.
#   min_support    - itemset must occur in at least 0.3% of transactions
#   min_confidence - P(rhs | lhs) must be at least 0.2
#   min_lift       - rule must be at least 4x more likely than chance
#   max_length     - consider itemsets of at most 8 items
# NOTE: apyori does not read a `min_length` option; the previous
# `min_length=5` was silently ignored (the results contain 2-item rules),
# so it has been removed to avoid implying a filter that never applied.
# The variable name (sic) is kept because the next cell consumes it.
assotiation_rules = apriori(
    transactions=transactions,
    min_support=0.003,
    min_confidence=0.2,
    min_lift=4,
    max_length=8,
)

In [None]:
# Drain the rule iterable into a concrete list so it can be counted,
# indexed and re-read by the cells below.
results = [*assotiation_rules]
results

[RelationRecord(items=frozenset({'Instant food products', 'hamburger meat'}), support=0.003050330452465684, ordered_statistics=[OrderedStatistic(items_base=frozenset({'Instant food products'}), items_add=frozenset({'hamburger meat'}), confidence=0.379746835443038, lift=11.42143769597027)]),
 RelationRecord(items=frozenset({'bottled beer', 'liquor'}), support=0.004677173360447382, ordered_statistics=[OrderedStatistic(items_base=frozenset({'liquor'}), items_add=frozenset({'bottled beer'}), confidence=0.4220183486238532, lift=5.240594013529793)]),
 RelationRecord(items=frozenset({'sugar', 'flour'}), support=0.00498220640569395, ordered_statistics=[OrderedStatistic(items_base=frozenset({'flour'}), items_add=frozenset({'sugar'}), confidence=0.28654970760233917, lift=8.46311223504206)]),
 RelationRecord(items=frozenset({'processed cheese', 'white bread'}), support=0.004168784951703101, ordered_statistics=[OrderedStatistic(items_base=frozenset({'processed cheese'}), items_add=frozenset({'whit

In [None]:
# Number of RelationRecord entries (one per qualifying itemset) in `results`
print(len(results))

64


### Organising the Results into a Pandas DataFrame

In [None]:
def inspect(results):
    """Flatten apriori relation records into plain rows for a table.

    Parameters
    ----------
    results : iterable
        Records shaped like ``(items, support, ordered_statistics)``, where
        each ordered statistic is shaped like
        ``(items_base, items_add, confidence, lift)`` and the two item
        collections are sets of item-name strings.

    Returns
    -------
    list of tuple
        One ``(lhs, rhs, support, confidence, lift)`` tuple per ordered
        statistic. ``lhs`` and ``rhs`` are the item names of each side,
        sorted and joined with ``", "``.

    Notes
    -----
    Every item on each side is reported (not just an arbitrary first one),
    every ordered statistic of a record is emitted (not just the first),
    and an empty antecedent yields ``""`` instead of raising ``IndexError``.
    Rules with a single item per side and a single statistic produce the
    same rows as before.
    """
    rows = []
    for record in results:
        support = record[1]
        for statistic in record[2]:
            # Sets have no stable order; sort so the label is deterministic.
            lhs = ", ".join(sorted(statistic[0]))
            rhs = ", ".join(sorted(statistic[1]))
            rows.append((lhs, rhs, support, statistic[2], statistic[3]))
    return rows

# Tabulate the flattened rules: one row per rule, with labelled columns for
# both sides of the rule and its three quality metrics.
results_DataFrame = pd.DataFrame(
    data=inspect(results),
    columns=[
        "Left Hand Side",
        "Right Hand Side",
        "Support",
        "Confidence",
        "Lift",
    ],
)

In [None]:
results_DataFrame

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
0,Instant food products,hamburger meat,0.003050,0.379747,11.421438
1,liquor,bottled beer,0.004677,0.422018,5.240594
2,flour,sugar,0.004982,0.286550,8.463112
3,processed cheese,white bread,0.004169,0.251534,5.975445
4,citrus fruit,root vegetables,0.003864,0.457831,4.200346
...,...,...,...,...,...
59,root vegetables,whole milk,0.005694,0.270531,4.828814
60,tropical fruit,whole milk,0.003152,0.226277,4.038907
61,whipped/sour cream,whole milk,0.004372,0.316176,5.643549
62,other vegetables,tropical fruit,0.003152,0.303922,7.185261


### Displaying the Results Sorted by Support

In [None]:
# Rank the rules from the most to the least frequently observed itemset.
results_DataFrame = results_DataFrame.sort_values(by="Support", ascending=False)
results_DataFrame

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
52,root vegetables,other vegetables,0.007829,0.303150,4.050919
48,root vegetables,other vegetables,0.007016,0.333333,4.454257
11,other vegetables,whipped/sour cream,0.005796,0.289340,4.036397
30,other vegetables,root vegetables,0.005796,0.200704,4.103796
59,root vegetables,whole milk,0.005694,0.270531,4.828814
...,...,...,...,...,...
15,whole milk,chocolate,0.003050,0.225564,4.545945
22,butter,other vegetables,0.003050,0.315789,4.219823
25,butter,other vegetables,0.003050,0.306122,7.050853
27,root vegetables,whole milk,0.003050,0.236220,4.216385
