In [1]:
import pandas as pd 
import matplotlib.pyplot as plt
from apyori import apriori

In [2]:
# Load the dataset, assuming each row is a transaction, and each column represents an item bought
groceries_data = pd.read_csv("groceries.csv", header=None)

In [3]:
# Show a few rows of the dataset
display(groceries_data.head())
print(groceries_data.shape)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
0,Item 1,Item 2,Item 3,Item 4,Item 5,Item 6,Item 7,Item 8,Item 9,Item 10,...,Item 23,Item 24,Item 25,Item 26,Item 27,Item 28,Item 29,Item 30,Item 31,Item 32
1,citrus fruit,semi-finished bread,margarine,ready soups,,,,,,,...,,,,,,,,,,
2,tropical fruit,yogurt,coffee,,,,,,,,...,,,,,,,,,,
3,whole milk,,,,,,,,,,...,,,,,,,,,,
4,pip fruit,yogurt,cream cheese,meat spreads,,,,,,,...,,,,,,,,,,


(9836, 32)


In [4]:
# Prepare the records for the Apriori algorithm
# Each row will be a list of items bought in a particular transaction
records = []
for i in range(0, len(groceries_data)):
    records.append([str(groceries_data.values[i, j]) for j in range(0, groceries_data.shape[1]) if str(groceries_data.values[i, j]) != 'nan'])  # Excludes 'NaN'

In [5]:
# Check the type of records
print(type(records))

<class 'list'>


In [6]:
# Apply the Apriori algorithm
# Adjust min_support, min_confidence, and min_lift based on your dataset's size and desired granularity
association_rules = apriori(records, min_support=0.003, min_confidence=0.2, min_lift=3, min_length=2)

In [7]:
# Convert the association rules into a list for easier interpretation
association_results = list(association_rules)
print(f"There are {len(association_results)} association rules derived.")

There are 225 association rules derived.


In [8]:
# Print the rules in a readable format
for item in association_results:
    pair = item[0]                 # 'item[0]' contains the base item and the add-on item (the rule)
    items = [x for x in pair]

In [9]:
print(f"Rule: {items[0]} -> {items[2]}")

Rule: tropical fruit -> root vegetables


In [10]:
# 'item[1]' contains the support of the rule
print(f"Support: {item[1]}")

Support: 0.0035583570557137048


In [11]:
# 'item[2][0][2]' contains the confidence of the rule
print(f"Confidence: {item[2][0][2]}")

Confidence: 0.2892561983471074


In [12]:
# 'item[2][0][3]' contains the lift of the rule
print(f"Lift: {item[2][0][3]}")

Lift: 5.1635643683160595
