In [2]:
# Import pandas under the alias pd
import pandas as pd
groceries_path = 'https://assets.datacamp.com/production/repositories/5654/datasets/5992818fd324b0de7d48311ee43fa038f7614ee5/small_grocery_store.csv'

# Load transactions from pandas
groceries = pd.read_csv(groceries_path)
groceries.head()

Unnamed: 0,Transaction
0,"milk,bread,biscuit"
1,"bread,milk,biscuit,cereal"
2,"bread,tea"
3,"jam,bread,milk"
4,"tea,biscuit"


In [3]:
# Split transaction strings into lists
transactions = groceries['Transaction'].apply(lambda t: t.split(','))

# Convert DataFrame column into list of strings
transactions = list(transactions)

# Print the list of transactions
(transactions)

[['milk', 'bread', 'biscuit'],
 ['bread', 'milk', 'biscuit', 'cereal'],
 ['bread', 'tea'],
 ['jam', 'bread', 'milk'],
 ['tea', 'biscuit'],
 ['bread', 'tea'],
 ['tea', 'cereal'],
 ['bread', 'tea', 'biscuit'],
 ['jam', 'bread', 'tea'],
 ['bread', 'milk'],
 ['coffee', 'orange', 'biscuit', 'cereal'],
 ['coffee', 'orange', 'biscuit', 'cereal'],
 ['coffee', 'sugar'],
 ['bread', 'coffee', 'orange'],
 ['bread', 'sugar', 'biscuit'],
 ['coffee', 'sugar', 'cereal'],
 ['bread', 'sugar', 'biscuit'],
 ['bread', 'coffee', 'sugar'],
 ['bread', 'coffee', 'sugar'],
 ['tea', 'milk', 'coffee', 'cereal']]

In [4]:
# Import permutations from the itertools module
from itertools import permutations

# Define the set of groceries
flattened = [i for t in transactions for i in t]
groceries = list(set(flattened))

# Generate all possible rules from groceries list
rules = list(permutations(groceries, 2))

# Print the set of rules
print(rules)

# Print the number of rules
print(len(rules))

[('milk', 'coffee'), ('milk', 'cereal'), ('milk', 'sugar'), ('milk', 'tea'), ('milk', 'biscuit'), ('milk', 'bread'), ('milk', 'orange'), ('milk', 'jam'), ('coffee', 'milk'), ('coffee', 'cereal'), ('coffee', 'sugar'), ('coffee', 'tea'), ('coffee', 'biscuit'), ('coffee', 'bread'), ('coffee', 'orange'), ('coffee', 'jam'), ('cereal', 'milk'), ('cereal', 'coffee'), ('cereal', 'sugar'), ('cereal', 'tea'), ('cereal', 'biscuit'), ('cereal', 'bread'), ('cereal', 'orange'), ('cereal', 'jam'), ('sugar', 'milk'), ('sugar', 'coffee'), ('sugar', 'cereal'), ('sugar', 'tea'), ('sugar', 'biscuit'), ('sugar', 'bread'), ('sugar', 'orange'), ('sugar', 'jam'), ('tea', 'milk'), ('tea', 'coffee'), ('tea', 'cereal'), ('tea', 'sugar'), ('tea', 'biscuit'), ('tea', 'bread'), ('tea', 'orange'), ('tea', 'jam'), ('biscuit', 'milk'), ('biscuit', 'coffee'), ('biscuit', 'cereal'), ('biscuit', 'sugar'), ('biscuit', 'tea'), ('biscuit', 'bread'), ('biscuit', 'orange'), ('biscuit', 'jam'), ('bread', 'milk'), ('bread', 'co

In [5]:
# !pip install mlxtend

In [6]:
# Import the transaction encoder function from mlxtend
from mlxtend.preprocessing import TransactionEncoder
import pandas as pd

# Instantiate transaction encoder and identify unique items in transactions
encoder = TransactionEncoder().fit(transactions)

# One-hot encode transactions
onehot = encoder.transform(transactions)

# Convert one-hot encoded data to DataFrame
onehot = pd.DataFrame(onehot, columns = encoder.columns_)

# Print the one-hot encoded transaction dataset
print(onehot)

    biscuit  bread  cereal  coffee    jam   milk  orange  sugar    tea
0      True   True   False   False  False   True   False  False  False
1      True   True    True   False  False   True   False  False  False
2     False   True   False   False  False  False   False  False   True
3     False   True   False   False   True   True   False  False  False
4      True  False   False   False  False  False   False  False   True
5     False   True   False   False  False  False   False  False   True
6     False  False    True   False  False  False   False  False   True
7      True   True   False   False  False  False   False  False   True
8     False   True   False   False   True  False   False  False   True
9     False   True   False   False  False   True   False  False  False
10     True  False    True    True  False  False    True  False  False
11     True  False    True    True  False  False    True  False  False
12    False  False   False    True  False  False   False   True  False
13    

In [7]:
# Compute the support
support = onehot.mean()

# Print the support
print(support)

biscuit    0.40
bread      0.65
cereal     0.30
coffee     0.40
jam        0.10
milk       0.25
orange     0.15
sugar      0.30
tea        0.35
dtype: float64


In [8]:
import numpy as np

# Add a jam+bread column to the DataFrame onehot
onehot['jam+bread'] = np.logical_and(onehot['jam'], onehot['bread'])

# Compute the support
support = onehot.mean()

# Print the support values
print(support)

biscuit      0.40
bread        0.65
cereal       0.30
coffee       0.40
jam          0.10
milk         0.25
orange       0.15
sugar        0.30
tea          0.35
jam+bread    0.10
dtype: float64
