# Task 1 - Data Preparation
For this task, you will perform the following steps:
- Load all the necessary packages for this exercise
- Load the data

In [1]:
# Import 'numpy' and 'pandas' to work with numbers and dataframes
import numpy as np
import pandas as pd

# Import 'matplotlib.pyplot' and 'seaborn' for visualizations
from matplotlib import pyplot as plt
import seaborn as sns

# Import 'apriori' and 'association_rules' from 'mlxtend' for working with association rules
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [2]:
# Read the transactions file into a DataFrame indexed by transaction id.
# The file is expected to already be a binary (0/1) item matrix:
# one row per transaction, one column per item.
df = pd.read_csv(
    'supermarket_binarymat.csv',
    index_col='transID',
)
df.head(5)

Unnamed: 0_level_0,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,berries,beverages,...,UHT-milk,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
transID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [3]:
# Cast every item column from 0/1 to False/True.
# This step is optional, but Boolean input is computationally faster to work with.
df = df.astype('bool')
df.head(5)

Unnamed: 0_level_0,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,berries,beverages,...,UHT-milk,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
transID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
5,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False


# Task 2 - Build a Collection of Frequent Itemsets
For this task, you will generate a collection of frequent itemsets for a specific minimum support threshold value

In [5]:
# Mine the frequent itemsets from the transaction matrix with 'apriori()'.
# - 'min_support' is 0.05
# - 'use_colnames' is True, so the itemsets show item names (the DataFrame's
#   column labels), as seen in the 'itemsets' column of the output
frequent_itemsets = apriori(
    df,
    min_support=0.05,
    use_colnames=True,
)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.052466,(beef)
1,0.080529,(bottled beer)
2,0.110524,(bottled water)
3,0.06487,(brown bread)
4,0.055414,(butter)
5,0.077682,(canned beer)
6,0.082766,(citrus fruit)
7,0.058058,(coffee)
8,0.053279,(curd)
9,0.063447,(domestic eggs)


# Task 3 - Build and Analyze Association Rules
For this task, you will explore the rules generated from the collection of itemsets for different performance metrics and threshold values

In [6]:
# Build association rules from 'frequent_itemsets' via 'association_rules()'.
# Selection criterion: 'support' metric with a minimum threshold of 0.05
rules1 = association_rules(
    frequent_itemsets,
    metric='support',
    min_threshold=0.05,
)
rules1

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(whole milk),(other vegetables),0.255516,0.193493,0.074835,0.292877,1.513634,0.025394,1.140548,0.455803
1,(other vegetables),(whole milk),0.193493,0.255516,0.074835,0.386758,1.513634,0.025394,1.214013,0.42075
2,(whole milk),(rolls/buns),0.255516,0.183935,0.056634,0.221647,1.205032,0.009636,1.048452,0.228543
3,(rolls/buns),(whole milk),0.183935,0.255516,0.056634,0.307905,1.205032,0.009636,1.075696,0.208496
4,(whole milk),(yogurt),0.255516,0.139502,0.056024,0.21926,1.571735,0.020379,1.102157,0.488608
5,(yogurt),(whole milk),0.139502,0.255516,0.056024,0.401603,1.571735,0.020379,1.244132,0.422732


In [9]:
# Number of rules (rows) in 'rules1'
rules1.shape[0]

6

In [7]:
# Build association rules from 'frequent_itemsets' via 'association_rules()'.
# Selection criterion: 'confidence' metric with a minimum threshold of 0.3
rules2 = association_rules(
    frequent_itemsets,
    metric='confidence',
    min_threshold=0.3,
)
rules2

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(other vegetables),(whole milk),0.193493,0.255516,0.074835,0.386758,1.513634,0.025394,1.214013,0.42075
1,(rolls/buns),(whole milk),0.183935,0.255516,0.056634,0.307905,1.205032,0.009636,1.075696,0.208496
2,(yogurt),(whole milk),0.139502,0.255516,0.056024,0.401603,1.571735,0.020379,1.244132,0.422732


In [10]:
# Number of rules (rows) in 'rules2'
rules2.shape[0]

3

In [8]:
# Build association rules from 'frequent_itemsets' via 'association_rules()'.
# Selection criterion: 'lift' metric with a minimum threshold of 1.2
rules3 = association_rules(
    frequent_itemsets,
    metric='lift',
    min_threshold=1.2,
)
rules3

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(whole milk),(other vegetables),0.255516,0.193493,0.074835,0.292877,1.513634,0.025394,1.140548,0.455803
1,(other vegetables),(whole milk),0.193493,0.255516,0.074835,0.386758,1.513634,0.025394,1.214013,0.42075
2,(whole milk),(rolls/buns),0.255516,0.183935,0.056634,0.221647,1.205032,0.009636,1.048452,0.228543
3,(rolls/buns),(whole milk),0.183935,0.255516,0.056634,0.307905,1.205032,0.009636,1.075696,0.208496
4,(whole milk),(yogurt),0.255516,0.139502,0.056024,0.21926,1.571735,0.020379,1.102157,0.488608
5,(yogurt),(whole milk),0.139502,0.255516,0.056024,0.401603,1.571735,0.020379,1.244132,0.422732


In [11]:
# Number of rules (rows) in 'rules3'
rules3.shape[0]

6

# Task 4 - Identify Strong Rules
For this task, you will perform the following steps:
- Obtain a collection of rules that satisfy a minimum lift threshold
  - Sort the selected collection of rules by support, confidence and lift and see which rules come up at the top
  - Select rules with high support and confidence

In [13]:
# Build association rules from 'frequent_itemsets' via 'association_rules()'.
# Selection criterion: 'lift' metric with a minimum threshold of 1.5
# The resulting 'rules4' is the collection sorted and filtered in the cells below.
rules4 = association_rules(
    frequent_itemsets,
    metric='lift',
    min_threshold=1.5,
)
rules4

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(whole milk),(other vegetables),0.255516,0.193493,0.074835,0.292877,1.513634,0.025394,1.140548,0.455803
1,(other vegetables),(whole milk),0.193493,0.255516,0.074835,0.386758,1.513634,0.025394,1.214013,0.42075
2,(whole milk),(yogurt),0.255516,0.139502,0.056024,0.21926,1.571735,0.020379,1.102157,0.488608
3,(yogurt),(whole milk),0.139502,0.255516,0.056024,0.401603,1.571735,0.020379,1.244132,0.422732


In [14]:
# Number of rules (rows) in 'rules4'
rules4.shape[0]

4

In [15]:
# Rank the rules in 'rules4' by 'support', highest first.
# 'sort_values()' returns a new DataFrame; 'rules4' itself is not modified.
rules4.sort_values('support', ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(whole milk),(other vegetables),0.255516,0.193493,0.074835,0.292877,1.513634,0.025394,1.140548,0.455803
1,(other vegetables),(whole milk),0.193493,0.255516,0.074835,0.386758,1.513634,0.025394,1.214013,0.42075
2,(whole milk),(yogurt),0.255516,0.139502,0.056024,0.21926,1.571735,0.020379,1.102157,0.488608
3,(yogurt),(whole milk),0.139502,0.255516,0.056024,0.401603,1.571735,0.020379,1.244132,0.422732


In [16]:
# Rank the rules in 'rules4' by 'confidence', highest first.
# 'sort_values()' returns a new DataFrame; 'rules4' itself is not modified.
rules4.sort_values('confidence', ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
3,(yogurt),(whole milk),0.139502,0.255516,0.056024,0.401603,1.571735,0.020379,1.244132,0.422732
1,(other vegetables),(whole milk),0.193493,0.255516,0.074835,0.386758,1.513634,0.025394,1.214013,0.42075
0,(whole milk),(other vegetables),0.255516,0.193493,0.074835,0.292877,1.513634,0.025394,1.140548,0.455803
2,(whole milk),(yogurt),0.255516,0.139502,0.056024,0.21926,1.571735,0.020379,1.102157,0.488608


In [17]:
# Rank the rules in 'rules4' by 'lift', highest first.
# 'sort_values()' returns a new DataFrame; 'rules4' itself is not modified.
rules4.sort_values('lift', ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
3,(yogurt),(whole milk),0.139502,0.255516,0.056024,0.401603,1.571735,0.020379,1.244132,0.422732
2,(whole milk),(yogurt),0.255516,0.139502,0.056024,0.21926,1.571735,0.020379,1.102157,0.488608
1,(other vegetables),(whole milk),0.193493,0.255516,0.074835,0.386758,1.513634,0.025394,1.214013,0.42075
0,(whole milk),(other vegetables),0.255516,0.193493,0.074835,0.292877,1.513634,0.025394,1.140548,0.455803


In [None]:
# Extract rules that have a support of more than 0.05 and a confidence of more than 0.4
# Build one Boolean mask per condition and combine them element-wise with '&';
# each comparison needs its own parentheses because '&' binds tighter than '>'.
rules4[(rules4['support'] > 0.05) & (rules4['confidence'] > 0.4)]