Mall Transaction Set

In [1]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Given mall transaction set: each basket is written as one comma-separated
# string and split into a list of item names (a list of lists overall).
transactions_mall = [
    basket.split(',')
    for basket in (
        'Bread,Milk',
        'Bread,Diaper,Beer,Eggs',
        'Milk,Diaper,Beer,Coke',
        'Bread,Milk,Diaper,Beer',
        'Bread,Milk,Diaper,Coke',
        'Bread,Diaper,Beer',
        'Bread,Milk,Diaper',
        'Bread,Beer,Coke',
        'Milk,Diaper,Beer,Bread',
        'Milk,Diaper,Beer,Coke',
    )
]

# One-hot encode the mall baskets: the encoder learns the item vocabulary and
# produces a boolean array, which is wrapped in a DataFrame with one column per item.
te = TransactionEncoder()
te_ary = te.fit_transform(transactions_mall)
df_mall = pd.DataFrame(data=te_ary, columns=te.columns_)

In [2]:
# Inspect the encoded mall baskets (one row per transaction, one boolean column per item)
df_mall

Unnamed: 0,Beer,Bread,Coke,Diaper,Eggs,Milk
0,False,True,False,False,False,True
1,True,True,False,True,True,False
2,True,False,True,True,False,True
3,True,True,False,True,False,True
4,False,True,True,True,False,True
5,True,True,False,True,False,False
6,False,True,False,True,False,True
7,True,True,True,False,False,False
8,True,True,False,True,False,True
9,True,False,True,True,False,True


In [3]:
# Mine the frequent itemsets of the mall baskets with Apriori.
# min_support is the tuning knob here: it is set to 0.3 for this run.
frequent_itemsets_mall = apriori(
    df_mall,
    min_support=0.3,
    use_colnames=True,  # report item names instead of column indices
)

print("frequent itemset:")
frequent_itemsets_mall

frequent itemset:


Unnamed: 0,support,itemsets
0,0.7,(Beer)
1,0.8,(Bread)
2,0.4,(Coke)
3,0.8,(Diaper)
4,0.7,(Milk)
5,0.5,"(Beer, Bread)"
6,0.3,"(Beer, Coke)"
7,0.6,"(Diaper, Beer)"
8,0.4,"(Milk, Beer)"
9,0.6,"(Diaper, Bread)"


In [4]:
# Rank the mall itemsets from highest to lowest support
frequent_itemsets_mall_sorted = frequent_itemsets_mall.sort_values(
    'support',
    ascending=False,
)
frequent_itemsets_mall_sorted

Unnamed: 0,support,itemsets
3,0.8,(Diaper)
1,0.8,(Bread)
0,0.7,(Beer)
4,0.7,(Milk)
7,0.6,"(Diaper, Beer)"
13,0.6,"(Milk, Diaper)"
9,0.6,"(Diaper, Bread)"
5,0.5,"(Beer, Bread)"
10,0.5,"(Milk, Bread)"
8,0.4,"(Milk, Beer)"


In [5]:
# Derive association rules from the mall itemsets, filtering on confidence
# with a threshold of 0.7
rules_mall = association_rules(
    frequent_itemsets_mall,
    metric="confidence",
    min_threshold=0.7,
)

# Show only the rule sides plus support, confidence, and lift
rules_mall.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(Beer),(Bread),0.5,0.714286,0.892857
1,(Coke),(Beer),0.3,0.75,1.071429
2,(Diaper),(Beer),0.6,0.75,1.071429
3,(Beer),(Diaper),0.6,0.857143,1.071429
4,(Diaper),(Bread),0.6,0.75,0.9375
5,(Bread),(Diaper),0.6,0.75,0.9375
6,(Milk),(Bread),0.5,0.714286,0.892857
7,(Coke),(Diaper),0.3,0.75,0.9375
8,(Coke),(Milk),0.3,0.75,1.071429
9,(Milk),(Diaper),0.6,0.857143,1.071429


In [6]:
# Look up the support of the itemset {Bread, Milk} in the mall frequent itemsets
is_bread_milk = frequent_itemsets_mall['itemsets'] == frozenset({'Bread', 'Milk'})
specific_itemset = frequent_itemsets_mall[is_bread_milk]
if specific_itemset.empty:
    # No row of the frequent-itemset table matches the requested itemset
    print("The specified itemset is not present in the frequent itemsets.")
else:
    # Take the first matching row and report its support
    support_value = specific_itemset.iloc[0]['support']
    print(f"The support for the itemset {{Bread,Milk}} is: {support_value}")

The support for the itemset {Bread,Milk} is: 0.5


In [7]:
# Order the mall rules from highest to lowest confidence
rules_mall_sorted = rules_mall.sort_values('confidence', ascending=False)
rules_mall_sorted.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift
15,"(Diaper, Coke)",(Milk),0.3,1.0,1.428571
14,"(Milk, Coke)",(Diaper),0.3,1.0,1.25
12,"(Milk, Beer)",(Diaper),0.4,1.0,1.25
9,(Milk),(Diaper),0.6,0.857143,1.071429
3,(Beer),(Diaper),0.6,0.857143,1.071429
13,"(Milk, Bread)",(Diaper),0.4,0.8,1.0
11,"(Beer, Bread)",(Diaper),0.4,0.8,1.0
8,(Coke),(Milk),0.3,0.75,1.071429
10,(Diaper),(Milk),0.6,0.75,1.071429
16,(Coke),"(Milk, Diaper)",0.3,0.75,1.25


In [8]:
# Order the mall rules from highest to lowest lift.
# NOTE: this rebinds rules_mall_sorted, which the previous cell bound to the
# confidence-ordered frame.
rules_mall_sorted = rules_mall.sort_values('lift', ascending=False)
rules_mall_sorted.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift
15,"(Diaper, Coke)",(Milk),0.3,1.0,1.428571
14,"(Milk, Coke)",(Diaper),0.3,1.0,1.25
12,"(Milk, Beer)",(Diaper),0.4,1.0,1.25
16,(Coke),"(Milk, Diaper)",0.3,0.75,1.25
1,(Coke),(Beer),0.3,0.75,1.071429
10,(Diaper),(Milk),0.6,0.75,1.071429
9,(Milk),(Diaper),0.6,0.857143,1.071429
8,(Coke),(Milk),0.3,0.75,1.071429
3,(Beer),(Diaper),0.6,0.857143,1.071429
2,(Diaper),(Beer),0.6,0.75,1.071429


Digital Store Transaction Set

In [9]:

# Given digital store transaction set: each basket is written as one
# comma-separated string and split into a list of item names.
transactions_digital = [
    basket.split(',')
    for basket in (
        'Laptop,Printer,Tablet,Headset',
        'Printer,Monitor,Tablet',
        'Laptop,Printer,Tablet,Headset',
        'Laptop,Monitor,Tablet,Headset',
        'Printer,Monitor,Tablet,Headset',
        'Printer,Tablet,Headset',
        'Monitor,Tablet',
        'Laptop,Printer,Monitor',
        'Laptop,Tablet,Headset',
        'Printer,Tablet',
    )
]

In [10]:
# Fit a fresh TransactionEncoder on the digital-store baskets.
# NOTE: te and te_ary reuse the names from the mall section, so after this
# cell they refer to the digital-store encoder and array.
te = TransactionEncoder()
te.fit(transactions_digital)
te_ary = te.transform(transactions_digital)

# Wrap the encoded array in a DataFrame with one column per item
df_digital = pd.DataFrame(data=te_ary, columns=te.columns_)

In [11]:
# Inspect the encoded digital-store baskets (one row per transaction, one boolean column per item)
df_digital

Unnamed: 0,Headset,Laptop,Monitor,Printer,Tablet
0,True,True,False,True,True
1,False,False,True,True,True
2,True,True,False,True,True
3,True,True,True,False,True
4,True,False,True,True,True
5,True,False,False,True,True
6,False,False,True,False,True
7,False,True,True,True,False
8,True,True,False,False,True
9,False,False,False,True,True


In [12]:
# Mine the frequent itemsets of the digital-store baskets with Apriori.
# min_support is the tuning knob here: it is set to 0.4 for this run.
frequent_itemsets_digital = apriori(
    df_digital,
    min_support=0.4,
    use_colnames=True,  # report item names instead of column indices
)

# Show the resulting itemsets with their support
frequent_itemsets_digital

Unnamed: 0,support,itemsets
0,0.6,(Headset)
1,0.5,(Laptop)
2,0.5,(Monitor)
3,0.7,(Printer)
4,0.9,(Tablet)
5,0.4,"(Laptop, Headset)"
6,0.4,"(Headset, Printer)"
7,0.6,"(Headset, Tablet)"
8,0.4,"(Laptop, Tablet)"
9,0.4,"(Monitor, Tablet)"


In [13]:
# Rank the digital-store itemsets from highest to lowest support
frequent_itemsets_digital_sorted = frequent_itemsets_digital.sort_values(
    'support',
    ascending=False,
)
frequent_itemsets_digital_sorted

Unnamed: 0,support,itemsets
4,0.9,(Tablet)
3,0.7,(Printer)
0,0.6,(Headset)
7,0.6,"(Headset, Tablet)"
10,0.6,"(Printer, Tablet)"
1,0.5,(Laptop)
2,0.5,(Monitor)
5,0.4,"(Laptop, Headset)"
6,0.4,"(Headset, Printer)"
8,0.4,"(Laptop, Tablet)"


In [14]:
# Look up the support of the itemset {Monitor, Tablet} in the digital-store frequent itemsets
is_monitor_tablet = frequent_itemsets_digital['itemsets'] == frozenset({'Monitor', 'Tablet'})
specific_itemset = frequent_itemsets_digital[is_monitor_tablet]
if specific_itemset.empty:
    # No row of the frequent-itemset table matches the requested itemset
    print("The specified itemset is not present in the frequent itemsets.")
else:
    # Take the first matching row and report its support
    support_value = specific_itemset.iloc[0]['support']
    print(f"The support for the itemset {{Monitor, Tablet}} is: {support_value}")

The support for the itemset {Monitor, Tablet} is: 0.4


In [15]:
# Derive association rules from the digital-store itemsets, filtering on
# confidence with a threshold of 0.1
rules_digital = association_rules(
    frequent_itemsets_digital,
    metric="confidence",
    min_threshold=0.1,
)

# Show only the rule sides plus support, confidence, and lift
rules_digital.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(Laptop),(Headset),0.4,0.8,1.333333
1,(Headset),(Laptop),0.4,0.666667,1.333333
2,(Headset),(Printer),0.4,0.666667,0.952381
3,(Printer),(Headset),0.4,0.571429,0.952381
4,(Headset),(Tablet),0.6,1.0,1.111111
5,(Tablet),(Headset),0.6,0.666667,1.111111
6,(Laptop),(Tablet),0.4,0.8,0.888889
7,(Tablet),(Laptop),0.4,0.444444,0.888889
8,(Monitor),(Tablet),0.4,0.8,0.888889
9,(Tablet),(Monitor),0.4,0.444444,0.888889


In [16]:
# Order the digital-store rules from highest to lowest confidence
rules_digital_sorted = rules_digital.sort_values('confidence', ascending=False)
rules_digital_sorted.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift
12,"(Laptop, Headset)",(Tablet),0.4,1.0,1.111111
18,"(Headset, Printer)",(Tablet),0.4,1.0,1.111111
4,(Headset),(Tablet),0.6,1.0,1.111111
13,"(Laptop, Tablet)",(Headset),0.4,1.0,1.666667
10,(Printer),(Tablet),0.6,0.857143,0.952381
8,(Monitor),(Tablet),0.4,0.8,0.888889
15,(Laptop),"(Headset, Tablet)",0.4,0.8,1.333333
0,(Laptop),(Headset),0.4,0.8,1.333333
6,(Laptop),(Tablet),0.4,0.8,0.888889
1,(Headset),(Laptop),0.4,0.666667,1.333333


In [17]:
# Order the digital-store rules from highest to lowest lift.
# NOTE: this rebinds rules_digital_sorted, which the previous cell bound to
# the confidence-ordered frame.
rules_digital_sorted = rules_digital.sort_values('lift', ascending=False)
rules_digital_sorted.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift
16,(Headset),"(Laptop, Tablet)",0.4,0.666667,1.666667
13,"(Laptop, Tablet)",(Headset),0.4,1.0,1.666667
0,(Laptop),(Headset),0.4,0.8,1.333333
1,(Headset),(Laptop),0.4,0.666667,1.333333
15,(Laptop),"(Headset, Tablet)",0.4,0.8,1.333333
14,"(Headset, Tablet)",(Laptop),0.4,0.666667,1.333333
21,(Headset),"(Printer, Tablet)",0.4,0.666667,1.111111
20,"(Printer, Tablet)",(Headset),0.4,0.666667,1.111111
18,"(Headset, Printer)",(Tablet),0.4,1.0,1.111111
17,(Tablet),"(Laptop, Headset)",0.4,0.444444,1.111111


In [18]:
# Select the rule {Monitor} → {Tablet}: build one boolean mask per rule side,
# then keep the rows where both masks hold.
is_monitor_antecedent = rules_digital['antecedents'] == frozenset({'Monitor'})
is_tablet_consequent = rules_digital['consequents'] == frozenset({'Tablet'})
monitor_to_tablet = rules_digital[is_monitor_antecedent & is_tablet_consequent]

# Show every metric column for the rule {Monitor} → {Tablet}
monitor_to_tablet

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
8,(Monitor),(Tablet),0.5,0.9,0.4,0.8,0.888889,-0.05,0.5,-0.2
