In [1]:
# Question 3: Association Rule Mining using Apriori Algorithm

#This notebook demonstrates association rule mining using the Apriori principle.

In [2]:
transactions = [
    ['Milk', 'Bread'],
    ['Bread', 'Diaper', 'Beer', 'Eggs'],
    ['Milk', 'Diaper', 'Beer', 'Cola'],
    ['Bread', 'Milk', 'Diaper', 'Beer'],
    ['Bread', 'Milk', 'Diaper', 'Cola']
]

In [3]:
from collections import defaultdict

item_count = defaultdict(int)

for transaction in transactions:
    for item in transaction:
        item_count[item] += 1

item_count

defaultdict(int,
            {'Milk': 4,
             'Bread': 4,
             'Diaper': 4,
             'Beer': 3,
             'Eggs': 1,
             'Cola': 2})

In [4]:
num_transactions = len(transactions)

item_support = {
    item: count / num_transactions
    for item, count in item_count.items()
}

item_support

{'Milk': 0.8,
 'Bread': 0.8,
 'Diaper': 0.8,
 'Beer': 0.6,
 'Eggs': 0.2,
 'Cola': 0.4}

In [5]:
#Items with support below a minimum threshold are removed to eliminate infrequent and insignificant patterns.

In [6]:
min_support = 0.6

frequent_items = [
    item for item, support in item_support.items()
    if support >= min_support
]

frequent_items

['Milk', 'Bread', 'Diaper', 'Beer']

In [7]:
from itertools import combinations

item_pairs = list(combinations(frequent_items, 2))
item_pairs

[('Milk', 'Bread'),
 ('Milk', 'Diaper'),
 ('Milk', 'Beer'),
 ('Bread', 'Diaper'),
 ('Bread', 'Beer'),
 ('Diaper', 'Beer')]

In [8]:
#Calculating Support for Item Pairs

In [9]:
pair_count = defaultdict(int)

for transaction in transactions:
    for pair in item_pairs:
        if pair[0] in transaction and pair[1] in transaction:
            pair_count[pair] += 1

pair_count

defaultdict(int,
            {('Milk', 'Bread'): 3,
             ('Bread', 'Diaper'): 3,
             ('Bread', 'Beer'): 2,
             ('Diaper', 'Beer'): 3,
             ('Milk', 'Diaper'): 3,
             ('Milk', 'Beer'): 2})

In [10]:
pair_support = {
    pair: count / num_transactions
    for pair, count in pair_count.items()
}

pair_support

{('Milk', 'Bread'): 0.6,
 ('Bread', 'Diaper'): 0.6,
 ('Bread', 'Beer'): 0.4,
 ('Diaper', 'Beer'): 0.6,
 ('Milk', 'Diaper'): 0.6,
 ('Milk', 'Beer'): 0.4}

In [11]:
#Generating Association Rules using Confidence

In [12]:
rules = []

for pair in pair_support:
    A, B = pair
    
    confidence_A_to_B = pair_support[pair] / item_support[A]
    confidence_B_to_A = pair_support[pair] / item_support[B]
    
    rules.append((A, B, confidence_A_to_B))
    rules.append((B, A, confidence_B_to_A))

rules

[('Milk', 'Bread', 0.7499999999999999),
 ('Bread', 'Milk', 0.7499999999999999),
 ('Bread', 'Diaper', 0.7499999999999999),
 ('Diaper', 'Bread', 0.7499999999999999),
 ('Bread', 'Beer', 0.5),
 ('Beer', 'Bread', 0.6666666666666667),
 ('Diaper', 'Beer', 0.7499999999999999),
 ('Beer', 'Diaper', 1.0),
 ('Milk', 'Diaper', 0.7499999999999999),
 ('Diaper', 'Milk', 0.7499999999999999),
 ('Milk', 'Beer', 0.5),
 ('Beer', 'Milk', 0.6666666666666667)]

In [13]:
min_confidence = 0.7

strong_rules = [
    rule for rule in rules
    if rule[2] >= min_confidence
]

strong_rules

[('Milk', 'Bread', 0.7499999999999999),
 ('Bread', 'Milk', 0.7499999999999999),
 ('Bread', 'Diaper', 0.7499999999999999),
 ('Diaper', 'Bread', 0.7499999999999999),
 ('Diaper', 'Beer', 0.7499999999999999),
 ('Beer', 'Diaper', 1.0),
 ('Milk', 'Diaper', 0.7499999999999999),
 ('Diaper', 'Milk', 0.7499999999999999)]

In [14]:
#Lift Calculation

In [15]:
rules_with_lift = []

for rule in rules:
    A, B, confidence = rule
    lift = confidence / item_support[B]
    rules_with_lift.append((A, B, confidence, lift))

rules_with_lift

[('Milk', 'Bread', 0.7499999999999999, 0.9374999999999998),
 ('Bread', 'Milk', 0.7499999999999999, 0.9374999999999998),
 ('Bread', 'Diaper', 0.7499999999999999, 0.9374999999999998),
 ('Diaper', 'Bread', 0.7499999999999999, 0.9374999999999998),
 ('Bread', 'Beer', 0.5, 0.8333333333333334),
 ('Beer', 'Bread', 0.6666666666666667, 0.8333333333333334),
 ('Diaper', 'Beer', 0.7499999999999999, 1.2499999999999998),
 ('Beer', 'Diaper', 1.0, 1.25),
 ('Milk', 'Diaper', 0.7499999999999999, 0.9374999999999998),
 ('Diaper', 'Milk', 0.7499999999999999, 0.9374999999999998),
 ('Milk', 'Beer', 0.5, 0.8333333333333334),
 ('Beer', 'Milk', 0.6666666666666667, 0.8333333333333334)]

In [16]:
min_confidence = 0.7

strong_rules = [
    rule for rule in rules_with_lift
    if rule[2] >= min_confidence and rule[3] > 1
]

strong_rules

[('Diaper', 'Beer', 0.7499999999999999, 1.2499999999999998),
 ('Beer', 'Diaper', 1.0, 1.25)]

In [None]:
#75% of Diaper purchases include Beer

#Beer is 25% more likely with Diaper than random