In [169]:
# Another implementation of Apriori, based on the book Machine Learning in Action

In [170]:
def load_dataset():
#     return [[1, 3, 4], 
#             [2, 3, 5],
#             [1, 2, 3, 5],
#             [2, 5]]
    return [['Mango', 'Onion', 'Nintendo', 'Key-chain', 'Eggs', 'Yo-yo'],
           ['Doll', 'Onion', 'Nintendo', 'Key-chain', 'Eggs', 'Yo-yo'],
           ['Mango', 'Apple', 'Key-chain', 'Eggs'],
           ['Mango', 'Umbrella', 'Corn', 'Key-chain', 'Yo-yo'],
           ['Corn', 'Onion', 'Onion', 'Key-chain', 'Ice-cream', 'Eggs']]

In [171]:
def create_candidates(dataset):
    candidates = []
    for transaction in dataset:
        for item in transaction:
            if not [item] in candidates:
                candidates.append([item])
#     candidates.sort()
    return map(frozenset, candidates)

In [172]:
def scan_dataset(dataset, candidates, min_support):
    subset_candidate = {}
    output = []
    support_data = {}
    num_items = float(len(dataset))

    for trans_id in dataset:
        for candidate in candidates:
            if candidate.issubset(trans_id):
                if not subset_candidate.get(candidate):
                    subset_candidate[candidate] = 1
                else:
                    subset_candidate[candidate] += 1

    for key in subset_candidate:
        support = subset_candidate[key] / num_items
        if support >= min_support:
            output.append(key)
        support_data[key] = support
    return output, support_data

In [173]:
def apriori_generate(itemset, k):
    output = []
    len_itemset = len(itemset)
    for i in range(len_itemset):
        for j in range(i + 1, len_itemset):
            itemset_1 = list(itemset[i])[:k-2]
            itemset_2 = list(itemset[j])[:k-2]
            itemset_1.sort()
            itemset_2.sort()
            if itemset_1 == itemset_2:
                output.append(itemset[i] | itemset[j])
    return output

In [174]:
def apriori(dataset, min_support = 0.2):
    candidates = create_candidates(dataset)
    data = list(map(set, dataset))
    itemset_1, support = scan_dataset(data, candidates, min_support)
    itemsets = [itemset_1]
    k = 2
    while len(itemsets[k - 2]) > 0:
        candidates_k = apriori_generate(itemsets[k - 2], k)
        itemsets_k, support_k = scan_dataset(data, candidates_k, min_support)
        support.update(support_k)
        itemsets.append(itemsets_k)
        k += 1
    return itemsets, support

itemsets, support_data = apriori(load_dataset())
print('itemsets:', itemsets)
print()
print('support_data:', support_data)

itemsets: [[frozenset({'Mango'}), frozenset({'Onion'}), frozenset({'Nintendo'}), frozenset({'Key-chain'}), frozenset({'Eggs'}), frozenset({'Yo-yo'})], [frozenset({'Onion', 'Mango'}), frozenset({'Nintendo', 'Mango'}), frozenset({'Mango', 'Key-chain'}), frozenset({'Mango', 'Eggs'}), frozenset({'Mango', 'Yo-yo'}), frozenset({'Onion', 'Nintendo'}), frozenset({'Onion', 'Key-chain'}), frozenset({'Onion', 'Eggs'}), frozenset({'Onion', 'Yo-yo'}), frozenset({'Nintendo', 'Key-chain'}), frozenset({'Nintendo', 'Eggs'}), frozenset({'Nintendo', 'Yo-yo'}), frozenset({'Eggs', 'Key-chain'}), frozenset({'Key-chain', 'Yo-yo'}), frozenset({'Eggs', 'Yo-yo'})], [frozenset({'Nintendo', 'Onion', 'Mango'}), frozenset({'Onion', 'Mango', 'Key-chain'}), frozenset({'Onion', 'Mango', 'Eggs'}), frozenset({'Onion', 'Mango', 'Yo-yo'}), frozenset({'Nintendo', 'Mango', 'Key-chain'}), frozenset({'Nintendo', 'Mango', 'Eggs'}), frozenset({'Nintendo', 'Mango', 'Yo-yo'}), frozenset({'Eggs', 'Mango', 'Key-chain'}), frozenset(

In [175]:
# {a -> b}, a = antecedent, b = consequent

def generate_rules(itemsets, support_data, min_confidence = 0.7):
    big_rule_list = []
    for i in range(1, len(itemsets)):
        for freq_set in itemsets[i]:
            h_1 = [frozenset([item]) for item in freq_set]
            if i > 1:
                rules_from_consequent(freq_set, h_1, support_data, big_rule_list, min_confidence)
            else:
                calculate_confidence(freq_set, h_1, support_data, big_rule_list, min_confidence)
    return big_rule_list

In [179]:
def calculate_confidence(freq_set, h, support_data, big_rule_list, min_confidence = 0.7):
    pruned_h = []
    for consequent in h:
        confidence = support_data[freq_set] / support_data[freq_set - consequent]
        if confidence >= min_confidence:
            print('{} ---> {}, conf: {}'.format(freq_set - consequent, consequent, confidence))
            big_rule_list.append((freq_set - consequent, consequent, confidence))
            pruned_h.append(consequent)
    return pruned_h

In [177]:
def rules_from_consequent(freq_set, h, support_data, big_rule_list, min_confidence = 0.7):
    m = len(h[0])
    if len(freq_set) > (m + 1):
        hmp1 = apriori_generate(h, m + 1)
        hmp1 = calculate_confidence(freq_set, hmp1, support_data, big_rule_list, min_confidence)
        if len(hmp1) > 1:
            rules_from_consequent(freq_set, hmp1, support_data, big_rule_list, min_confidence)

In [180]:
rules = generate_rules(itemsets, support_data, min_confidence = 0.7)
rules

frozenset({'Mango'}) ---> frozenset({'Onion'}), conf: 1.0
frozenset({'Onion'}) ---> frozenset({'Mango'}), conf: 1.0
frozenset({'Mango'}) ---> frozenset({'Nintendo'}), conf: 1.0
frozenset({'Nintendo'}) ---> frozenset({'Mango'}), conf: 1.0
frozenset({'Key-chain'}) ---> frozenset({'Mango'}), conf: 2.9999999999999996
frozenset({'Mango'}) ---> frozenset({'Key-chain'}), conf: 2.9999999999999996
frozenset({'Eggs'}) ---> frozenset({'Mango'}), conf: 2.0
frozenset({'Mango'}) ---> frozenset({'Eggs'}), conf: 2.0
frozenset({'Yo-yo'}) ---> frozenset({'Mango'}), conf: 2.0
frozenset({'Mango'}) ---> frozenset({'Yo-yo'}), conf: 2.0
frozenset({'Nintendo'}) ---> frozenset({'Onion'}), conf: 2.0
frozenset({'Onion'}) ---> frozenset({'Nintendo'}), conf: 2.0
frozenset({'Key-chain'}) ---> frozenset({'Onion'}), conf: 2.9999999999999996
frozenset({'Onion'}) ---> frozenset({'Key-chain'}), conf: 2.9999999999999996
frozenset({'Eggs'}) ---> frozenset({'Onion'}), conf: 2.9999999999999996
frozenset({'Onion'}) ---> froz

[(frozenset({'Mango'}), frozenset({'Onion'}), 1.0),
 (frozenset({'Onion'}), frozenset({'Mango'}), 1.0),
 (frozenset({'Mango'}), frozenset({'Nintendo'}), 1.0),
 (frozenset({'Nintendo'}), frozenset({'Mango'}), 1.0),
 (frozenset({'Key-chain'}), frozenset({'Mango'}), 2.9999999999999996),
 (frozenset({'Mango'}), frozenset({'Key-chain'}), 2.9999999999999996),
 (frozenset({'Eggs'}), frozenset({'Mango'}), 2.0),
 (frozenset({'Mango'}), frozenset({'Eggs'}), 2.0),
 (frozenset({'Yo-yo'}), frozenset({'Mango'}), 2.0),
 (frozenset({'Mango'}), frozenset({'Yo-yo'}), 2.0),
 (frozenset({'Nintendo'}), frozenset({'Onion'}), 2.0),
 (frozenset({'Onion'}), frozenset({'Nintendo'}), 2.0),
 (frozenset({'Key-chain'}), frozenset({'Onion'}), 2.9999999999999996),
 (frozenset({'Onion'}), frozenset({'Key-chain'}), 2.9999999999999996),
 (frozenset({'Eggs'}), frozenset({'Onion'}), 2.9999999999999996),
 (frozenset({'Onion'}), frozenset({'Eggs'}), 2.9999999999999996),
 (frozenset({'Yo-yo'}), frozenset({'Onion'}), 2.0),
 (