# Imports

In [1]:
from random import choice
from random import randint
from pandas import DataFrame as Table
from itertools import permutations
from itertools import product

# Itemset

In [2]:
# Universe of items that may appear in a transaction.
ITEMS = {
    'item1', 'item2', 'item3', 'item4',
    'item5', 'item6', 'item7', 'item8',
}

In [3]:
ITEMS

{'item1', 'item2', 'item3', 'item4', 'item5', 'item6', 'item7', 'item8'}

# Generating a random transaction dataset

In [4]:
def generate_random_transactional_dataset(items,min_samples=1,max_samples=10,entries=20,seed=None):
    """Build a list of random transactions drawn from ``items``.

    Each transaction is a set obtained by drawing between ``min_samples`` and
    ``max_samples`` items (inclusive) with replacement, so a transaction may
    end up with fewer distinct items than draws.

    Parameters
    ----------
    items : iterable
        Sortable, hashable items to draw from.
    min_samples, max_samples : int
        Inclusive bounds on the number of draws per transaction.
    entries : int
        Number of transactions to generate.
    seed : optional
        When given, a private ``random.Random(seed)`` generator is used so the
        same seed always yields the same dataset. When ``None`` the module-level
        ``choice``/``randint`` are used, so global ``random.seed`` still applies.

    Returns
    -------
    list of set
    """
    from random import Random

    # Sort once: iteration order of a set of strings varies between
    # interpreter runs, which would defeat seeding; sorting also avoids
    # rebuilding the list for every single draw.
    pool = sorted(items)

    if seed is None:
        pick, draw_count = choice, randint
    else:
        rng = Random(seed)
        pick, draw_count = rng.choice, rng.randint

    return [
        {pick(pool) for _ in range(draw_count(min_samples, max_samples))}
        for _ in range(entries)
    ]

# Draw a fresh random dataset from ITEMS using the generator's default sizes.
Td_list = generate_random_transactional_dataset(ITEMS)

# The string literal below is a disabled alternative: a fixed 10-transaction
# example dataset. It does not affect Td_list, but because it is the last
# expression in the cell the notebook echoes it as the cell output.
"""
Example from https://sherbold.github.io/intro-to-data-science/05_Association-Rule-Mining.html
Td_list = [
 set(['item1', 'item2', 'item3']),
 set(['item2', 'item4']),
 set(['item1', 'item5']),
 set(['item6', 'item7']),
 set(['item2', 'item3', 'item4', 'item7']),
 set(['item2', 'item3', 'item4', 'item8']),
 set(['item2', 'item4', 'item5']),
 set(['item2', 'item3', 'item4']),
 set(['item4', 'item5']),
 set(['item6', 'item7'])
]
"""

"\nExample from https://sherbold.github.io/intro-to-data-science/05_Association-Rule-Mining.html\nTd_list = [\n set(['item1', 'item2', 'item3']),\n set(['item2', 'item4']),\n set(['item1', 'item5']),\n set(['item6', 'item7']),\n set(['item2', 'item3', 'item4', 'item7']),\n set(['item2', 'item3', 'item4', 'item8']),\n set(['item2', 'item4', 'item5']),\n set(['item2', 'item3', 'item4']),\n set(['item4', 'item5']),\n set(['item6', 'item7'])\n]\n"

In [5]:
Td_list

[{'item1'},
 {'item1', 'item2', 'item7'},
 {'item2', 'item4', 'item5', 'item6', 'item7', 'item8'},
 {'item1', 'item2', 'item3', 'item4', 'item6', 'item7', 'item8'},
 {'item2', 'item4', 'item6', 'item8'},
 {'item3', 'item4', 'item6', 'item7', 'item8'},
 {'item1', 'item2', 'item4', 'item6', 'item7', 'item8'},
 {'item4'},
 {'item2', 'item3', 'item4', 'item7'},
 {'item1', 'item4', 'item5', 'item6', 'item7', 'item8'},
 {'item1', 'item2', 'item4', 'item6'},
 {'item1', 'item2', 'item6', 'item7', 'item8'},
 {'item1', 'item4', 'item5'},
 {'item1', 'item3', 'item4', 'item6', 'item7'},
 {'item1', 'item2', 'item3', 'item4', 'item7'},
 {'item3', 'item4'},
 {'item4', 'item5', 'item6', 'item7', 'item8'},
 {'item3', 'item8'},
 {'item1', 'item4', 'item6', 'item7', 'item8'},
 {'item5'}]

# One hot encoding

In [6]:
def one_hot_encoding(items, Td_list):
    """Build a boolean table with one row per transaction and one column per item.

    Columns follow the sorted order of ``items``; a cell is True when the
    row's transaction contains the column's item.
    """
    columns = sorted(items)
    rows = []
    for transaction in Td_list:
        rows.append({item: (item in transaction) for item in columns})
    return Table(rows)

# Encode the transactions as a boolean table with one column per item in ITEMS.
Td_table = one_hot_encoding(ITEMS,Td_list)

In [7]:
Td_table

Unnamed: 0,item1,item2,item3,item4,item5,item6,item7,item8
0,True,False,False,False,False,False,False,False
1,True,True,False,False,False,False,True,False
2,False,True,False,True,True,True,True,True
3,True,True,True,True,False,True,True,True
4,False,True,False,True,False,True,False,True
5,False,False,True,True,False,True,True,True
6,True,True,False,True,False,True,True,True
7,False,False,False,True,False,False,False,False
8,False,True,True,True,False,False,True,False
9,True,False,False,True,True,True,True,True


# Support and confidence

In [8]:
def support(Td_table,tr):
    """Return the fraction of rows of ``Td_table`` that contain every item of ``tr``.

    Parameters
    ----------
    Td_table : DataFrame
        One-hot table: one row per transaction, one boolean column per item.
    tr : iterable
        Item (column) names forming the itemset.

    Returns
    -------
    float
        Support in [0, 1]. An empty itemset has support 1.0 (every row
        contains it); an empty table yields 0.0 instead of dividing by zero.
    """
    columns = list(tr)
    total = Td_table.shape[0]

    if total == 0:
        # No transactions at all: nothing can be supported.
        return 0.0
    if not columns:
        # The empty itemset is trivially contained in every transaction.
        return 1.0

    # A row supports the itemset when all of the selected columns are truthy;
    # one vectorised pass replaces filtering the frame once per item.
    matching = int(Td_table[columns].all(axis=1).sum())
    return matching / total

def confidence(Td_table,X,Y):
    """Return the confidence of the rule ``X => Y`` on ``Td_table``.

    Confidence is ``support(X union Y) / support(X)``.

    Parameters
    ----------
    Td_table : DataFrame
        One-hot transaction table passed through to ``support``.
    X : iterable
        Antecedent items.
    Y : iterable
        Consequent items.

    Returns
    -------
    float
        The rule's confidence, or 0.0 when the antecedent has zero support
        (the ratio is undefined there and would otherwise raise
        ZeroDivisionError).
    """
    antecedent = set(X)
    antecedent_support = support(Td_table, antecedent)
    if antecedent_support == 0:
        return 0.0
    return support(Td_table, antecedent.union(Y)) / antecedent_support

# Apriori algorithm

In [9]:
def next_candidates(Fi_k,items):
    """Generate the (k+1)-item candidates from the frequent k-itemsets ``Fi_k``.

    Every itemset in ``Fi_k`` is extended by each item of ``items`` it does not
    already contain; an extension is kept only if all of its subsets obtained
    by removing exactly one item are themselves in ``Fi_k``.

    Parameters
    ----------
    Fi_k : list of set or frozenset
        Frequent itemsets of the current size. Not modified.
    items : set
        All available items.

    Returns
    -------
    list of set
        The surviving candidates, in first-generation order, without duplicates.
    """
    # Hashable snapshot of the frequent k-itemsets for constant-time lookups
    # (list membership here made the pruning step quadratic).
    frequent = {frozenset(tr) for tr in Fi_k}

    # Generate all possible combinations of each element of Fi_k with another one of items
    seen = set()
    unique_combinations = []
    for tr in Fi_k:
        base = frozenset(tr)
        for it in items.difference(tr):
            combination = base | {it}
            if combination not in seen:
                seen.add(combination)
                unique_combinations.append(combination)

    # Prune candidates: a (k+1)-itemset can only be frequent if every one of
    # its maximal proper subsets (size k) is frequent.
    candidates = []
    for combination in unique_combinations:
        if all(combination - {it} in frequent for it in combination):
            # Hand back plain mutable sets, as callers received before.
            candidates.append(set(combination))

    return candidates

def Apriori(Td_table,epsilon,items):
    """Collect every itemset whose support in ``Td_table`` is at least ``epsilon``.

    Starts from the single-item candidates and grows them one level at a time
    through ``next_candidates`` until no candidate is left. The result is a
    set of frozensets covering all levels.
    """
    frequent_itemsets = set()
    level_candidates = [{item} for item in items]

    while level_candidates:
        # Keep only the candidates that reach the support threshold.
        survivors = [
            cand for cand in level_candidates
            if support(Td_table, cand) >= epsilon
        ]
        frequent_itemsets |= {frozenset(cand) for cand in survivors}
        level_candidates = next_candidates(survivors, items)

    return frequent_itemsets

# Mine all itemsets whose support is at least 0.3 (the epsilon threshold).
Fi = Apriori(Td_table,0.3,ITEMS)
Fi

{frozenset({'item1', 'item4', 'item6'}),
 frozenset({'item1', 'item6', 'item7'}),
 frozenset({'item6', 'item7', 'item8'}),
 frozenset({'item1'}),
 frozenset({'item7', 'item8'}),
 frozenset({'item1', 'item7'}),
 frozenset({'item1', 'item4'}),
 frozenset({'item6'}),
 frozenset({'item1', 'item4', 'item7'}),
 frozenset({'item3', 'item4'}),
 frozenset({'item4', 'item6', 'item8'}),
 frozenset({'item4', 'item6', 'item7'}),
 frozenset({'item1', 'item6'}),
 frozenset({'item4'}),
 frozenset({'item7'}),
 frozenset({'item4', 'item6'}),
 frozenset({'item4', 'item6', 'item7', 'item8'}),
 frozenset({'item2'}),
 frozenset({'item6', 'item7'}),
 frozenset({'item2', 'item7'}),
 frozenset({'item2', 'item4'}),
 frozenset({'item8'}),
 frozenset({'item6', 'item8'}),
 frozenset({'item1', 'item2'}),
 frozenset({'item4', 'item7'}),
 frozenset({'item4', 'item7', 'item8'}),
 frozenset({'item2', 'item6'}),
 frozenset({'item4', 'item8'}),
 frozenset({'item3'})}

# Elimination of singletons

In [10]:
# Keep only the itemsets holding at least two items (drop the singletons)
Fi = {itemset for itemset in Fi if len(itemset) > 1}
Fi

{frozenset({'item1', 'item4'}),
 frozenset({'item1', 'item6'}),
 frozenset({'item1', 'item4', 'item6'}),
 frozenset({'item4', 'item6', 'item7'}),
 frozenset({'item4', 'item8'}),
 frozenset({'item7', 'item8'}),
 frozenset({'item4', 'item7', 'item8'}),
 frozenset({'item6', 'item7', 'item8'}),
 frozenset({'item4', 'item6'}),
 frozenset({'item4', 'item6', 'item8'}),
 frozenset({'item4', 'item7'}),
 frozenset({'item4', 'item6', 'item7', 'item8'}),
 frozenset({'item2', 'item4'}),
 frozenset({'item6', 'item8'}),
 frozenset({'item1', 'item2'}),
 frozenset({'item2', 'item6'}),
 frozenset({'item1', 'item7'}),
 frozenset({'item1', 'item6', 'item7'}),
 frozenset({'item1', 'item4', 'item7'}),
 frozenset({'item3', 'item4'}),
 frozenset({'item6', 'item7'}),
 frozenset({'item2', 'item7'})}

# Generate good rules

In [11]:
def generate_rules(Td_table,Fi,delta):
    """Return the rules ``X => Y`` derived from ``Fi`` with confidence >= ``delta``.

    For every itemset, each split into a non-empty antecedent ``X`` and a
    non-empty consequent ``Y`` is evaluated with ``confidence``. Itemsets with
    fewer than two items have no such split and contribute no rules.

    Parameters
    ----------
    Td_table : DataFrame
        One-hot transaction table passed through to ``confidence``.
    Fi : iterable of iterables
        Itemsets to derive rules from.
    delta : float
        Minimum confidence for a rule to be kept.

    Returns
    -------
    list of tuple
        ``(X, Y)`` pairs, where ``X`` and ``Y`` are lists of items.
    """
    good_rules = []

    for fi in map(list, Fi):

        # All 0/1 masks over the itemset. product() yields the all-zeros mask
        # first and the all-ones mask last; slicing drops both (empty X or
        # empty Y) and, unlike two remove() calls, is safe for empty itemsets.
        masks = list(product([0, 1], repeat=len(fi)))[1:-1]

        for mask in masks:
            # Bit set -> item goes to the antecedent, otherwise to the consequent.
            X = [it for it, bit in zip(fi, mask) if bit]
            Y = [it for it, bit in zip(fi, mask) if not bit]

            # Evaluate confidence of a rule
            if confidence(Td_table, set(X), set(Y)) >= delta:
                good_rules.append((X, Y))

    return good_rules

# Keep the rules whose confidence is at least 0.5 (the delta threshold).
generate_rules(Td_table,Fi,0.5)

[(['item6'], ['item1', 'item4']),
 (['item4', 'item6'], ['item1']),
 (['item1'], ['item4', 'item6']),
 (['item1', 'item6'], ['item4']),
 (['item1', 'item4'], ['item6']),
 (['item4'], ['item1']),
 (['item1'], ['item4']),
 (['item6'], ['item7', 'item4']),
 (['item4'], ['item7', 'item6']),
 (['item4', 'item6'], ['item7']),
 (['item7'], ['item4', 'item6']),
 (['item7', 'item6'], ['item4']),
 (['item7', 'item4'], ['item6']),
 (['item6'], ['item1']),
 (['item1'], ['item6']),
 (['item6'], ['item7', 'item4', 'item8']),
 (['item8'], ['item7', 'item4', 'item6']),
 (['item8', 'item6'], ['item7', 'item4']),
 (['item4', 'item6'], ['item7', 'item8']),
 (['item4', 'item8'], ['item7', 'item6']),
 (['item4', 'item8', 'item6'], ['item7']),
 (['item7'], ['item4', 'item8', 'item6']),
 (['item7', 'item6'], ['item4', 'item8']),
 (['item7', 'item8'], ['item4', 'item6']),
 (['item7', 'item8', 'item6'], ['item4']),
 (['item7', 'item4'], ['item8', 'item6']),
 (['item7', 'item4', 'item6'], ['item8']),
 (['item7'