In [255]:
# From the book Machine Learning in Action

In [256]:
class TreeNode:
    def __init__(self, value, count, parent):
        self.value = value
        self.count = count
        self.node = None
        self.parent = parent
        self.children = {}

    def inc(self, count):
        self.count += count
    
    def disp(self, ind = 1):
        print(' ' * ind, self.value, 'count:', self.count)
        for child in self.children.values():
            child.disp(ind + 1)

In [257]:
# Create a root node
root_node = TreeNode('pyramid', 9, None)
root_node.children['eye'] = TreeNode('eye', 13, None)
root_node.disp()

  pyramid count: 9
   eye count: 13


In [258]:
root_node.children['phoenix'] = TreeNode('phoenix', 3, None)
root_node.disp()

  pyramid count: 9
   eye count: 13
   phoenix count: 3


In [259]:
def create_tree(dataset, min_support = 1):
    header_table = {}
    # Iterate dataset twice
    for trans in dataset:
        for item in trans:
            header_table[item] = header_table.get(item, 0) + dataset[trans]
    
    # Remove item not meeting min support
    for k in header_table.keys():
        if header_table[k] < min_support:
            del(header_table[k])
    
    freq_itemset = set(header_table.keys())
    if len(freq_itemset) == 0: 
        return None, None
    
    for k in header_table:
        header_table[k] = [header_table[k], None]
    
    # Tree to be returned
    ret_tree = TreeNode('Null set', 1, None)
    for trans_set, count in dataset.items():
        local = {}
        for item in trans_set:
            if item in freq_itemset:
                local[item] = header_table[item][0]
        if len(local) > 0:
            ordered_items = [v[0] for v in sorted(local.items(),
                                                  key = lambda p: p[1], 
                                                  reverse = True)]
            update_tree(ordered_items, ret_tree, 
                        header_table, count)

    return ret_tree, header_table

In [260]:
def update_tree(items, in_tree, header_table, count):
    if items[0] in in_tree.children:
        in_tree.children[items[0]].inc(count)
    else:
        in_tree.children[items[0]] = TreeNode(items[0], count, in_tree)
        if header_table[items[0]][1] == None:
            header_table[items[0]][1] = in_tree.children[items[0]]
        else:
            update_header(header_table[items[0]][1],
                          in_tree.children[items[0]])
    if len(items) > 1:
        update_tree(items[1::], in_tree.children[items[0]], 
                    header_table, count)

In [261]:
def update_header(node_to_test, target_node):
    while (node_to_test.node != None):
        node_to_test = node_to_test.node
    node_to_test.node = target_node

In [262]:
def load_simple_data():
    return [['r', 'z', 'h', 'j', 'p'],
            ['z', 'y', 'x', 'w', 'v', 'u', 't', 's'],
            ['z'],
            ['r', 'x', 'n', 'o', 's'],
            ['y', 'r', 'x', 'z', 'q', 't', 's'],
            ['y', 'z', 'x', 'e', 'q', 's', 't', 'm']]

In [263]:
def create_initset(dataset):
    out = {}
    for trans in dataset:
        out[frozenset(trans)] = 1
    return out

In [264]:
initset = create_initset(load_simple_data())
initset

{frozenset({'z'}): 1,
 frozenset({'h', 'j', 'p', 'r', 'z'}): 1,
 frozenset({'s', 't', 'u', 'v', 'w', 'x', 'y', 'z'}): 1,
 frozenset({'n', 'o', 'r', 's', 'x'}): 1,
 frozenset({'q', 'r', 's', 't', 'x', 'y', 'z'}): 1,
 frozenset({'e', 'm', 'q', 's', 't', 'x', 'y', 'z'}): 1}

In [265]:
fp_tree, header = create_tree(initset, 1)
# fp_tree.disp()

In [266]:
def ascend_tree(leaf_node, prefix_path):
    '''Ascends from leaf node to root'''
    if leaf_node.parent != None:
        prefix_path.append(leaf_node.value)
        ascend_tree(leaf_node.parent, prefix_path)

In [267]:
def find_prefix_path(base_path, tree_node):
    '''Tree node comes from header table'''
    cond_paths = {}
    while tree_node != None:
        prefix_path = []
        ascend_tree(tree_node, prefix_path)
        if len(prefix_path) > 1:
            cond_paths[frozenset(prefix_path[1:])] = tree_node.count
        tree_node = tree_node.node
    return cond_paths

In [268]:
# find_prefix_path('x', header['x'][1])

In [273]:
def mine_tree(in_tree, header_table, min_support, pref_fix, freq_item_list):
    big_list = [v[0] for v in sorted(header_table.items(), key = lambda p: p[1][0])]
    
    for base_path in big_list:
        new_freq_set = pref_fix.copy()
        new_freq_set.add(base_path)
        freq_item_list.append(new_freq_set)
        cond_pattern_bases = find_prefix_path(base_path, header_table[base_path][1])
        my_cond_tree, my_head = create_tree(cond_pattern_bases, min_support)
        
        if my_head != None:
            print('conditional tree for: ', new_freq_set)
            my_cond_tree.disp(1)   
            mine_tree(my_cond_tree, my_head, min_support, new_freq_set, freq_item_list)

In [274]:
freq_items = []
mine_tree(fp_tree, header, 1, set([]), freq_items)

conditional tree for:  {'h'}
  Null set count: 1
   z count: 1
    r count: 1
conditional tree for:  {'h', 'r'}
  Null set count: 1
   z count: 1
conditional tree for:  {'p'}
  Null set count: 1
   h count: 1
    z count: 1
     r count: 1
conditional tree for:  {'z', 'p'}
  Null set count: 1
   h count: 1
conditional tree for:  {'p', 'r'}
  Null set count: 1
   z count: 1
    h count: 1
conditional tree for:  {'h', 'p', 'r'}
  Null set count: 1
   z count: 1
conditional tree for:  {'j'}
  Null set count: 1
   h count: 1
    z count: 1
     p count: 1
      r count: 1
conditional tree for:  {'z', 'j'}
  Null set count: 1
   h count: 1
conditional tree for:  {'p', 'j'}
  Null set count: 1
   z count: 1
    h count: 1
conditional tree for:  {'h', 'p', 'j'}
  Null set count: 1
   z count: 1
conditional tree for:  {'r', 'j'}
  Null set count: 1
   z count: 1
    p count: 1
     h count: 1
conditional tree for:  {'p', 'r', 'j'}
  Null set count: 1
   z count: 1
conditional tree for:  {'h', 

    s count: 1
conditional tree for:  {'q', 'x', 's', 'y', 'z', 'e'}
  Null set count: 1
   m count: 1
conditional tree for:  {'q', 't', 'x', 'e'}
  Null set count: 1
   z count: 1
    m count: 1
     s count: 1
      y count: 1
conditional tree for:  {'m', 't', 'e', 'q', 'x'}
  Null set count: 1
   z count: 1
conditional tree for:  {'s', 't', 'e', 'q', 'x'}
  Null set count: 1
   z count: 1
    m count: 1
conditional tree for:  {'q', 'm', 's', 'x', 't', 'e'}
  Null set count: 1
   z count: 1
conditional tree for:  {'y', 't', 'e', 'q', 'x'}
  Null set count: 1
   z count: 1
    m count: 1
     s count: 1
conditional tree for:  {'q', 'm', 'x', 'y', 't', 'e'}
  Null set count: 1
   z count: 1
conditional tree for:  {'q', 'x', 's', 'y', 't', 'e'}
  Null set count: 1
   z count: 1
    m count: 1
conditional tree for:  {'q', 'm', 'x', 's', 'y', 't', 'e'}
  Null set count: 1
   z count: 1
conditional tree for:  {'q'}
  Null set count: 1
   s count: 2
    y count: 2
     z count: 2
      t co