In [580]:
data = [['A', 'B', 'D', 'E'],
        ['B', 'C', 'E'],
        ['A', 'B', 'D', 'E'],
        ['A', 'B', 'C', 'E'],
        ['A', 'B', 'C', 'D', 'E'],
        ['B', 'C', 'D']]

# data = [['E', 'A', 'D', 'B'],
#         ['D', 'A', 'C', 'E', 'B'],
#         ['C', 'A', 'B', 'E'],
#         ['B', 'A', 'D'],
#         ['D'],
#         ['D', 'B'],
#         ['A', 'D', 'E'],
#         ['B', 'C']]

data = [['r', 'z', 'h', 'j', 'p'],
        ['z', 'y', 'x', 'w', 'v', 'u', 't', 's'],
        ['z'],
        ['r', 'x', 'n', 'o', 's'],
        ['y', 'r', 'x', 'z', 'q', 't', 'p'],
        ['y', 'z', 'x', 'e', 'q', 's', 't', 'm']]

In [581]:
class Node:
    count = 0
    def __init__(self, item, parent):
        self.item = item
        self.count += 1
        self.children = {}
        self.parent = parent
        self.node_link = None

    def incr(self):
        self.count += 1
        
    def print(self, depth=0):
        print(' ' * depth, f'{self.item}:{self.count}')
        for child in self.children.values():
            child.print(depth + 1)

In [582]:
class FrequentItemHeader:
    """
    FrequentItemHeader stores the item name
    and the node link pointing to the node with
    the same name."""
    def __init__(self, frequent_items):
        self.node_links = {}
        self.frequent_items = frequent_items
        
    def link(self, node):
        if node is None: return
        if node.item in self.node_links:
            curr = self.node_links[node.item]
            while curr and curr.node_link:
                curr = curr.node_link
            curr.node_link = node
        else:
            self.node_links[node.item] = node

In [583]:
def construct_fp_tree(data, min_support=3):
    # Collect the frequent items.
    frequent_items = {}
    for row in data:
        # Count the duplicate item once only.
        for item in set(row):
            count = frequent_items.get(item, 0)
            frequent_items[item] = count + 1

    # Sort by values, then key.
    frequent_items_list = sorted(frequent_items.items(), 
#                                  key=lambda t: (-t[1], t[0]))
                                 key=lambda t: t[1],
                                 reverse=True)
    frequent_items_list = [(item, count)
                            for item, count in frequent_items_list
                            if count >= min_support]
    
    frequent_item_header = FrequentItemHeader(frequent_items_list)
    # Create the root of the FP-tree.
    root = Node(None, None)
    
    def insert_tree(tree, items):
        if len(items) == 0: return
        item = items[0]
        if item in tree.children:
            # Increment the count if the children exist.
            tree.children[item].incr()
        else:
            # Create a new node and link back to the parent.
            tree.children[item] = Node(item, tree)
            # Node-link the header.
            frequent_item_header.link(tree.children[item])
        
        # If there are items, recursively add them.
        if len(items) > 0:
            insert_tree(tree.children[item], items[1:])


    for row in data:
        # Sort them in ascending order to the frequent items list.
        items = [item 
                 for item, count in frequent_items_list
                 if item in row]
        insert_tree(root, items)
    return root, frequent_item_header

In [584]:
tree, header = construct_fp_tree(data)
tree.print()

 None:1
  z:5
   r:1
   x:3
    t:2
     y:2
      s:2
    r:1
     t:1
      y:1
  x:1
   r:1
    s:1


In [585]:
header.frequent_items, header.node_links

([('z', 5), ('x', 4), ('r', 3), ('t', 3), ('y', 3), ('s', 3)],
 {'z': <__main__.Node at 0x112b33518>,
  'r': <__main__.Node at 0x112b33d68>,
  'x': <__main__.Node at 0x112b33ba8>,
  't': <__main__.Node at 0x1132d90b8>,
  'y': <__main__.Node at 0x1132d9d30>,
  's': <__main__.Node at 0x1132d9588>})

In [None]:
# Start mining from the bottom of the tree.
def mine_tree(header, tree, prefix_base = [], result = []):
    prefixes = list(reversed([item for item, count in header.frequent_items]))
    for prefix in prefixes:
        if prefix not in header.node_links: continue
        node_link = header.node_links[prefix]
        if node_link is None:
            continue

        conditional_patterns = []
        while node_link is not None:
            patterns = set()
            parent = node_link #.parent
            while parent is not None:
                patterns.add(parent.item)
                parent = parent.parent
            patterns.remove(None)
            print(prefix, '->', patterns)
            conditional_patterns.append(prefix_base + list(patterns))
            node_link = node_link.node_link
        print('conditional_patterns', conditional_patterns)
#         for patterns in conditional_patterns:
        result.extend(conditional_patterns)
        innertree, innerheader = construct_fp_tree(conditional_patters)
        innertree.print()
        mine_tree(innerheader, innertree, prefix_base + [prefix], result)
        print()

In [None]:
result = []
mine_tree(header, tree, [], result)
result

In [588]:
 [set(['y']), set(['y', 'z']), set(['y', 'x', 'z']), set(['y', 'x']),
set(['s']), set(['x', 's']), set(['t']), set(['y', 't']), set(['x',
't']), set(['y', 'x', 't']), set(['z', 't']), set(['y', 'z', 't']),
set(['x', 'z', 't']), set(['y', 'x', 'z', 't']), set(['r']), set(['x']),
set(['x', 'z']), set(['z'])]

[{'y'},
 {'y', 'z'},
 {'x', 'y', 'z'},
 {'x', 'y'},
 {'s'},
 {'s', 'x'},
 {'t'},
 {'t', 'y'},
 {'t', 'x'},
 {'t', 'x', 'y'},
 {'t', 'z'},
 {'t', 'y', 'z'},
 {'t', 'x', 'z'},
 {'t', 'x', 'y', 'z'},
 {'r'},
 {'x'},
 {'x', 'z'},
 {'z'}]