In [6]:
import numpy
from csv import reader
from collections import defaultdict
from itertools import chain, combinations

In [7]:
def powerset(s):
    """Return a lazy iterable over the non-empty proper subsets of ``s``.

    Subsets are produced as tuples, grouped by increasing size from 1 up to
    ``len(s) - 1``. Neither the empty subset nor ``s`` itself is produced.
    """
    # One combinations() iterator per subset size, created on demand.
    subsetsBySize = (combinations(s, size) for size in range(1, len(s)))
    return chain.from_iterable(subsetsBySize)


def getSupport(testSet, itemSetList):
    """Count the transactions that contain every item of ``testSet``.

    Args:
        testSet: Iterable of items to look for.
        itemSetList: Iterable of transactions, each an iterable of items.

    Returns:
        The number of transactions in ``itemSetList`` of which ``testSet``
        is a subset.
    """
    # Build the candidate set once: it does not change between transactions,
    # and a one-shot iterator passed as testSet would otherwise be empty from
    # the second transaction on and match everything.
    candidate = set(testSet)
    return sum(1 for itemSet in itemSetList if candidate.issubset(itemSet))


def associationRule(freqItemSet, itemSetList, minConf):
    """Derive association rules from frequent itemsets.

    For every itemset and every non-empty proper subset ``s`` of it, the
    confidence of the rule ``s -> itemset - s`` is computed as
    ``support(itemset) / support(s)``.

    Args:
        freqItemSet: Iterable of itemsets; each must be a ``set`` because
            ``difference`` is called on it.
        itemSetList: Transactions used to count support.
        minConf: Confidence threshold; only rules whose confidence is
            strictly greater than it are kept.

    Returns:
        A list of ``[antecedent, consequent, confidence]`` entries, where
        antecedent and consequent are sets and confidence is a float.
    """
    rules = []

    for itemSet in freqItemSet:
        itemSetSup = getSupport(itemSet, itemSetList)

        for s in powerset(itemSet):
            antecedentSup = getSupport(s, itemSetList)
            if antecedentSup == 0:
                # The antecedent never occurs in the transactions, so its
                # confidence is undefined; skip it instead of dividing by 0.
                continue

            confidence = itemSetSup / antecedentSup
            if confidence > minConf:
                rules.append([set(s), set(itemSet.difference(s)), confidence])

    return rules

In [8]:
class FPNode:
    """One node of an FP-tree.

    Each node stores an item name, a counter, a reference to its parent,
    a dict of child nodes keyed by item name, and a ``next`` reference that
    starts out as ``None``.
    """

    def __init__(self, itemName, frequency, parentNode):
        """Create a childless node with ``frequency`` as its initial count."""
        self.itemName = itemName
        self.count = frequency
        self.parent = parentNode
        self.children = {}
        self.next = None

    def increment(self, frequency):
        """Add ``frequency`` to this node's count."""
        self.count = self.count + frequency

    def display(self, ind=1):
        """Print this node and all of its descendants, one node per line.

        ``ind`` is the nesting depth; each level adds two spaces of indent.
        """
        indent = '  ' * ind
        print(indent, self.itemName, ' ', self.count)
        for childNode in self.children.values():
            childNode.display(ind + 1)

In [9]:
class FPGrowth:
    """Mine frequent itemsets with FP-Growth and derive association rules.

    Transactions are kept in ``listOfItemset`` with a parallel list of
    weights in ``frequencyOfTransaction``.
    """

    def __init__(self, minimumSupport, minimumConfidence):
        """Store the thresholds and start with an empty dataset.

        Args:
            minimumSupport: Support threshold. ``fpgrowthFromFile`` treats it
                as a fraction of the number of loaded transactions;
                ``constructFPTree`` compares item counts against whatever
                value the attribute currently holds.
            minimumConfidence: Threshold handed to ``associationRule``.
        """
        self.minimumSupport = minimumSupport
        self.minimumConfidence = minimumConfidence
        self.listOfItemset = []
        self.frequencyOfTransaction = []

    def getDatasetFromFile(self, filename):
        """Append every row of the CSV file ``filename`` as a transaction.

        Empty fields are dropped and each row gets a weight of 1. Rows are
        appended to whatever was loaded before; nothing is cleared.
        """
        # newline='' lets the csv module do its own newline handling, as its
        # documentation recommends.
        with open(filename, 'r', newline='') as file:
            for line in reader(file):
                self.listOfItemset.append([field for field in line if field])
                self.frequencyOfTransaction.append(1)

    def updateHeaderTable(self, item, targetNode, headerTable):
        """Append ``targetNode`` to the node chain stored for ``item``.

        ``headerTable[item]`` is a ``[count, firstNode]`` list; nodes of the
        same item are linked through their ``next`` attribute.
        """
        firstNode = headerTable[item][1]
        if firstNode is None:
            headerTable[item][1] = targetNode
            return

        lastNode = firstNode
        while lastNode.next is not None:
            lastNode = lastNode.next
        lastNode.next = targetNode

    def updateTree(self, item, parentNode, headerTable, frequency=1):
        """Add ``frequency`` occurrences of ``item`` below ``parentNode``.

        An existing child is incremented; otherwise a new child is created
        with ``frequency`` as its count and registered in ``headerTable``.

        Returns:
            The child node for ``item``.
        """
        childNode = parentNode.children.get(item)
        if childNode is None:
            childNode = FPNode(item, frequency, parentNode)
            parentNode.children[item] = childNode
            self.updateHeaderTable(item, childNode, headerTable)
        else:
            childNode.increment(frequency)
        return childNode

    def constructFPTree(self):
        """Build the FP-tree for the loaded transactions.

        Returns:
            ``(rootNode, headerTable)``, or ``(None, None)`` when no item
            reaches ``self.minimumSupport``. ``headerTable`` maps each
            frequent item to ``[count, firstNode]``.
        """
        itemCounts = defaultdict(int)
        for i, itemset in enumerate(self.listOfItemset):
            for item in itemset:
                itemCounts[item] += self.frequencyOfTransaction[i]

        headerTable = {
            item: [supportValue, None]
            for item, supportValue in itemCounts.items()
            if supportValue >= self.minimumSupport
        }
        if not headerTable:
            return None, None

        initialNode = FPNode('Null', 1, None)

        for i, itemset in enumerate(self.listOfItemset):
            frequentItems = [item for item in itemset if item in headerTable]
            # Most frequent first, ties broken by item, so that transactions
            # sharing items also share tree prefixes.
            frequentItems.sort(key=lambda item: (-headerTable[item][0], item))

            # Weight the path with the transaction's frequency so the node
            # counts agree with the header-table counts computed above.
            weight = self.frequencyOfTransaction[i]
            currentNode = initialNode
            for item in frequentItems:
                currentNode = self.updateTree(item, currentNode, headerTable, weight)

        return initialNode, headerTable

    def findPrefixPath(self, node, prefixPath):
        """Append the item names from ``node`` up to the root to ``prefixPath``.

        The root (the node whose ``parent`` is ``None``) is not included.
        """
        while node.parent is not None:
            prefixPath.append(node.itemName)
            node = node.parent

    def createConditionalPatternBase(self, item, headerTable):
        """Collect the ancestor paths of every tree node holding ``item``.

        Returns:
            ``(conditionalPaths, frequencyOfEachPath)``: each path lists the
            ancestors of one node in root-to-node order without ``item``
            itself, paired with that node's count. Nodes that hang directly
            off the root contribute nothing.
        """
        conditionalPaths = []
        frequencyOfEachPath = []

        nodeOfTree = headerTable[item][1]
        while nodeOfTree is not None:
            prefixPath = []
            self.findPrefixPath(nodeOfTree, prefixPath)

            # prefixPath runs node-to-root and starts with the item itself;
            # reverse it and drop that first element in one slice.
            ancestors = prefixPath[:0:-1]
            if ancestors:
                conditionalPaths.append(ancestors)
                frequencyOfEachPath.append(nodeOfTree.count)

            nodeOfTree = nodeOfTree.next

        return conditionalPaths, frequencyOfEachPath

    def constructConditionalTree(self, conditionalPatternBase, frequency, minimumSupport):
        """Build the conditional FP-tree for a pattern base.

        Args:
            conditionalPatternBase: List of item paths.
            frequency: Weight of each path, parallel to the pattern base.
            minimumSupport: Minimum weighted count an item needs to be kept.

        Returns:
            ``(rootNode, headerTable)``, or ``(None, None)`` when no item
            reaches ``minimumSupport``.
        """
        itemCounts = defaultdict(int)
        for i, itemSet in enumerate(conditionalPatternBase):
            for item in itemSet:
                itemCounts[item] += frequency[i]

        conditionalHeaderTable = {
            item: [supportValue, None]
            for item, supportValue in itemCounts.items()
            if supportValue >= minimumSupport
        }
        if not conditionalHeaderTable:
            return None, None

        conditionalInitialNode = FPNode('Null', 1, None)

        for itemset, freq in zip(conditionalPatternBase, frequency):
            if freq <= 0:
                # A path without weight contributes no occurrences.
                continue

            # Insert the path once with its weight instead of materialising
            # freq identical copies; the resulting tree is the same.
            currentNode = conditionalInitialNode
            for item in itemset:
                if item in conditionalHeaderTable:
                    currentNode = self.updateTree(item, currentNode, conditionalHeaderTable, freq)

        return conditionalInitialNode, conditionalHeaderTable

    def miningTrees(self, headerTable, prefix, freqItemsetList):
        """Recursively collect frequent itemsets into ``freqItemsetList``.

        Args:
            headerTable: Header table of the tree being mined.
            prefix: Set of items every itemset found here is extended from.
            freqItemsetList: Output list; each frequent itemset is appended
                as a new set.
        """
        itemlistSorted = [
            item
            for item, _ in sorted(headerTable.items(), key=lambda entry: (-entry[1][0], entry[0]))
        ]

        for item in itemlistSorted:
            newFreqItemset = prefix.copy()
            newFreqItemset.add(item)
            freqItemsetList.append(newFreqItemset)

            conditionalPatternBase, frequency = self.createConditionalPatternBase(item, headerTable)
            _, newHeaderTable = self.constructConditionalTree(
                conditionalPatternBase, frequency, self.minimumSupport)

            if newHeaderTable is not None:
                self.miningTrees(newHeaderTable, newFreqItemset, freqItemsetList)

    def fpgrowthFromFile(self, filename):
        """Load ``filename``, mine frequent itemsets and derive rules.

        The rows of the file are added to any transactions already loaded.
        While mining, ``self.minimumSupport`` temporarily holds the absolute
        count (fraction times number of transactions); the original value is
        restored afterwards so that repeated calls do not compound it.

        Returns:
            ``(frequentItemsets, rules)``. Both are empty lists when no item
            reaches the support threshold.
        """
        self.getDatasetFromFile(filename)

        relativeSupport = self.minimumSupport
        self.minimumSupport = len(self.listOfItemset) * relativeSupport
        try:
            fpTree, headerTable = self.constructFPTree()

            # constructFPTree signals "nothing frequent" with (None, None).
            if fpTree is None:
                print('No frequent item set')
                return [], []

            freqentItemset = []
            self.miningTrees(headerTable, set(), freqentItemset)
        finally:
            self.minimumSupport = relativeSupport

        rules = associationRule(freqentItemset, self.listOfItemset, self.minimumConfidence)
        return freqentItemset, rules

In [10]:
# Mine data/example2.csv with thresholds 0.4 (support) and 0.75 (confidence),
# then print the frequent itemsets followed by the derived rules.
fpg = FPGrowth(minimumSupport=0.4, minimumConfidence=0.75)
freqItems, rules = fpg.fpgrowthFromFile("data/example2.csv")
print("Frequent pattern itemset: ", freqItems, "Rules: ", rules, sep="\n")

Frequent pattern itemset: 
[{'a'}, {'c'}, {'c', 'a'}, {'b'}, {'a', 'b'}, {'c', 'b'}, {'c', 'a', 'b'}, {'d'}, {'e'}, {'d', 'e'}]
Rules: 
[[{'c'}, {'a'}, 1.0], [{'a'}, {'c'}, 1.0], [{'b'}, {'a'}, 1.0], [{'b'}, {'c'}, 1.0], [{'b'}, {'c', 'a'}, 1.0], [{'c', 'b'}, {'a'}, 1.0], [{'a', 'b'}, {'c'}, 1.0], [{'d'}, {'e'}, 1.0], [{'e'}, {'d'}, 1.0]]
