In [1]:
import sys, os
from optparse import OptionParser
import csv
from itertools import chain, combinations

### Eclat Implementation

In [2]:
class Itemset:
    """Per-item record: the item label, its support count and its tidset."""

    def __init__(self, item):
        # A fresh record starts with no transactions attached: support is 0
        # and the set of transaction ids is empty (and private to this record).
        self.item, self.support, self.tids = item, 0, set()

In [3]:
class Eclat:
    """Frequent-itemset miner based on tidset intersection (Eclat).

    Transactions are read from a CSV file, one transaction per row.  Each item
    is mapped to the set of transaction ids (tids) of the rows containing it;
    the support of an itemset is the size of the intersection of the tidsets
    of its items.

    Attributes:
        filename: path of the CSV transaction file.
        minsup: minimum support, as an absolute number of transactions.
        item_count: number of distinct items seen by ``read_data``.
        trans_count: number of rows read by ``read_data``.
        output, outfilename, maxdepth: stored as given; no method of this
            class reads them.
        data: dict mapping item -> ``Itemset`` record (``None`` until
            ``read_data`` has run).
    """

    def __init__(self, filename, minsup, output=False, outfilename=None, maxdepth=0):
        self.filename = filename
        self.minsup = minsup
        self.item_count = 0
        self.trans_count = 0
        self.output = output
        self.outfilename = outfilename
        self.maxdepth = maxdepth
        self.data = None

    def read_data(self):
        """Load the CSV file and build the item -> tidset mapping.

        Cells are stripped of surrounding whitespace and empty cells are
        dropped.  Transaction ids are 1-based and every row yielded by the
        CSV reader consumes one id.

        Returns:
            The list of transactions, each a list of item strings in file
            order.
        """
        self.item_count = 0
        self.trans_count = 0
        self.data = {}
        transactions = []

        # The csv module asks for files to be opened with newline='' so that
        # it can handle line endings (and quoted newlines) itself.
        with open(self.filename, 'r', newline='') as csvfile:
            for row in csv.reader(csvfile):
                self.trans_count += 1
                items = [cell for cell in (raw.strip() for raw in row) if cell]
                transactions.append(items)

                for item in items:
                    if item not in self.data:
                        self.data[item] = Itemset(item)
                        self.item_count += 1
                    self.data[item].tids.add(self.trans_count)

        for record in self.data.values():
            record.support = len(record.tids)

        return transactions

    def prune_and_sort_items(self):
        """Drop items below ``minsup`` and order the rest by ascending support.

        ``self.data`` is replaced by a new dict whose insertion order is the
        sorted order; ties keep their previous relative order.
        """
        frequent = [
            (item, record)
            for item, record in self.data.items()
            if record.support >= self.minsup
        ]
        frequent.sort(key=lambda pair: pair[1].support)  # list.sort is stable
        self.data = dict(frequent)

    def calculate_support(self, itemset):
        """Return ``(support, tids)`` for ``itemset``.

        ``tids`` is a new set holding the ids of the transactions that contain
        every item of ``itemset``; ``support`` is its size.  The empty itemset
        is contained in every transaction, so it yields all ids
        ``1..trans_count``.

        Raises:
            KeyError: if an item is not present in ``self.data``.
        """
        common_tids = None
        for item in itemset:
            tids = self.data[item].tids
            common_tids = set(tids) if common_tids is None else common_tids & tids

        if common_tids is None:
            # No item constrained the result: every transaction qualifies.
            common_tids = set(range(1, self.trans_count + 1))

        return len(common_tids), common_tids

    @staticmethod
    def _record(frequent_itemsets, k, itemset, tids):
        """Store ``(frozenset(itemset), support, tids)`` under key ``k`` once."""
        entry = (frozenset(itemset), len(tids), tids)
        bucket = frequent_itemsets.setdefault(k, [])
        # Kept so that repeated calls with overlapping search spaces still do
        # not produce duplicate entries.
        if entry not in bucket:
            bucket.append(entry)

    def _extend(self, prefix, prefix_tids, candidates, minsup, k, frequent_itemsets):
        """Record every frequent itemset ``prefix + subset(candidates)``.

        ``candidates`` is an ordered list; an itemset is only ever grown with
        items that come later in that list, so each itemset is generated
        exactly once.  Tidsets are intersected incrementally from
        ``prefix_tids`` instead of being recomputed from scratch.
        """
        # Items that are infrequent together with the prefix cannot appear in
        # any frequent superset of it, so they are dropped before recursing.
        survivors = []
        for item in candidates:
            tids = prefix_tids & self.data[item].tids
            if len(tids) >= minsup:
                survivors.append((item, tids))

        for pos, (item, tids) in enumerate(survivors):
            extended = prefix | {item}
            self._record(frequent_itemsets, k + 1, extended, tids)
            later = [other for other, _ in survivors[pos + 1:]]
            self._extend(extended, tids, later, minsup, k + 1, frequent_itemsets)

    def eclat_mine(self, prefix, items, minsup, k, frequent_itemsets):
        """Record ``prefix`` and all of its frequent extensions.

        Args:
            prefix: set of items forming the current itemset.
            items: iterable of candidate items that may be added to
                ``prefix``; items already in ``prefix`` are ignored.
            minsup: minimum support (absolute count).
            k: key under which ``prefix`` is stored; an extension by ``n``
                items is stored under ``k + n``.  When ``k < 1`` the prefix
                is recorded but not extended.
            frequent_itemsets: dict ``k -> list of (frozenset, support,
                tids)``, updated in place.

        Raises:
            KeyError: if ``prefix`` or ``items`` contains an item that is not
                in ``self.data``.
        """
        support, common_tids = self.calculate_support(prefix)
        if support < minsup:
            return

        self._record(frequent_itemsets, k, prefix, common_tids)
        if k < 1:
            return

        # Candidates are explored in the order of self.data so that the
        # result does not depend on set iteration order.
        rank = {item: pos for pos, item in enumerate(self.data)}
        candidates = sorted(set(items) - set(prefix), key=rank.__getitem__)
        self._extend(set(prefix), common_tids, candidates, minsup, k, frequent_itemsets)

    def run(self):
        """Read the data, prune infrequent items and mine all frequent itemsets.

        Returns:
            ``(transactions, frequent_itemsets)`` where ``transactions`` is
            the list returned by ``read_data`` and ``frequent_itemsets`` maps
            itemset size ``k`` to a list of ``(frozenset, support, tids)``.
        """
        listOfItemInTransaction = self.read_data()
        self.prune_and_sort_items()

        frequent_itemsets = {}
        ordered = list(self.data)

        # Each start item is only combined with the items that follow it, so
        # the search space is partitioned instead of being explored once per
        # permutation.
        for pos, item in enumerate(ordered):
            print("---NEW---")  # progress marker: one line per frequent single item
            self.eclat_mine({item}, ordered[pos + 1:], self.minsup, 1, frequent_itemsets)

        return listOfItemInTransaction, frequent_itemsets

### Association Rule

In [4]:
def powerset(s):
    """Iterate over the non-empty proper subsets of ``s`` as tuples.

    Despite the name, neither the empty tuple nor the full collection is
    produced: sizes run from 1 up to ``len(s) - 1``.  Subsets come out grouped
    by increasing size, in the order ``itertools.combinations`` yields them.
    """
    sizes = range(1, len(s))
    return chain.from_iterable(map(lambda size: combinations(s, size), sizes))

In [5]:
def getSupport(testSet, itemSetList):
    """Count the entries of ``itemSetList`` that contain every element of ``testSet``.

    ``testSet`` is turned into a set for each comparison; each entry of
    ``itemSetList`` may be any iterable of items.
    """
    return sum(1 for candidate in itemSetList if set(testSet).issubset(candidate))

In [6]:
def associationRule(freqItemSet, itemSetList, minConf):
    """Derive association rules ``antecedent -> consequent`` from frequent itemsets.

    For every itemset and every non-empty proper subset ``s`` of it, the
    confidence ``support(itemset) / support(s)`` is computed against
    ``itemSetList``; a rule is kept when the confidence is strictly greater
    than ``minConf``.

    Args:
        freqItemSet: either an iterable of itemsets, or the dictionary
            produced by ``Eclat.run`` (``k -> list of (frozenset, support,
            tids)`` records).
        itemSetList: the transactions, each an iterable of items.
        minConf: confidence threshold (exclusive).

    Returns:
        A list of ``[antecedent_set, consequent_set, confidence]`` entries.
    """
    # Eclat.run() groups its records by itemset size; flatten that layout.
    if isinstance(freqItemSet, dict):
        entries = [entry for bucket in freqItemSet.values() for entry in bucket]
    else:
        entries = freqItemSet

    rules = []

    for entry in entries:
        # An Eclat record is a tuple whose first field is the itemset itself.
        if isinstance(entry, tuple) and entry and isinstance(entry[0], (set, frozenset)):
            itemSet = frozenset(entry[0])
        else:
            itemSet = frozenset(entry)

        itemSetSup = getSupport(itemSet, itemSetList)

        for s in powerset(itemSet):
            antecedentSup = getSupport(s, itemSetList)
            if antecedentSup == 0:
                # The antecedent never occurs, so the confidence is undefined.
                continue

            confidence = itemSetSup / antecedentSup
            if confidence > minConf:
                rules.append([set(s), set(itemSet.difference(s)), confidence])

    return rules

### Trigger

In [7]:
# Mining configuration for the worked example.
filename = "../notebook/data/example2.csv"
minsup = 2  # absolute support threshold (number of transactions)
outfile = "../notebook/output/example_eclat.csv"

e = Eclat(
    filename,
    minsup,
    output=True,
    outfilename=False,
    maxdepth=0,
)
listOfItemInTransaction, freqItems = e.run()

# Show the frequent itemsets grouped by their size k.
for key in freqItems:
    val = freqItems[key]
    print("key", key, " val:", val)

# Rule generation is currently disabled:
# rules = associationRule(freqItems, listOfItemInTransaction, 0.7)
# rules

---NEW---
---NEW---
---NEW---
---NEW---
---NEW---
key 1  val: [(frozenset({'d'}), 2, {2, 4}), (frozenset({'b'}), 2, {1, 3}), (frozenset({'a'}), 3, {1, 2, 3}), (frozenset({'c'}), 3, {1, 2, 3}), (frozenset({'e'}), 2, {2, 4})]
key 2  val: [(frozenset({'d', 'e'}), 2, {2, 4}), (frozenset({'a', 'b'}), 2, {1, 3}), (frozenset({'c', 'b'}), 2, {1, 3}), (frozenset({'a', 'c'}), 3, {1, 2, 3})]
key 3  val: [(frozenset({'a', 'c', 'b'}), 2, {1, 3})]
