In [124]:
import sys, os
from optparse import OptionParser
import csv

In [125]:
class Itemset:
    def __init__(self, item):
        self.item = item
        self.support = 0
        self.tids = set()

In [126]:
class Eclat:
    def __init__(self, filename, minsup, output=False, outfilename=None, maxdepth=0):
        self.filename = filename
        self.minsup = minsup
        self.item_count = 0
        self.trans_count = 0
        self.output = output
        self.outfilename = outfilename
        self.maxdepth = maxdepth
        self.data = None


    def read_data(self):
        self.item_count = 0
        self.trans_count = 0
        self.data = {}

        with open(self.filename, 'r') as csvfile:
            csvreader = csv.reader(csvfile)
            for row in csvreader:
                self.trans_count += 1
                items = [item.strip() for item in row if item.strip()]

                for item in items:
                    if item:
                        if item not in self.data:
                            self.data[item] = Itemset(item)
                            self.item_count += 1

                        self.data[item].tids.add(self.trans_count)

        for _, value in self.data.items():
            value.support = len(value.tids)


    def prune_and_sort_items(self):
        keys_to_delete = [key for key, itemset in self.data.items() if itemset.support < self.minsup]

        for key in keys_to_delete:
            del self.data[key]

        self.data = {k: v for k, v in sorted(self.data.items(), key=lambda item: item[1].support)}


    def calculate_support(self, itemset):
        common_tids = None

        for item in itemset:
            item_data = self.data[item]
            if common_tids is None:
                common_tids = set(item_data.tids)
            else:
                common_tids = common_tids.intersection(item_data.tids)

        # print("itemset", itemset, "common_tids", common_tids)
        return len(common_tids)
    

    def eclat_mine(self, prefix, items, minsup, k, frequent_itemsets):
        print("prefix", prefix)
        support = self.calculate_support(prefix)

        if support >= minsup:
            frequent_itemsets.append(prefix)

        if support < minsup:
            return

        for item in items:
            new_prefix = prefix | {item}
            new_items = items.difference({item})
            
            new_prefix_support = self.calculate_support(new_prefix)
            if new_prefix_support >= minsup and new_prefix not in frequent_itemsets:
                self.eclat_mine(new_prefix, new_items, minsup, k, frequent_itemsets)
            
            # self.eclat_mine(new_prefix, new_items, minsup, k, frequent_itemsets)


    def run(self):
        self.read_data()
        self.prune_and_sort_items()
        minsup = self.minsup
        frequent_itemsets = []

        items = set(self.data.keys())

        for item in items:
            self.eclat_mine({item}, items.difference({item}), minsup, 1, frequent_itemsets)

        # frequent_itemsets = [list(itemset) for itemset in frequent_itemsets]
        
        # for i in range(len(frequent_itemsets)):
        #     print(frequent_itemsets[i])


filename = "../notebook/data/example2.csv"
minsup = 2
outfile = "../notebook/output/example_eclat.csv"

e = Eclat(filename, minsup, output=True, outfilename=outfile, maxdepth=0)
e.run()

prefix {'e'}
prefix {'e', 'd'}
prefix {'c'}
prefix {'a', 'c'}
prefix {'a', 'c', 'b'}
prefix {'c', 'b'}
prefix {'a'}
prefix {'a', 'b'}
prefix {'d'}
prefix {'b'}
