In [1]:
import sys
import os
import time
import numpy as np

class efim:
    """Mine high-utility itemsets from a utility transaction file.

    Every non-blank input line holds three ``:``-separated fields: the items,
    an integer transaction weight, and the integer per-item utilities.  The
    items and the utilities are split on ``sep``.  Items are kept as the
    strings read from the file.

    Mined itemsets are stored in ``self.Patterns`` as
    ``{tuple_of_items: utility}``.
    """

    def __init__(self, inputFile, minUtil, sep = '\t'):
        """Store the mining configuration; no file access happens here.

        Args:
            inputFile: Path of the utility transaction file.
            minUtil: Minimum utility an itemset needs to be reported.
            sep: Separator between items / utilities inside one field.
        """
        self.inputFile = inputFile
        self.minUtil = minUtil
        self.sep = sep
        self.Patterns = {}

    def read_file(self):
        """Load the database and derive the initial search candidates.

        Returns:
            A ``(file_data, primary, secondary)`` tuple.  ``file_data`` maps
            each filtered, globally ordered item tuple to
            ``[utilities, prefix_utility]`` (the prefix utility starts at 0).
            ``secondary`` is the set of items whose TWU reaches ``minUtil``
            and ``primary`` is the ordered list of items whose sub-tree
            utility reaches ``minUtil``.
        """
        file_data = {}
        twu = {}
        with open(self.inputFile, 'r') as f:
            for raw in f:
                raw = raw.strip()
                if not raw:
                    # Blank lines (e.g. a trailing newline) carry no transaction.
                    continue
                fields = [part.split(self.sep) for part in raw.split(":")]
                items = fields[0]
                weight = int(fields[1][0])
                utilities = [int(x) for x in fields[2]]

                # Identical item lists are merged by adding their utilities.
                key = tuple(items)
                if key in file_data:
                    file_data[key] = [x + y for x, y in zip(file_data[key], utilities)]
                else:
                    file_data[key] = utilities

                # Accumulate the TWU (Transaction-Weighted Utilization).
                for item in items:
                    twu[item] = twu.get(item, 0) + weight

        # Keep items reaching minUtil, ordered by ascending TWU.
        twu = {
            k: v
            for k, v in sorted(twu.items(), key=lambda kv: kv[1])
            if v >= self.minUtil
        }
        secondary = set(twu)

        # One global rank per item.  Sorting transactions by the raw TWU value
        # would leave items with equal TWU in per-transaction order, so the
        # same itemset could be laid out differently in different transactions
        # and be missed by the prefix projection.
        rank = {item: position for position, item in enumerate(twu)}

        subtree = {}
        filtered_transactions = {}
        for items, utilities in file_data.items():
            transaction = sorted(
                (pair for pair in zip(items, utilities) if pair[0] in secondary),
                key=lambda pair: rank[pair[0]],
            )
            if not transaction:
                continue

            key = tuple(pair[0] for pair in transaction)
            kept = [pair[1] for pair in transaction]
            if key in filtered_transactions:
                merged = filtered_transactions[key]
                merged[0] = [x + y for x, y in zip(merged[0], kept)]
            else:
                filtered_transactions[key] = [kept, 0]

            # Sub-tree utility of an item: its own utility plus everything
            # after it in the ordered transaction (running suffix sum).
            remaining = sum(kept)
            for item, util in transaction:
                subtree[item] = subtree.get(item, 0) + remaining
                remaining -= util

        primary = sorted(
            (item for item in subtree if subtree[item] >= self.minUtil),
            key=rank.__getitem__,
        )

        return filtered_transactions, primary, secondary

    def recursive(self, prefix, file_data, primary, secondary, min_util):
        """Depth-first search extending ``prefix`` with each primary item.

        Args:
            prefix: List of items already fixed in the current itemset.
            file_data: Projected database mapping an item tuple to
                ``[utilities, prefix_utility]``.
            primary: Items to extend ``prefix`` with, in iteration order.
            secondary: Container of items still allowed to appear in
                extensions; only membership tests are performed on it.
            min_util: Minimum utility threshold.

        Itemsets whose utility reaches ``min_util`` are written to
        ``self.Patterns``.
        """
        for item in primary:
            projected_db = {}
            beta = prefix + [item]

            # Project the database on the current item: keep what follows the
            # item and fold the item's own utility into the prefix utility.
            for k, v in file_data.items():
                if item not in k:
                    continue
                index = k.index(item)
                items = k[index + 1:]
                values = v[0][index + 1:]
                prefix_util = v[1] + v[0][index]

                entry = projected_db.get(items)
                if entry is None:
                    projected_db[items] = [values, prefix_util]
                else:
                    entry[0] = [x + y for x, y in zip(entry[0], values)]
                    entry[1] += prefix_util

            # The utility of beta is the sum of the projected prefix utilities.
            utility = sum(entry[1] for entry in projected_db.values())
            if utility >= min_util:
                self.Patterns[tuple(beta)] = utility

            # Local utility: whole remaining transaction plus prefix utility.
            # Sub-tree utility: the item and everything after it, plus prefix.
            local_utils = {}
            subtree_utils = {}
            for k, v in projected_db.items():
                remaining = sum(v[0])
                total = remaining + v[1]
                for candidate, util in zip(k, v[0]):
                    if candidate in secondary:
                        local_utils[candidate] = local_utils.get(candidate, 0) + total
                        subtree_utils[candidate] = (
                            subtree_utils.get(candidate, 0) + remaining + v[1]
                        )
                    remaining -= util

            next_primary = [key for key, value in subtree_utils.items() if value >= min_util]
            if not next_primary:
                continue
            # A set keeps the membership tests in deeper levels constant-time.
            next_secondary = {key for key, value in local_utils.items() if value >= min_util}

            self.recursive(beta, projected_db, next_primary, next_secondary, min_util)

    def run(self):
        """Read the input file, mine all patterns and print the elapsed time."""
        start = time.time()

        # Start from a clean slate so a repeated run cannot keep stale results.
        self.Patterns = {}
        fileData, primary, secondary = self.read_file()

        # Use the instance threshold, not a module-level global of that name.
        self.recursive([], fileData, primary, secondary, self.minUtil)

        end = time.time()
        print("Time taken: " + str(end - start))

    def savePatterns(self, outputFile):
        """Write every mined pattern to ``outputFile`` as ``<items>: <utility>``."""
        with open(outputFile, 'w') as f:
            for key, value in self.Patterns.items():
                f.write(str(key) + ": " + str(value) + "\n")

    def getPatterns(self):
        """Return the dictionary of mined patterns (``{items: utility}``)."""
        return self.Patterns


if __name__ == "__main__":
    # Benchmark configuration; 'Utility_kosarak.csv' is an alternative dataset.
    inputFile = 'Utility_T10I4D100K.csv'
    minUtil = 10000
    sep = "\t"

    # Mine with the local implementation and dump its patterns.
    miner = efim(inputFile, minUtil, sep)
    miner.run()
    print(f"# of patterns: {len(miner.getPatterns())}")
    miner.savePatterns("mine.txt")

    # Mine the same input with PAMI's EFIM for comparison.
    from PAMI.highUtilityPatterns.basic.EFIM import EFIM

    reference = EFIM(inputFile, minUtil, sep)
    reference.startMine()
    print(f"# of patterns: {len(reference.getPatterns())}")
    print(f"Time taken: {reference.getRuntime()}")
    reference.save('pami.txt')




Time taken: 47.402987480163574
# of patterns: 42179
High Utility patterns were generated successfully using EFIM algorithm
# of patterns: 42179
Time taken: 144.4426715373993
