In [183]:
from collections import defaultdict

# Shared mining state used by the cells below.
transactions = []            # raw, stripped, non-empty lines of the input file
min_util = 1000000           # utility threshold compared against TWU / itemset utility
utility_lists = {}           # item -> list of utility-list entries
parsed_trans = []            # (tid, items, item_utils, total_util) per parsed line
TWU = defaultdict(int)       # item -> sum of total utilities of transactions containing it
revised_trans = []           # (tid, [(item, util), ...]) after filtering and reordering
with open('Chicago_Crimes_2001_to_2017_utility.txt', 'r') as file:
    for line in file:
        line = line.strip()
        # A blank line (e.g. a trailing newline at end of file) has no
        # "items:total:utils" fields and would make parsing fail, so skip it.
        if line:
            transactions.append(line)

In [185]:
def parse(line, tid):
    """Parse one ``items:total_utility:item_utilities`` line.

    The parsed transaction is appended to ``parsed_trans`` as
    ``(tid, items, item_utils, total_util)`` and the transaction's total
    utility is added to ``TWU`` for every item it contains.

    Args:
        line: text with exactly three colon-separated fields; the first and
            third are whitespace-separated lists (ints and floats).
        tid: identifier stored alongside the parsed transaction.

    Raises:
        ValueError: if the line does not have exactly three fields or a
            token is not numeric.
    """
    item_field, total_field, util_field = line.split(':')
    item_ids = [int(token) for token in item_field.strip().split()]
    transaction_utility = float(total_field.strip())
    utilities = [float(token) for token in util_field.strip().split()]

    parsed_trans.append((tid, item_ids, utilities, transaction_utility))

    # Every item in the transaction is credited with the whole transaction utility.
    for item_id in item_ids:
        TWU[item_id] += transaction_utility


In [187]:
def revise():
    """Filter and reorder every parsed transaction.

    Items whose TWU is below ``min_util`` are dropped. The surviving
    ``(item, utility)`` pairs of each transaction are sorted by ascending
    ``(TWU, item)``, and every transaction that still has at least one pair is
    appended to ``revised_trans`` as ``(tid, pairs)``.
    """
    promising = set()
    for item in TWU:
        if TWU[item] >= min_util:
            promising.add(item)

    # Rank of each surviving item in the global (TWU, item) ascending order.
    rank = {}
    for position, item in enumerate(sorted(promising, key=lambda it: (TWU[it], it))):
        rank[item] = position

    for tid, items, utils, _total in parsed_trans:
        pairs = sorted(
            (pair for pair in zip(items, utils) if pair[0] in promising),
            key=lambda pair: rank[pair[0]],
        )
        if not pairs:
            continue
        revised_trans.append((tid, pairs))

In [189]:
class UtilityListEntry:
    """One row of a utility list, describing an itemset within one transaction."""

    def __init__(self, tid, iu, ru):
        # tid: id of the transaction this row belongs to
        # iu:  utility of the itemset inside that transaction
        # ru:  summed utility of the items that come after it in the transaction
        self.tid, self.iu, self.ru = tid, iu, ru

In [191]:
def build_item_UL():
    """Build the single-item utility lists from ``revised_trans``.

    For every ``(item, utility)`` pair of every revised transaction, append a
    ``UtilityListEntry(tid, iu, ru)`` to ``utility_lists[item]``, where ``iu``
    is the pair's own utility and ``ru`` is the sum of the utilities of the
    pairs that follow it in the same transaction.
    """
    for tid, pairs in revised_trans:
        utilities = [utility for _, utility in pairs]
        for position, (item, own_utility) in enumerate(pairs):
            # Everything after this position counts as "remaining" utility.
            remaining = sum(utilities[position + 1:])
            entry = UtilityListEntry(tid, own_utility, remaining)
            utility_lists.setdefault(item, []).append(entry)

In [193]:
def construct(prefix, x, y):
    """Join two utility lists on transaction id.

    Args:
        prefix: utility list of the itemset shared by ``x`` and ``y``; when it
            is empty or ``None`` no shared utility is subtracted.
        x: utility list whose entries drive the join (output follows its order).
        y: utility list joined against ``x``.

    Returns:
        A new list with one ``UtilityListEntry`` per transaction present in
        both ``x`` and ``y``. Its ``iu`` is ``x.iu + y.iu`` minus the prefix
        entry's ``iu`` (when a prefix is given) and its ``ru`` is ``y``'s ``ru``.

    Raises:
        KeyError: if a non-empty ``prefix`` lacks a transaction shared by
            ``x`` and ``y``.
    """
    y_by_tid = {entry.tid: entry for entry in y}
    prefix_by_tid = {entry.tid: entry for entry in prefix} if prefix else {}

    joined = []
    for x_entry in x:
        y_entry = y_by_tid.get(x_entry.tid)
        if not y_entry:
            # Transaction does not contain y's itemset.
            continue
        combined_iu = x_entry.iu + y_entry.iu
        if prefix:
            # The prefix utility is contained in both x and y; count it once.
            combined_iu -= prefix_by_tid[x_entry.tid].iu
        joined.append(UtilityListEntry(x_entry.tid, combined_iu, y_entry.ru))
    return joined
    

In [195]:
def huiMiner(prefix, ULs, prefixUL=None):
    """Recursively enumerate and print high-utility itemsets.

    Each ``(item, utility_list)`` in ``ULs`` represents the itemset
    ``prefix + (item,)``. An itemset is printed when the sum of its ``iu``
    values reaches ``min_util``; it is extended with the items that follow it
    in ``ULs`` only when the sum of ``iu + ru`` reaches ``min_util``.

    Args:
        prefix: tuple of items shared by every itemset in ``ULs``.
        ULs: list of ``(item, utility_list)`` pairs; each item is only
            combined with the items that come after it in this list.
        prefixUL: utility list of ``prefix`` itself. Leave as ``None`` for the
            empty prefix (the top-level call).
    """
    for i, (Xi, xUL) in enumerate(ULs):
        newPrefix = prefix + (Xi,)
        sumIU = sum(e.iu for e in xUL)
        sumTOT = sum(e.ru + e.iu for e in xUL)

        if sumIU >= min_util:
            print(f"High Utility Itemset: {newPrefix}, Utility: {sumIU}")

        # iu + ru bounds the utility of every extension of newPrefix, so the
        # whole subtree can be skipped when it is below the threshold.
        if sumTOT >= min_util:
            extendedULs = []
            for Yj, yUL in ULs[i + 1:]:
                # xUL and yUL both already include the utility of `prefix`,
                # so the join must subtract the *prefix's* list once. Passing
                # xUL here instead would cancel X's own utility and leave
                # only Y's.
                newUL = construct(prefixUL, xUL, yUL)
                if newUL:
                    extendedULs.append((Yj, newUL))
            # For the next level the shared prefix is newPrefix, whose list is xUL.
            huiMiner(newPrefix, extendedULs, xUL)
            
    

In [197]:
def run():
    """Run the full mining pipeline over ``transactions``.

    Parses every raw transaction, filters and reorders them, builds the
    single-item utility lists, and starts the recursive search, which prints
    every high-utility itemset it finds.

    The shared accumulators are emptied first so that calling ``run()`` again
    (for example by re-executing the cell) does not add a second copy of every
    transaction to the TWU totals and utility lists.
    """
    # Reset in place so other references to these containers stay valid.
    parsed_trans.clear()
    TWU.clear()
    revised_trans.clear()
    utility_lists.clear()

    for tid, line in enumerate(transactions):
        parse(line, tid)
    revise()
    build_item_UL()
    # Use the same (TWU, item) key that revise() uses to order items inside
    # each transaction; sorting on TWU alone lets ties fall back to dict
    # insertion order, which can disagree with the order the remaining
    # utilities were computed for.
    ULs = sorted(utility_lists.items(), key=lambda kv: (TWU[kv[0]], kv[0]))
    huiMiner(tuple(), ULs)

In [199]:
import time
import os
import matplotlib.pyplot as plt


# Announce the threshold in use (label and value on separate lines).
print("running HUIMiner at min util of: ", min_util, sep="\n")

# Wall-clock timing of the complete pipeline.
start = time.time()
run()
end = time.time()
elapsed = end - start

print("Time to run", elapsed, sep="\n")
    

running HUIMiner at min util of: 
1000000
High Utility Itemset: (1,), Utility: 1640506.0
High Utility Itemset: (8,), Utility: 1442717.0
Time to run
103.83925986289978
