In [1]:
from collections import defaultdict
from typing import List, Set, Dict, Tuple
import math
import copy

In [2]:
class Item:
    """A single item, identified by one upper-case character.

    Only the first character of ``name`` is kept and it is upper-cased, so
    ``Item("apple")``, ``Item("a")`` and ``Item("A")`` all denote the same item.
    Items are hashable and compare by that character, which lets them be used
    as dictionary keys and set members.
    """

    def __init__(self, name: str) -> None:
        # Only the first character is significant; the rest of ``name`` is dropped.
        self.name = name[0].upper()

    def __repr__(self) -> str:
        return f"Item({self.name})"

    def __eq__(self, other) -> bool:
        # Compare by ``name`` (duck-typed, so instances created before the
        # class cell was re-run still compare equal). Objects without a
        # ``name`` attribute are simply "not equal" instead of raising.
        try:
            return self.name == other.name
        except AttributeError:
            return NotImplemented

    def __ne__(self, other) -> bool:
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            # Let Python fall back to its default handling (-> "not equal").
            return NotImplemented
        return not outcome

    def __hash__(self) -> int:
        return hash(self.name)

In [3]:
class Itemset:
    """A set of single-character items stored as a sorted, de-duplicated string.

    ``items`` is upper-cased, de-duplicated and sorted on construction, so
    ``Itemset("bca")`` and ``Itemset("ABC")`` are equal. Iterating an Itemset
    yields one ``Item`` per character.

    Note: ``union`` modifies the itemset in place, which also changes its hash.
    Do not call ``union`` on an Itemset that is already stored in a set or
    used as a dictionary key.
    """

    def __init__(self, items="") -> None:
        # Canonical form: upper-case, unique characters, alphabetical order.
        self.items = "".join(sorted(set(items.upper())))

    def __repr__(self) -> str:
        return f"Itemset({self.items})"

    def __eq__(self, other) -> bool:
        # Compare canonical strings; objects without ``items`` are "not equal"
        # instead of raising AttributeError.
        try:
            return self.items == other.items
        except AttributeError:
            return NotImplemented

    def __ne__(self, other) -> bool:
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            return NotImplemented
        return not outcome

    def __hash__(self) -> int:
        return hash(self.items)

    def __contains__(self, item: Item) -> bool:
        # Membership is tested on the item's single-character name.
        return item.name in self.items

    def __iter__(self):
        for item in self.items:
            yield Item(item)

    def union(self, other):
        """Add an ``Item`` or all items of another ``Itemset`` to this itemset, in place.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: if ``other`` is neither an ``Item`` nor an ``Itemset``.
        """
        if isinstance(other, Item):
            extra = other.name
        elif isinstance(other, Itemset):
            extra = other.items
        else:
            raise ValueError(f"Cannot union Itemset with {type(other).__name__}")
        # Re-run the constructor's normalisation on the combined characters.
        self.items = Itemset(self.items + extra).items
        return self

In [4]:
def item_utility_in_transaction(item: Item, transaction: Dict[Item, int], external_utility_table: Dict[Item, int]) -> int:
    """Return the utility of ``item`` in ``transaction``.

    The utility is the value stored for the item in ``transaction`` multiplied
    by the value stored for it in ``external_utility_table``. A ``KeyError``
    propagates if the item is missing from either mapping.
    """
    quantity = transaction[item]
    unit_utility = external_utility_table[item]
    return quantity * unit_utility

def itemset_utility_in_transaction(itemset: Itemset, transaction: Dict[Item, int], external_utility_table: Dict[Item, int]) -> int:
    """Return the summed utility of every item of ``itemset`` in ``transaction``.

    Raises:
        ValueError: if at least one item of ``itemset`` is not in ``transaction``.
    """
    # Guard clause: the itemset must be fully contained in the transaction.
    if any(item not in transaction for item in itemset):
        raise ValueError("Itemset not found in transaction")

    total = 0
    for item in itemset:
        total += item_utility_in_transaction(item, transaction, external_utility_table)
    return total

def itemset_utility_in_database(itemset: Itemset, transaction_database: Dict[str, Dict[Item, int]], external_utility_table: Dict[Item, int]) -> int:
    """Return the utility of ``itemset`` summed over every transaction that contains all of its items.

    Transactions missing any item of ``itemset`` contribute nothing.
    """
    total = 0
    for transaction in transaction_database.values():
        # Only transactions that contain the whole itemset are counted.
        if all(item in transaction for item in itemset):
            total += itemset_utility_in_transaction(itemset, transaction, external_utility_table)
    return total

def transaction_utility_in_database(transaction: Dict[Item, int], external_utility_table: Dict[Item, int]) -> int:
    """Return the transaction utility: the summed utility of every item in ``transaction``."""
    total = 0
    for item in transaction:
        total += item_utility_in_transaction(item, transaction, external_utility_table)
    return total

def database_utility(transaction_database: Dict[str, Dict[Item, int]], external_utility_table: Dict[Item, int]) -> int:
    """Return the total utility of the database: the sum of all transaction utilities."""
    total = 0
    for transaction in transaction_database.values():
        total += transaction_utility_in_database(transaction, external_utility_table)
    return total

In [5]:
class ULElem:
    """One entry of a UTList: a transaction id with its ``tns`` value and an item utility.

    All three fields default to ``None`` so an element can be created empty and
    filled in afterwards.
    """

    def __init__(self, TID=None, tns=None, item_utility=None):
        self.TID, self.tns, self.item_utility = TID, tns, item_utility

    def __repr__(self):
        fields = (
            ("TID", self.TID),
            ("tns", self.tns),
            ("item_utility", self.item_utility),
        )
        rendered = ", ".join(f"{label}={value!r}" for label, value in fields)
        return f"ULElem({rendered})"

In [6]:
class UTList:
    """Per-item utility list: the item, its SINS count, a running utility sum and its ULElems.

    A new list starts with ``SINS = 0``, ``sum_utility = 0`` and no elements;
    callers fill these in afterwards.
    """

    def __init__(self, item_name: Item):
        self.item_name = item_name
        self.SINS = 0
        self.sum_utility = 0
        self.ULElems = []

    def __repr__(self):
        # Multi-line layout: one field per line, one ULElem per line inside the list.
        lines = [
            "UTList(",
            f"  item_name={self.item_name!r},",
            f"  SINS={self.SINS},",
            f"  sum_utility={self.sum_utility},",
            "  ULElems=[",
            "    " + ",\n    ".join(map(repr, self.ULElems)),
            "  ]",
            ")",
        ]
        return "\n".join(lines)

def SINS(sensitive_item: Item, NS: Set[Itemset]) -> int:
    """Count the itemsets in ``NS`` that contain ``sensitive_item``."""
    count = 0
    for itemset in NS:
        if sensitive_item in itemset:
            count += 1
    return count

def tns(NS: Set[Itemset], transaction: Dict[Item, int]) -> float:
    """Return ``1 / (1 + k)`` rounded to two decimals.

    ``k`` is the number of itemsets in ``NS`` whose items are all present in
    ``transaction``.
    """
    contained = [
        itemset
        for itemset in NS
        if all(item in transaction for item in itemset)
    ]
    ratio = 1 / (1 + len(contained))
    return round(ratio, 2)

In [7]:
class UTLDic:
    """Dictionary of UTLists, one per item that occurs in a sensitive itemset.

    The dictionary is built once, at construction time, from the transaction
    database. It supports ``utldic[item]`` lookup and assignment (``Item`` keys
    only) and iteration over ``(item, UTList)`` pairs.

    Args:
        transaction_database: maps a transaction id to ``{item: quantity}``.
        external_utility_table: maps an item to its per-unit utility.
        S: the sensitive itemsets.
        NS: the itemsets used to compute ``SINS`` and ``tns``.
        delta: utility threshold; stored on the instance for later use.
    """

    def __init__(self, transaction_database: Dict[str, Dict[Item, int]], external_utility_table: Dict[Item, int], S: Set[Itemset], NS: Set[Itemset], delta: int):
        self.transaction_database = transaction_database
        self.external_utility_table = external_utility_table
        self.S = S
        self.NS = NS
        self.delta = delta
        self.__UTLDic = self.__construct_UTLDic()

    def __construct_UTLDic(self) -> Dict[Item, UTList]:
        """Build the ``{item: UTList}`` mapping from the instance's own data."""
        # Union of all sensitive itemsets. Itemset.union works in place, so a
        # fresh Itemset is used as the accumulator.
        SItem = Itemset()
        for itemset in self.S:
            SItem.union(itemset)

        # One UTList per sensitive item, with its SINS count filled in.
        utldic = {}
        for item in SItem:
            UTL = UTList(item)
            UTL.SINS = SINS(item, self.NS)
            utldic[item] = UTL

        # Scan the database once.
        for tid, transaction in self.transaction_database.items():
            # Items of every sensitive itemset fully contained in this transaction.
            # Uses self.S (not a module-level ``S``) so the class does not depend
            # on notebook globals.
            SI = set().union(*[itemset for itemset in self.S if all(item in transaction for item in itemset)])

            if not SI:
                continue

            # tns depends only on the transaction, so compute it once per transaction.
            transaction_tns = tns(self.NS, transaction)

            for item in SI:
                ULE = ULElem()
                ULE.TID = tid
                ULE.item_utility = item_utility_in_transaction(item, transaction, self.external_utility_table)
                ULE.tns = transaction_tns
                utldic[item].ULElems.append(ULE)
                utldic[item].sum_utility += ULE.item_utility

        return utldic

    def __repr__(self):
        utldic_repr = ',\n  '.join(f'{repr(item)}: {repr(utlist)}' for item, utlist in self.__UTLDic.items())
        return f"UTLDic(\n  {utldic_repr}\n)"

    def __getitem__(self, item):
        """Return the UTList of ``item``.

        Raises:
            ValueError: if ``item`` is not an ``Item``.
            KeyError: if ``item`` has no UTList.
        """
        if isinstance(item, Item):
            return self.__UTLDic[item]
        else:
            raise ValueError(f"UTLDic keys must be Item instances, got {type(item).__name__}")

    def __setitem__(self, item, utlist):
        """Store ``utlist`` for ``item``.

        Raises:
            ValueError: unless ``item`` is an ``Item`` and ``utlist`` is a ``UTList``.
        """
        if isinstance(item, Item) and isinstance(utlist, UTList):
            self.__UTLDic[item] = utlist
        else:
            raise ValueError("UTLDic entries must map an Item to a UTList")

    def __iter__(self):
        # Iterates (item, UTList) pairs, not bare keys.
        return iter(self.__UTLDic.items())

    def L(self, itemset: Itemset):
        """Return the set of TIDs that appear in the UTList of every item of ``itemset``.

        ``itemset`` must be non-empty; with no items there is nothing to
        intersect and ``set.intersection`` raises ``TypeError``.
        """
        return set.intersection(*[set(elem.TID for elem in self[item].ULElems) for item in itemset])

In [8]:
def _current_itemset_utility(utldic, itemset, tids):
    """Utility of ``itemset`` over the transactions ``tids`` as currently recorded in the UTLists."""
    return sum(
        elem.item_utility
        for item in itemset
        for elem in utldic[item].ULElems
        if elem.TID in tids
    )


def hide_sensitive_high_utility_itemsets(utldic: "UTLDic") -> "UTLDic":
    """Return a deep copy of ``utldic`` in which item utilities are lowered to hide the sensitive itemsets.

    For each sensitive itemset (highest utility first) the utility that must be
    removed is ``u(Si) - delta + 1``. It is taken from the itemset's items in
    ascending ``SINS`` order, and within an item from the ULElems ordered by
    ``tns`` descending, then utility ascending. Only transactions in ``L(Si)``
    are touched.

    The itemset utility and the remaining quantity of an element are derived
    from the *current* UTLists, not from ``transaction_database`` (which this
    function never updates). Otherwise, when two sensitive itemsets share an
    item, a later itemset could be processed against stale numbers and even
    raise a utility that had already been lowered.

    Args:
        utldic: the UTLDic to sanitize; it is not modified.

    Returns:
        The sanitized copy. Its ``transaction_database`` is unchanged; only the
        ULElem utilities and ``sum_utility`` values are updated.
    """
    sanitized_utldic = copy.deepcopy(utldic)
    external_utility = sanitized_utldic.external_utility_table

    # Sort S in descending order of u(Si). Nothing has been modified yet, so the
    # UTList-based utility equals the utility in the original database.
    S_sorted = sorted(
        sanitized_utldic.S,
        key=lambda itemset: _current_itemset_utility(sanitized_utldic, itemset, sanitized_utldic.L(itemset)),
        reverse=True,
    )

    for Si in S_sorted:
        # Sort Si in ascending order of SINS(item).
        Si_sorted = sorted(Si, key=lambda item: sanitized_utldic[item].SINS)

        # Transactions that contain every item of Si.
        supporting_tids = sanitized_utldic.L(Si)

        # Utility still to be removed so that u(Si) drops below delta.
        target_util = _current_itemset_utility(sanitized_utldic, Si, supporting_tids) - sanitized_utldic.delta + 1
        if target_util <= 0:
            # Already below the threshold (possibly due to an earlier itemset).
            continue

        # A single pass is enough: every eligible element is either zeroed or
        # the target is reached, so repeating the pass could never make progress.
        for item in Si_sorted:
            utlist = sanitized_utldic[item]
            unit_utility = external_utility[item]

            # Order by tns descending, then utility ascending. Every item's list
            # is sorted, even once the target is met, so the element order of
            # the result does not depend on where the reduction stopped.
            utlist.ULElems.sort(key=lambda elem: (-elem.tns, elem.item_utility))

            for elem in utlist.ULElems:
                if target_util <= 0:
                    break
                if elem.TID not in supporting_tids:
                    continue

                if elem.item_utility <= target_util:
                    # The whole element is consumed.
                    reduced_utility = elem.item_utility
                    elem.item_utility = 0
                else:
                    # Remove just enough whole units, starting from the
                    # element's current quantity.
                    current_quantity = elem.item_utility // unit_utility
                    remaining_quantity = current_quantity - math.ceil(target_util / unit_utility)
                    new_utility = remaining_quantity * unit_utility
                    reduced_utility = elem.item_utility - new_utility
                    elem.item_utility = new_utility

                target_util -= reduced_utility
                utlist.sum_utility -= reduced_utility

    return sanitized_utldic

In [9]:
def generate_sanitized_database(sanitized_utldic: "UTLDic") -> Dict[str, Dict["Item", int]]:
    """Build the sanitized transaction database from a sanitized UTLDic.

    Starts from a deep copy of ``sanitized_utldic.transaction_database`` and,
    for every ULElem, replaces the item's quantity in that transaction with
    ``item_utility // external_utility``. Items whose quantity becomes 0 are
    removed, and transactions left without items are dropped.

    Args:
        sanitized_utldic: iterated as ``(item, utlist)`` pairs; must also expose
            ``transaction_database`` and ``external_utility_table``.

    Returns:
        A new database; the one held by ``sanitized_utldic`` is not modified.
    """
    # Work on a copy so the original database stays intact.
    sanitized_td = copy.deepcopy(sanitized_utldic.transaction_database)
    external_utility = sanitized_utldic.external_utility_table

    for item, utlist in sanitized_utldic:
        unit_utility = external_utility[item]
        for elem in utlist.ULElems:
            # Convert the (possibly reduced) utility back to a quantity.
            new_internal_utility = int(elem.item_utility // unit_utility)

            if new_internal_utility == 0:
                # The item no longer occurs in this transaction.
                del sanitized_td[elem.TID][item]
            else:
                sanitized_td[elem.TID][item] = new_internal_utility

    # Drop empty transactions by building a new dict: deleting keys while
    # iterating over the same dict raises RuntimeError.
    return {tid: transaction for tid, transaction in sanitized_td.items() if transaction}

In [10]:
# Sample transaction database: TID -> {item: quantity}.
transaction_database = {
    tid: {Item(name): quantity for name, quantity in row.items()}
    for tid, row in {
        'T1': {'C': 7, 'D': 1, 'E': 1},
        'T2': {'A': 1, 'C': 2, 'E': 2},
        'T3': {'B': 6, 'C': 4, 'D': 3, 'E': 7},
        'T4': {'B': 5, 'C': 3, 'D': 9},
        'T5': {'A': 3, 'C': 10, 'D': 3},
        'T6': {'C': 5, 'E': 9},
        'T7': {'A': 6, 'C': 9, 'D': 2, 'E': 5},
        'T8': {'A': 1, 'B': 6, 'C': 2, 'D': 5, 'E': 3},
    }.items()
}

# Per-unit (external) utility of each item.
external_utility_table = {
    Item(name): unit_utility
    for name, unit_utility in {'A': 9, 'B': 11, 'C': 4, 'D': 6, 'E': 7}.items()
}

# High-utility itemsets of the sample database.
HUIs = {
    Itemset(letters)
    for letters in (
        'ACDE', 'BCDE', 'BC',
        'ACD', 'BCE', 'CDE',
        'BE', 'BD', 'CE',
        'BDE', 'BCD', 'CD',
    )
}

# Sensitive itemsets to hide.
S = {Itemset(letters) for letters in ('ACD', 'BC')}

# Every HUI that is not in S.
NS = HUIs.difference(S)

delta = 200  # Minimum utility threshold

---

Kiểm tra độ chính xác của cột **tu** trong Table **1**

![Table 1: A Transaction Database](https://trankhacbinh.github.io/image-repo/Table_1_A_Transaction_Database.png)

In [11]:
# Utility of every transaction, keyed by TID.
{
    tid: transaction_utility_in_database(row, external_utility_table)
    for tid, row in transaction_database.items()
}

{'T1': 41,
 'T2': 31,
 'T3': 149,
 'T4': 121,
 'T5': 85,
 'T6': 83,
 'T7': 137,
 'T8': 134}

---

Kiểm tra độ chính xác của Table **3**

![Table 3: HUIs Table](https://trankhacbinh.github.io/image-repo/Table_3_HUIs_Table.png)

In [12]:
# Database-wide utility of every itemset in HUIs.
{
    hui: itemset_utility_in_database(hui, transaction_database, external_utility_table)
    for hui in HUIs
}

{Itemset(BCD): 325,
 Itemset(BCDE): 274,
 Itemset(BCE): 226,
 Itemset(BD): 289,
 Itemset(CDE): 266,
 Itemset(BE): 202,
 Itemset(BDE): 250,
 Itemset(ACDE): 205,
 Itemset(ACD): 234,
 Itemset(CE): 305,
 Itemset(BC): 223,
 Itemset(CD): 278}



---



Kiểm tra độ chính xác của Figure **4**

![Fig 4: An example of the UTlists](https://trankhacbinh.github.io/image-repo/Fig_4_An_example_of_the_UTlists.png)

In [13]:
# Build the UTLDic for the sample data; the bare name on the last line displays it.
utldic = UTLDic(transaction_database, external_utility_table, S, NS, delta)
utldic

UTLDic(
  Item(A): UTList(
  item_name=Item(A),
  SINS=1,
  sum_utility=90,
  ULElems=[
    ULElem(TID='T5', tns=0.5, item_utility=27),
    ULElem(TID='T7', tns=0.2, item_utility=54),
    ULElem(TID='T8', tns=0.09, item_utility=9)
  ]
),
  Item(B): UTList(
  item_name=Item(B),
  SINS=6,
  sum_utility=187,
  ULElems=[
    ULElem(TID='T3', tns=0.1, item_utility=66),
    ULElem(TID='T4', tns=0.25, item_utility=55),
    ULElem(TID='T8', tns=0.09, item_utility=66)
  ]
),
  Item(C): UTList(
  item_name=Item(C),
  SINS=7,
  sum_utility=112,
  ULElems=[
    ULElem(TID='T3', tns=0.1, item_utility=16),
    ULElem(TID='T4', tns=0.25, item_utility=12),
    ULElem(TID='T5', tns=0.5, item_utility=40),
    ULElem(TID='T7', tns=0.2, item_utility=36),
    ULElem(TID='T8', tns=0.09, item_utility=8)
  ]
),
  Item(D): UTList(
  item_name=Item(D),
  SINS=7,
  sum_utility=60,
  ULElems=[
    ULElem(TID='T5', tns=0.5, item_utility=18),
    ULElem(TID='T7', tns=0.2, item_utility=12),
    ULElem(TID='T8', tns=



---



Kiểm tra độ chính xác của Figure **6**

![Fig 6: An example of the sanitized UTLDic](https://trankhacbinh.github.io/image-repo/Fig_6%20_An_example_of_the_sanitized_UTLDic.png)

In [14]:
# Hide the sensitive itemsets; `utldic` itself is left untouched because the function works on a deep copy.
sanitized_utldic = hide_sensitive_high_utility_itemsets(utldic)
sanitized_utldic

UTLDic(
  Item(A): UTList(
  item_name=Item(A),
  SINS=1,
  sum_utility=54,
  ULElems=[
    ULElem(TID='T5', tns=0.5, item_utility=0),
    ULElem(TID='T7', tns=0.2, item_utility=45),
    ULElem(TID='T8', tns=0.09, item_utility=9)
  ]
),
  Item(B): UTList(
  item_name=Item(B),
  SINS=6,
  sum_utility=154,
  ULElems=[
    ULElem(TID='T4', tns=0.25, item_utility=22),
    ULElem(TID='T3', tns=0.1, item_utility=66),
    ULElem(TID='T8', tns=0.09, item_utility=66)
  ]
),
  Item(C): UTList(
  item_name=Item(C),
  SINS=7,
  sum_utility=112,
  ULElems=[
    ULElem(TID='T5', tns=0.5, item_utility=40),
    ULElem(TID='T4', tns=0.25, item_utility=12),
    ULElem(TID='T7', tns=0.2, item_utility=36),
    ULElem(TID='T3', tns=0.1, item_utility=16),
    ULElem(TID='T8', tns=0.09, item_utility=8)
  ]
),
  Item(D): UTList(
  item_name=Item(D),
  SINS=7,
  sum_utility=60,
  ULElems=[
    ULElem(TID='T5', tns=0.5, item_utility=18),
    ULElem(TID='T7', tns=0.2, item_utility=12),
    ULElem(TID='T8', tns=0



---



In [15]:
# Turn the sanitized UTLDic back into a transaction database and display it.
sanitized_td = generate_sanitized_database(sanitized_utldic)
sanitized_td

{'T1': {Item(C): 7, Item(D): 1, Item(E): 1},
 'T2': {Item(A): 1, Item(C): 2, Item(E): 2},
 'T3': {Item(B): 6, Item(C): 4, Item(D): 3, Item(E): 7},
 'T4': {Item(B): 2, Item(C): 3, Item(D): 9},
 'T5': {Item(C): 10, Item(D): 3},
 'T6': {Item(C): 5, Item(E): 9},
 'T7': {Item(A): 5, Item(C): 9, Item(D): 2, Item(E): 5},
 'T8': {Item(A): 1, Item(B): 6, Item(C): 2, Item(D): 5, Item(E): 3}}

In [16]:
# Utility of each sensitive itemset in the original database.
{
    sensitive: itemset_utility_in_database(sensitive, transaction_database, external_utility_table)
    for sensitive in S
}

{Itemset(ACD): 234, Itemset(BC): 223}

In [17]:
# Utility of each sensitive itemset in the sanitized database (compare with delta).
{
    sensitive: itemset_utility_in_database(sensitive, sanitized_td, external_utility_table)
    for sensitive in S
}

{Itemset(ACD): 140, Itemset(BC): 190}



---

