# MASK-Maintaining Data Privacy in Association Rule Mining

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](
link)

## Processing the data

In [58]:

test_ds_path = ".\..\Datasets\store_data.csv"

with open(test_ds_path, "r") as f:

    lines = f.readlines()
    inventory = list()
    tuples = list()

    for line in lines:
        transaction = line.strip().split(",")
        #print(transaction)
        tuples.append(transaction)
        for element in transaction:
            if element.strip().replace(' ','_').replace('&','and') not in inventory:
                inventory.append(element.strip().replace(' ','_').replace('&','and'))

In [59]:
print(len(inventory))

120


In [60]:

print(inventory)
inventory.pop()
inventory.sort()
print(inventory)

['shrimp', 'almonds', 'avocado', 'vegetables_mix', 'green_grapes', 'whole_weat_flour', 'yams', 'cottage_cheese', 'energy_drink', 'tomato_juice', 'low_fat_yogurt', 'green_tea', 'honey', 'salad', 'mineral_water', 'salmon', 'antioxydant_juice', 'frozen_smoothie', 'spinach', 'olive_oil', 'burgers', 'meatballs', 'eggs', 'chutney', 'turkey', 'milk', 'energy_bar', 'whole_wheat_rice', 'whole_wheat_pasta', 'french_fries', 'soup', 'light_cream', 'shallot', 'frozen_vegetables', 'spaghetti', 'pet_food', 'cookies', 'cooking_oil', 'champagne', 'chocolate', 'chicken', 'oil', 'fresh_tuna', 'tomatoes', 'black_tea', 'extra_dark_chocolate', 'protein_bar', 'red_wine', 'pasta', 'pepper', 'shampoo', 'rice', 'sparkling_water', 'ham', 'body_spray', 'pancakes', 'grated_cheese', 'white_wine', 'toothpaste', 'parmesan_cheese', 'fresh_bread', 'ground_beef', 'escalope', 'herb_and_pepper', 'tomato_sauce', 'magazines', 'strawberries', 'strong_cheese', 'pickles', 'cake', 'hot_dogs', 'brownies', 'cereals', 'clothes_acc

In [61]:
print(len(inventory))

119


In [62]:
import pandas as pd
import numpy as np
import math
import random
from pandas import DataFrame


In [63]:
test_dataset = pd.DataFrame(
    [[1 if item in row else 0 for item in inventory] for row in tuples],
    columns=inventory
)

In [64]:
test_dataset.head(3)

Unnamed: 0,almonds,antioxydant_juice,asparagus,avocado,babies_food,bacon,barbecue_sauce,black_tea,blueberries,body_spray,...,turkey,vegetables_mix,water_spray,white_wine,whole_weat_flour,whole_wheat_pasta,whole_wheat_rice,yams,yogurt_cake,zucchini
0,1,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [65]:
test_dataset.columns

Index(['almonds', 'antioxydant_juice', 'asparagus', 'avocado', 'babies_food',
       'bacon', 'barbecue_sauce', 'black_tea', 'blueberries', 'body_spray',
       ...
       'turkey', 'vegetables_mix', 'water_spray', 'white_wine',
       'whole_weat_flour', 'whole_wheat_pasta', 'whole_wheat_rice', 'yams',
       'yogurt_cake', 'zucchini'],
      dtype='object', length=119)

In [66]:
type(test_dataset.columns)

pandas.core.indexes.base.Index

In [67]:
for tuple in test_dataset.itertuples():
    print(tuple)
    print(int( getattr(tuple, 'almonds')))
    print(type(tuple))
    break

Pandas(Index=0, almonds=1, antioxydant_juice=0, asparagus=0, avocado=1, babies_food=0, bacon=0, barbecue_sauce=0, black_tea=0, blueberries=0, body_spray=0, bramble=0, brownies=0, bug_spray=0, burger_sauce=0, burgers=0, butter=0, cake=0, candy_bars=0, carrots=0, cauliflower=0, cereals=0, champagne=0, chicken=0, chili=0, chocolate=0, chocolate_bread=0, chutney=0, cider=0, clothes_accessories=0, cookies=0, cooking_oil=0, corn=0, cottage_cheese=0, cream=0, dessert_wine=0, eggplant=0, eggs=0, energy_bar=0, energy_drink=0, escalope=0, extra_dark_chocolate=0, flax_seed=0, french_fries=0, french_wine=0, fresh_bread=0, fresh_tuna=0, fromage_blanc=0, frozen_smoothie=0, frozen_vegetables=0, gluten_free_bar=0, grated_cheese=0, green_beans=0, green_grapes=0, green_tea=0, ground_beef=0, gums=0, ham=0, hand_protein_bar=0, herb_and_pepper=0, honey=1, hot_dogs=0, ketchup=0, light_cream=0, light_mayo=0, low_fat_yogurt=0, magazines=0, mashed_potato=0, mayonnaise=0, meatballs=0, melons=0, milk=0, mineral_

## Implementing utility functions and classes

In [68]:
from numpy import float64
class Rule:
    def __init__(self,itemset: list | dict,support: float = 0, confidence: float = 0):
        self.itemset = itemset
        self.support = support
        self.confidence = confidence

    def __str__(self):
        return f"Rule('{self.itemset}', '{self.support}')"
    
    def __repr__(self):
        return f"Rule('{self.itemset}', '{self.support}')"
    
    def __eq__(self, other):
        if isinstance(other, Rule):
            return set(self.itemset) == set(other.itemset)
        return False
    
    def __iter__(self):
        return iter([self.itemset, self.support, self.confidence])
    

class AprioriRule(Rule):
    def __init__(self,itemset: list | dict,support: float = 0):
        super().__init__(itemset,support)

class MASKRule(Rule):
    def __init__(self,itemset: list | dict,support: float = 0):
        super().__init__(itemset,support)
        self.counters = np.zeros(len(itemset)+1)

In [69]:

from pandas import DataFrame


def support(T: DataFrame ,X_U_Y: list | dict):
    '''
    Parameters:
    T (dataframe)
    X_U_Y (list | dict) : name of the attributes considered (X U Y)
    Return:
    float: support of the attributes in the dataset
    '''
    for attribute in X_U_Y:
        if attribute not in T.columns:
             return ValueError
    count = 0
    for tuple in T.itertuples(False):
        contained = True
        for attribute in X_U_Y:
            try: 
                if getattr(tuple,attribute) == 0:
                    contained = False
                    break
            except AttributeError:
                contained = False,
                break

        if contained:
            count += 1
    return count/len(T)
    

In [70]:
support(test_dataset,['herb_and_pepper'])

0.0

In [71]:

def confidence(T: DataFrame, X: list | dict, Y: list | dict):
    '''
    Parameters:
    T (dataframe)
    X ( list | dict): X part of X ==> Y rule
    X (list | dict): Y part of X ==> Y rule
    '''
    if len(X)+len(Y) > len(T.columns):
        return ValueError
    for item in X:
        if Y.__contains__(item):
            return ValueError # XY = empty set
    
    countX=0
    countY=0
    for tuple in T.itertuples():
        containedX = True
        for attribute in X:
            if getattr(tuple,attribute) == 0:
                containedX = False
                break
        if containedX:
            countX += 1
            containedY=True
            for attribute in Y:
                if attribute == '':
                    containedY = False
                    break
                if getattr(tuple,attribute) == 0:
                    containedY = False
                    break
            if containedY:
                countY += 1
    return countY/countX    

In [72]:
confidence(test_dataset,['spaghetti','champagne'],['cookies'])
#spaghetti,champagne,cookies

0.08

In [92]:
def Apriori(items, dataset, min_sup, levels: int = None):
    if levels is None:
        levels = len(items)
    '''
    rules[0] = empty set
    rules[1] = rules of length 1
    rules[2] = rules of length 2


    rules[len(items)] = items
    ...
    '''
    rules = [
        [],
        [ AprioriRule([item]) for item in items ]
    ]



    # iterate over all the possible rules length from 1 to len(items)
    for i in range(1, levels+1):
        print(f"Apriori Level: {i}")
        # remove all the rules in rules[i]
        # that don't have a support of at least min_sup

        #print(f"RULES[{i}] BEFORE", rules[i])
        for j in range(len(rules[i])-1,-1,-1):
            sup = support(dataset,rules[i][j].itemset)
            if sup >= min_sup:
                rules[i][j].support = sup
            else:
                rules[i].remove(rules[i][j])
    
        '''
        print(f"RULES[{i}] SUPPORT", [
            support(dataset, rule)
            for rule in rules[i]
        ]
        )'''


        #print(f"RULES[{i}] AFTER", rules[i])

        if len(rules[i]) == 0:
            break


        # generate all the possibile 
        # rules with i+1 elements
        rules.append([]) # create the element rules[i+1]

        #print("RULE[i]", rules[i])
        #print("RULE[i+1]", rules[i+1])
        for rule in rules[i]:
            for j in range(0,len(rules[1])):

                # skip if item is already inside the rule
                if rules[1][j].itemset[0] in rule.itemset:
                    continue
                
                itemset = rule.itemset + rules[1][j].itemset
                itemset.sort()

                new_rule = AprioriRule(itemset)
                if new_rule not in rules[i+1]:

                    rules[i+1].append(new_rule)
                # create a new rule composed of rule + [item]


    return rules

In [74]:
def hammingDistanceBitwise(a:int,b:int):
    return (a^b).bit_count()

In [75]:
hammingDistanceBitwise(int(1),int(2))

2

In [76]:

def computeM(size: int, p: float):
    max_exp = int(math.log2(size))
    M = np.diag([math.pow(p,max_exp) for i in range(size)])
    for i in range(0,size):
        for j in range(i+1,size):
            difference = hammingDistanceBitwise(i,j)
            M[i][j] = math.pow(1-p,difference)*math.pow(p,max_exp-difference)

    for i in range(1,size):
        for j in range(i,-1,-1):
            M[i][j] = M[j][i]

    return M

In [77]:
M_test = computeM(int(math.pow(2,2)),0.1)
print(M_test)
print(sum(M_test[0]))

[[0.01 0.09 0.09 0.81]
 [0.09 0.01 0.81 0.09]
 [0.09 0.81 0.01 0.09]
 [0.81 0.09 0.09 0.01]]
1.0


In [78]:
from numpy.linalg import inv

p = 0.9
M_test = inv(computeM(2**2,p))
print(M_test)
print(sum(M_test[3]))

[[ 1.265625 -0.140625 -0.140625  0.015625]
 [-0.140625  1.265625  0.015625 -0.140625]
 [-0.140625  0.015625  1.265625 -0.140625]
 [ 0.015625 -0.140625 -0.140625  1.265625]]
1.0


In [79]:
def vectormatrixProdMod(linC_D,matrix):
    size = matrix.shape[0]
    row = matrix[size-1]
    sum:float = 0.0
    for j in range(len(linC_D)):
        index = int(math.pow(2,j))-1
        #print(str(row[index])+" * "+str(linC_D[j]))
        sum += row[index]*linC_D[j]
    return sum

In [80]:
support_vector = [[item,support(test_dataset,[item])] for item in inventory]
print(support_vector)
mean_support = np.mean([sublist[1] for sublist in support_vector])
print(mean_support)


[['almonds', 0.020397280362618318], ['antioxydant_juice', 0.0], ['asparagus', 0.004666044527396347], ['avocado', 0.03332888948140248], ['babies_food', 0.0], ['bacon', 0.008665511265164644], ['barbecue_sauce', 0.0], ['black_tea', 0.0], ['blueberries', 0.009198773496867084], ['body_spray', 0.0], ['bramble', 0.0018664178109585388], ['brownies', 0.03372883615517931], ['bug_spray', 0.0], ['burger_sauce', 0.0], ['burgers', 0.0871883748833489], ['butter', 0.030129316091187842], ['cake', 0.08105585921877083], ['candy_bars', 0.0], ['carrots', 0.01533128916144514], ['cauliflower', 0.004799360085321957], ['cereals', 0.025729902679642713], ['champagne', 0.04679376083188908], ['chicken', 0.05999200106652446], ['chili', 0.0061325156645780565], ['chocolate', 0.1638448206905746], ['chocolate_bread', 0.0], ['chutney', 0.0041327822956939075], ['cider', 0.010531929076123183], ['clothes_accessories', 0.0], ['cookies', 0.08038928142914278], ['cooking_oil', 0.0], ['corn', 0.004799360085321957], ['cottage_ch

## Distortion

In [81]:


def MASK_Distortion(dataset: DataFrame, p: float):
    '''
    MASK
    
    choose a probability p

    2 event
    P(x = true) -> 1-p % -> we add or remove an element in the transaction
    P(x = false) -> p% -> the transaction remains the same
    '''
    distorted_dataset = dataset.copy(deep=True)
    for i in range(0,len(distorted_dataset)):
        for column in distorted_dataset.columns:
            event = random.random()
            if event > p:
                distorted_dataset.loc[i, column] = int(not dataset.loc[i, column])

    return distorted_dataset

        



In [82]:
p = 0.9
distorted_test_dataset = MASK_Distortion(test_dataset,p)

## Rule mining

In [83]:
def MASK_Support(linC_D,db_cardinality,M_inv = None):
    if M_inv is None:
        M_inv = inv(
            computeM(
                size=int(math.pow(2,len(linC_D)-1)),
                p=p
            )
        )

    C_T_11 = vectormatrixProdMod(linC_D,M_inv)

    return C_T_11/db_cardinality
    
    

In [84]:
def MASK_Rule_Mining(dataset: DataFrame, p: float, min_sup: float,levels: int = None):
    if levels is None:
        levels = len(dataset.columns)
    rules = [
        [],
        [MASKRule([item]) for item in dataset.columns]
    ]
    frequent_itemsets = [[]]
    infrequent_itemsets = [[]]

    for i in range(1,levels+1):
        print(f"Mask Rule Mining level: {i}")

        frequent_itemsets.append([])
        infrequent_itemsets.append([])


        for tuple in dataset.itertuples():

            item_list = []
            complement_list=[]

            for item in dataset.columns:
                if getattr(tuple,item) == 1 and item not in infrequent_itemsets[i-1]:
                    item_list.append(item)
            for item in frequent_itemsets[i-1]:
                if item not in item_list:
                    complement_list.append(item)

           
            for rule in rules[i]:
                bit_counter = 0
                for item in rule.itemset:
                    if item in item_list:
                        bit_counter += 1
                
                rule.counters[bit_counter]+=1
            
        
        

        for j in range(len(rules[i])-1,-1,-1):

            size = int(math.pow(2,i))
            M_inv = inv(
                computeM(size,p)
            )

            sup = MASK_Support(rules[i][j].counters,len(dataset),M_inv)

            if sup >= min_sup:
                rules[i][j].support = sup
                for item in rules[i][j].itemset:
                    if item not in frequent_itemsets[i]:
                        frequent_itemsets[i].append(item)

            else:
                rules[i].remove(rules[i][j])
                for item in rule.itemset:
                    if item not in infrequent_itemsets[i]:
                        infrequent_itemsets[i].append(item)
            
        if len(rules[i]) == 0:
            break

        rules.append([])

        for rule in rules[i]:
            for r in rules[1]:
                if r.itemset[0] not in rule.itemset:
                    itemset =  rule.itemset + r.itemset
                    itemset.sort()
                    new_rule = MASKRule(itemset)
                    if new_rule not in rules[i+1]:
                        rules[i+1].append(new_rule)
    
    return rules

## Reduced dataset test

In [85]:
reduced_inventory = inventory[0:50]
print(reduced_inventory)

['almonds', 'antioxydant_juice', 'asparagus', 'avocado', 'babies_food', 'bacon', 'barbecue_sauce', 'black_tea', 'blueberries', 'body_spray', 'bramble', 'brownies', 'bug_spray', 'burger_sauce', 'burgers', 'butter', 'cake', 'candy_bars', 'carrots', 'cauliflower', 'cereals', 'champagne', 'chicken', 'chili', 'chocolate', 'chocolate_bread', 'chutney', 'cider', 'clothes_accessories', 'cookies', 'cooking_oil', 'corn', 'cottage_cheese', 'cream', 'dessert_wine', 'eggplant', 'eggs', 'energy_bar', 'energy_drink', 'escalope', 'extra_dark_chocolate', 'flax_seed', 'french_fries', 'french_wine', 'fresh_bread', 'fresh_tuna', 'fromage_blanc', 'frozen_smoothie', 'frozen_vegetables', 'gluten_free_bar']


In [86]:
reduced_test_ds = test_dataset.iloc[:, 0:50]
print(reduced_test_ds)

      almonds  antioxydant_juice  asparagus  avocado  babies_food  bacon  \
0           1                  0          0        1            0      0   
1           0                  0          0        0            0      0   
2           0                  0          0        0            0      0   
3           0                  0          0        1            0      0   
4           0                  0          0        0            0      0   
...       ...                ...        ...      ...          ...    ...   
7496        0                  0          0        0            0      0   
7497        0                  0          0        0            0      0   
7498        0                  0          0        0            0      0   
7499        0                  0          0        0            0      0   
7500        0                  0          0        0            0      0   

      barbecue_sauce  black_tea  blueberries  body_spray  ...  \
0                  0  

In [87]:
support_vector_reduced = [[item,support(reduced_test_ds,[item])] for item in reduced_inventory]
print(support_vector_reduced)
mean_reduced_support = np.mean([sublist[1] for sublist in support_vector_reduced])
print(mean_reduced_support)

[['almonds', 0.020397280362618318], ['antioxydant_juice', 0.0], ['asparagus', 0.004666044527396347], ['avocado', 0.03332888948140248], ['babies_food', 0.0], ['bacon', 0.008665511265164644], ['barbecue_sauce', 0.0], ['black_tea', 0.0], ['blueberries', 0.009198773496867084], ['body_spray', 0.0], ['bramble', 0.0018664178109585388], ['brownies', 0.03372883615517931], ['bug_spray', 0.0], ['burger_sauce', 0.0], ['burgers', 0.0871883748833489], ['butter', 0.030129316091187842], ['cake', 0.08105585921877083], ['candy_bars', 0.0], ['carrots', 0.01533128916144514], ['cauliflower', 0.004799360085321957], ['cereals', 0.025729902679642713], ['champagne', 0.04679376083188908], ['chicken', 0.05999200106652446], ['chili', 0.0061325156645780565], ['chocolate', 0.1638448206905746], ['chocolate_bread', 0.0], ['chutney', 0.0041327822956939075], ['cider', 0.010531929076123183], ['clothes_accessories', 0.0], ['cookies', 0.08038928142914278], ['cooking_oil', 0.0], ['corn', 0.004799360085321957], ['cottage_ch

In [88]:
p = 0.9
reduced_distorted_test_ds = MASK_Distortion(reduced_test_ds,p)

In [93]:
min_sup = 0.005
rules = Apriori(reduced_inventory,reduced_test_ds,min_sup)


Apriori Level: 1
Apriori Level: 2
Apriori Level: 3
Apriori Level: 4


In [45]:
print(rules)

[[], [Rule('['almonds']', '0.020397280362618318'), Rule('['avocado']', '0.03332888948140248'), Rule('['bacon']', '0.008665511265164644'), Rule('['blueberries']', '0.009198773496867084'), Rule('['brownies']', '0.03372883615517931'), Rule('['burgers']', '0.0871883748833489'), Rule('['butter']', '0.030129316091187842'), Rule('['cake']', '0.08105585921877083'), Rule('['carrots']', '0.01533128916144514'), Rule('['cereals']', '0.025729902679642713'), Rule('['champagne']', '0.04679376083188908'), Rule('['chicken']', '0.05999200106652446'), Rule('['chili']', '0.0061325156645780565'), Rule('['chocolate']', '0.1638448206905746'), Rule('['cider']', '0.010531929076123183'), Rule('['cookies']', '0.08038928142914278'), Rule('['eggplant']', '0.013198240234635382'), Rule('['eggs']', '0.17970937208372217'), Rule('['escalope']', '0.0793227569657379')], [Rule('['almonds', 'burgers']', '0.005199306759098787'), Rule('['almonds', 'chocolate']', '0.005999200106652446'), Rule('['almonds', 'eggs']', '0.0065324

In [90]:
MASK_rules = MASK_Rule_Mining(reduced_distorted_test_ds,p,min_sup)

Mask Rule Mining level: 1
Mask Rule Mining level: 2
Mask Rule Mining level: 3
Mask Rule Mining level: 4


In [50]:
MASK_rules = MASK_Rule_Mining(reduced_distorted_test_ds,p,min_sup)

Mask Rule Mining level: 1
Mask Rule Mining level: 2
Mask Rule Mining level: 3
Mask Rule Mining level: 4


In [None]:
print(MASK_rules)
#print(MASK_rules_2)

## Full dataset test

In [None]:
print(mean_support)



In [None]:
print(inventory)

In [44]:
def mean_support(inventory, dataset):
    support_vector = []
    for item in inventory:
        support_vector.append(support(dataset,[item]))

    return np.mean(support_vector)

In [None]:
mean_sup = mean_support(test_dataset.columns,test_dataset)
print(mean_sup)

In [56]:
'''p = 0.9
distorted_test_dataset = MASK_Distortion(test_dataset,p)'''

In [None]:
'''min_sup = 0.25
rules = Apriori(inventory,test_dataset,min_sup)'''

In [None]:
#MASK_rules = MASK_Rule_Mining(distorted_test_dataset,p,min_sup)

## Evaluating performance of the algorithm
### Metrics
* Support Error:
    $\rho = \frac{1}{|f|}\sum_{f}^{}\frac{|recSup_f - actSup_f|}{actSup_f}*100$
* Identity error:
    * $\sigma^+$ = percentage of false positive->$\sigma^+ = \frac{|R-F|}{|F|}*100$ 
    * $\sigma^-$ = percentage of false negative->$\sigma^- = \frac{|F-R|}{|F|}*100$

                


In [94]:
def support_error(AprioriRuleslevel,MASKRuleslevel):
    
    sum = 0
    cnt = 0

    for apriorirule in AprioriRuleslevel:
        if apriorirule is not None and apriorirule in MASKRuleslevel:
            cnt += 1
            index = MASKRuleslevel.index(apriorirule)
            
            rec_sup = MASKRuleslevel[index].support
            act_sup = apriorirule.support
            
            sum += abs(rec_sup-act_sup)/act_sup
    if cnt != 0:
        return (sum/cnt)*100
    else:
        return 0
    


In [95]:
def identity_error(AprioriRulesLevel,MASKRulesLevel):

    false_positive_cnt = 0
    false_negative_cnt = 0

    for rule in AprioriRulesLevel:
        if rule not in MASKRulesLevel:
            false_positive_cnt += 1
    
    for rule in MASKRulesLevel:
        if rule not in AprioriRulesLevel:
            false_negative_cnt += 1
    
    F = len(AprioriRulesLevel)

    if F != 0:
        false_positive = false_positive_cnt/F
        false_negative = false_negative_cnt/F
        return false_positive, false_negative
    else: return 0,0

    

In [96]:
for i in range(1,len(rules)):
    print(f"Level: {i},|F| = {len(rules[i])}, Support error: {support_error(rules[i],MASK_rules[i])}, Identity error: {identity_error(rules[i],MASK_rules[i])}")

Level: 1,|F| = 19, Support error: 10.215038271311592, Identity error: (0.05263157894736842, 0.47368421052631576)
Level: 2,|F| = 28, Support error: 15.03025528164144, Identity error: (0.17857142857142858, 0.39285714285714285)
Level: 3,|F| = 1, Support error: 17.61147103658544, Identity error: (0.0, 2.0)
Level: 4,|F| = 0, Support error: 0, Identity error: (0, 0)


In [55]:
'''for i in range(1,len(rules)):
    print(support_error(rules[i],MASK_rules_2[i]))'''

'for i in range(1,len(rules)):\n    print(support_error(rules[i],MASK_rules_2[i]))'

In [56]:
for i in range(1,len(rules)):
    print(identity_error(rules[i],MASK_rules_1[i]))

NameError: name 'MASK_rules_1' is not defined

In [None]:
'''for i in range(1,len(rules)):
    print(identity_error(rules[i],MASK_rules_2[i]))'''

## Evaluating MASK's privacy
### Metrics:
* Reconstruction Probability: $R(p) = aR_1(p) + (1-a)R_0(p)$
    * $R_1(p) \simeq \frac{s_0 * p^2}{s_0*p+(1-s_0)*(1-p)} + \frac{s_0 * (1-p)^2}{s_0*(1-p)+(1-s_0)*p}$
    * $R_0(p) \simeq \frac{(1-s_0) * p^2}{(1-s_0)*p+s_0*(1-p)} + \frac{(1-s_0) * (1-p)^2}{s_0*p+(1-s_0)*(1-p)}$
    * $s_0 = $ average support for an item in the database

* Privacy Measure: $P(p) = (1-R(p))*100$

In [41]:
def R_1(s_0,p):
    return ((s_0 * math.pow(p,2))/((s_0*p)+(1-s_0)*(1-p)))+((s_0 * math.pow(1-p,2))/((s_0*(1-p))+(1-s_0)*p))

def R_0(s_0,p):
    return (((1-s_0) * math.pow(p,2))/(((1-s_0)*p)+s_0*(1-p)))+(((1-s_0) * math.pow(1-p,2))/((s_0*p)+(1-s_0)*(1-p)))

def R(s_0,p,a):
    return a*R_1(s_0,p)+(1-a)*R_0(s_0,p)

def P(s_0,p,a):
    return (1-R(s_0,p,a))*100

def mean_support(inventory, dataset):
    support_vector = []
    for item in inventory:
        support_vector.append(support(dataset,item))
    return np.mean(support_vector)

In [None]:
P(mean_support,p,0.9)