In [2]:
# Loads dataset function
def load_dataset(selected_dataset_name:str, repeat : bool):
    dataset=[]
    with open(selected_dataset_name+'.txt') as f:
        lines = f.read().splitlines()
        for l in lines:
            seen = set()
            curr_line = l.replace("\t"," ").split(" ")
            if not repeat:
                curr_line = [x for x in curr_line if not (x in seen or seen.add(x))]
            dataset.append(curr_line)
    return dataset


dataset = load_dataset("skating",False)
print(f"Dataset loaded. {len(dataset)} entries found. First 5 entries: ")
for entry in dataset[:5]:
    print(entry)

Dataset loaded. 530 entries found. First 5 entries: 
['1', '29', '21', '15', '3', '16', '17', '23', '9', '24', '25', '5', '6', '30', '31', '22', '7', '26', '2', '27', '10', '11', '28', '12', '8', '18', '19', '20', '13', '14', '32', '33', '34', '4']
['1', '29', '23', '3', '41', '24', '27', '9', '35', '28', '25', '21', '42', '39', '43', '36', '37', '44', '40', '38', '5', '6', '26', '30', '31', '22', '2', '32', '15', '16', '19', '20', '17', '10', '13', '14', '18', '4']
['1', '29', '21', '9', '45', '46', '27', '28', '23', '24', '25', '30', '31', '22', '2', '10', '13', '15', '14', '16', '17', '32', '33', '18', '34', '26']
['1', '29', '41', '21', '25', '17', '26', '27', '28', '9', '35', '42', '47', '43', '36', '37', '48', '44', '30', '31', '22', '7', '2', '38', '5', '8', '6', '10', '13', '14', '18', '15', '16', '32']
['1', '29', '51', '27', '21', '41', '17', '28', '23', '24', '22', '49', '52', '9', '35', '25', '42', '50', '43', '36', '37', '44', '26', '7', '30', '31', '2', '32', '33', '38', 

In [3]:
def compute_support(dataset, rule):
    count_rule = 0
    for item in dataset:
        if all(element in item for element in rule):
            count_rule+=1
    return count_rule

def compute_confidence(dataset:list, antecedent, consequent)->float:
    antecedent = [antecedent] if not isinstance(antecedent,list) else antecedent # solo perché a volte non è già una lista e non gli piace
    support_antecedent = compute_support(dataset,antecedent) 
    support_consequent = compute_support(dataset,antecedent+consequent)

    #print(f"{antecedent} has support {support_antecedent}.\n{antecedent},{consequent} has support {support_consequent}.")
    return support_consequent/support_antecedent

selected_rule_antecedent = ["41","1","29"]
selected_rule_consequent = ["17"]


confidence = compute_confidence(dataset,selected_rule_antecedent,selected_rule_consequent)

print(f"Rule {selected_rule_antecedent} --> {selected_rule_consequent} has confidence {confidence:4.3}.\nAntecedent support: {compute_support(dataset,selected_rule_antecedent)}\nAntecedent U consequent support: {compute_support(dataset,selected_rule_antecedent+selected_rule_consequent)}")

Rule ['41', '1', '29'] --> ['17'] has confidence 0.864.
Antecedent support: 22
Antecedent U consequent support: 19


In [14]:
from itertools import permutations
rule_permutations = list(permutations(selected_rule_antecedent))


class ShapleyTable:
    table = {}
    def __init__(dataset, antecedent, consequent):
        pass
    def __str__():
        pass

# Initialize the table for computing shape
def init_conf_table(dataset, antecedent, consequent):
    table = {}
    # For each permutation of the rule
    for permutation in rule_permutations:
        ###print(f"--Permutation {permutation}:")
        # Initialize a dict with the row of the table
        shapley_values_rows = {}

        # for each increasing portion of the permutation (es. ABC --> A, AB, ABC)
        for i in range(0,len(permutation)):
            # Extract the portion of the permutation
            to_compute = list(permutation[:i+1])
            # Compute the starting shapley value, aka the confidence of the biggest subset of the permutation (es. ABC --> ABC)
            curr_value = compute_confidence(dataset,to_compute,consequent) 

            
            ###print(f"> computing {to_compute}: {to_compute}{curr_value:4.2}",end="")
            # Now substract all the smaller ones, in order (es. ABC --> subtract AB and A)
            for j in range(0,len(to_compute)-1):
                curr_value-=compute_confidence(dataset,to_compute[:j+1],consequent)
                ###print(f" - {to_compute[:j+1]}{compute_confidence(dataset,to_compute[:j+1],consequent):4.2}",end="")

            ###print("")            

            shapley_values_rows[permutation[i]]= curr_value


        table[permutation] = shapley_values_rows       

        #print(confidence_table_values)
    return table




def decent_print(shapley_table:dict):
    
    #header
    print("Permutation\t\t",end="")
    for item in sorted(selected_rule_antecedent):
        print(f"| \t {item}\t", end="")
    print("")  

    print("-----------------------"*len(selected_rule_antecedent))

    for permutation in shapley_table:
        print(f"{permutation}\t",end="")
        for item in sorted(selected_rule_antecedent):
            print(f"| {shapley_table[permutation][item]:10.2}\t",end="")
        print("")

    print("-----------------------"*len(selected_rule_antecedent))

    print("Average\t\t\t",end="")
    for item in sorted(selected_rule_antecedent):
        average = []
        for permutation in shapley_table:
            average.append(shapley_table[permutation][item])
        #print(f"{item}---->{average}")
        print(f"|{sum(average) / float(len(average)):10.2}\t",end="")

a=init_conf_table(dataset, selected_rule_antecedent, selected_rule_consequent)

decent_print(a)




Permutation		| 	 1	| 	 29	| 	 41	
---------------------------------------------------------------------
('41', '1', '29')	|        0.0	|      -0.86	|       0.86	
('41', '29', '1')	|      -0.86	|        0.0	|       0.86	
('1', '41', '29')	|       0.92	|      -0.92	|     -0.053	
('1', '29', '41')	|       0.92	|        0.0	|      -0.97	
('29', '41', '1')	|      -0.92	|       0.92	|     -0.053	
('29', '1', '41')	|   -0.00047	|       0.92	|      -0.97	
---------------------------------------------------------------------
Average			|    0.0087	|     0.009	|    -0.053	

In [5]:
column_order = sorted(selected_rule_antecedent)

def print_conf_table(confidence_table):
    for permutation in confidence_table:
        print(f"{permutation}  ", end="")
        for element in sorted(permutation):
            print(f"{element}:{float(confidence_table[permutation][element]):4.2}\t   ",end="")
        print("")


confidence_table = init_conf_table()

# riga
for permutation in confidence_table:
    prec_calc = 0
    prec_rule = []
    for element in permutation:
       # print(f"curr rule:{[element]+prec_rule}\tprec rule:{prec_rule}\n\t\tprec calc:{prec_calc}\n\t\tconf{[element]+prec_rule}:{calculate_confidence(dataset,[element] + prec_rule,selected_rule_consequent)}-{prec_calc}"   )
        confidence_table[permutation][element]=\
            compute_confidence(dataset,[element] + prec_rule,selected_rule_consequent)-prec_calc
        prec_calc += confidence_table[permutation][element]
        prec_rule.append(element)
    
print_conf_table(confidence_table)

TypeError: init_conf_table() missing 3 required positional arguments: 'dataset', 'antecedent', and 'consequent'

In [None]:

# calcola media per colonna
avgs = {key: 0 for key in column_order}
for permutation in confidence_table:
    for column,shapley in confidence_table[permutation].items():
        avgs[column]+=shapley / (len(column_order))

print(avgs)

{'18': 0.4095637835791547, '5': 0.3654749896642356, '725': 1.1780862267566097}
