## Select dataset and settings

In [2]:
# Imports and default options

import ipywidgets as widgets

# Default options. The slider widgets start from these values, and the
# "Compute support value" cell overwrites them with the current widget state.
selected_dataset: str = "skating"  # base file name; ".txt" is appended when loading
selected_window: int = 3           # number of items in each window
selected_stride: int = 5           # step between the starts of two consecutive windows
# Items are strings because the dataset loader yields string tokens; integer
# defaults such as [1, 29] would never compare equal to any loaded item.
selected_items: list = ["1", "29"]
possible_rules: set = set()        # filled in once the dataset has been scanned


## Utility functions

In [3]:
# Loads dataset function
def load_dataset(selected_dataset, max_lines=50):
    """Load a sequence dataset from ``<selected_dataset>.txt``.

    Each line of the file is one sequence; items are separated by spaces
    and/or tabs.

    Args:
        selected_dataset: Path of the dataset file without the ``.txt``
            extension.
        max_lines: Maximum number of lines to read from the top of the file.
            Defaults to 50; pass ``None`` to read the whole file.

    Returns:
        A list with one entry per line read, each entry being the list of
        string tokens found on that line (empty for a blank line).

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(selected_dataset + '.txt') as f:
        lines = f.read().splitlines()
    # str.split() with no argument collapses runs of whitespace and ignores
    # leading/trailing whitespace, so repeated or trailing separators never
    # yield empty-string items.
    return [line.split() for line in lines[:max_lines]]

In [4]:
# Finds all unique items in a dataset
def check_possible_rules(selected_dataset):
    """Return the set of distinct items appearing in the named dataset.

    The dataset is loaded through ``load_dataset`` and every item of every
    sequence is collected into a single set.
    """
    return {
        token
        for sequence in load_dataset(selected_dataset)
        for token in sequence
    }

# Item vocabulary of the default dataset (reads the dataset file when this cell runs).
possible_rules = check_possible_rules(selected_dataset)

In [5]:
# Checks if a string is substring (with the definition given in class) of another
def trova_sottostringa(sottostringa, lista):
    """Tell whether ``sottostringa`` occurs in ``lista`` as an ordered subsequence.

    The items of ``sottostringa`` must all appear in ``lista`` in the same
    relative order, but they do not need to be adjacent.

    Args:
        sottostringa: Iterable with the items to look for.
        lista: Iterable that is scanned once, left to right.

    Returns:
        True if every item of ``sottostringa`` is matched in order, False
        otherwise. An empty ``sottostringa`` is contained in any sequence,
        so it always yields True.
    """
    pattern = list(sottostringa)
    # Guard: indexing pattern[0] below would raise IndexError on an empty pattern.
    if not pattern:
        return True

    matched = 0  # number of leading pattern items matched so far
    for element in lista:
        if element == pattern[matched]:
            matched += 1
            if matched == len(pattern):
                return True
    return False

## Mini UI for quicker selection of parameters

In [6]:
# Widgets
# Dataset selector. It starts on `selected_dataset` so that it agrees with
# `possible_rules`, which was computed from that same dataset.
input_dataset = widgets.Select(
    options=['skating','question'],
    value=selected_dataset,
    description='Dataset:',
    disabled=False,
)
# Window length (number of items per window).
input_window = widgets.IntSlider(
    value=selected_window,
    min=2,
    max=10,
    step=1,
    description='Window: ',
    disabled=False,
    continuous_update=True,
    orientation='horizontal',
    readout=True,
    readout_format='d'
)
# Stride (step between the starts of consecutive windows).
input_stride = widgets.IntSlider(
    value=selected_stride,
    min=1,
    max=10,
    step=1,
    description='Stride: ',
    disabled=False,
    continuous_update=True,
    orientation='horizontal',
    readout=True,
    readout_format='d'
)
# Rule editor. The default rule is the first `window` items of the sorted
# vocabulary; items are strings, so the ordering is lexicographic
# ('1', '10', '11', ...). The allowed tags are sorted the same way the
# update callbacks sort them.
input_items = widgets.TagsInput(
    description= 'Rule to be checked: ',
    value=sorted(possible_rules)[0:input_window.value],
    allowed_tags=sorted(possible_rules),
    allow_duplicates=False
)


# Updating values
def _refresh_rule_input():
    """Rebuild the rule editor from the currently selected dataset and window.

    Reads the item vocabulary of the dataset chosen in ``input_dataset`` and
    resets ``input_items`` to the first ``input_window.value`` items of the
    sorted vocabulary.
    """
    available_items = sorted(check_possible_rules(input_dataset.value))
    # Set the allowed tags before the value, so the new value is never checked
    # against the previous dataset's vocabulary.
    # NOTE(review): assumes TagsInput validates `value` against `allowed_tags`
    # on assignment - confirm against the installed ipywidgets version.
    input_items.allowed_tags = available_items
    input_items.value = available_items[0:input_window.value]


def on_value_change(change):
    """Observer for the window slider: resize the default rule to the new window.

    Args:
        change: Change notification passed by the widget; not used.
    """
    _refresh_rule_input()


def on_dataset_change(change):
    """Observer for the dataset selector: reload the vocabulary and default rule.

    Args:
        change: Change notification passed by the widget; not used.
    """
    _refresh_rule_input()


# Attaching update functions
# Both observers are registered on the 'value' trait of their widget.
input_window.observe(on_value_change, names='value')
input_dataset.observe(on_dataset_change, names='value')

# NOTE(review): this button is created but never displayed (its display call
# below is commented out) and no click handler is attached in this cell.
button = widgets.Button(
    description='Calculate support',
    disabled=False,
    button_style='', 
    tooltip='Calculate support',
    icon='check'
)

# Display widgets
display(input_dataset,input_window,input_stride,widgets.Label(value="Rule to check (note: you can reorder the items)"),input_items)
print("")
#display(button)


# NOTE(review): prints every loaded sequence of the default dataset
# (`selected_dataset`, not the widget selection); looks like a debugging aid.
print(load_dataset(selected_dataset))


Select(description='Dataset:', options=('skating', 'question'), value='skating')

IntSlider(value=3, description='Window: ', max=10, min=2)

IntSlider(value=5, description='Stride: ', max=10, min=1)

Label(value='Rule to check (note: you can reorder the items)')

TagsInput(value=['1', '10', '11'], allow_duplicates=False, allowed_tags=['16', '34', '63', '36', '8', '38', '6…


[['1', '29', '21', '15', '3', '1', '16', '17', '23', '9', '24', '25', '5', '6', '30', '31', '22', '7', '26', '2', '27', '10', '11', '28', '25', '12', '9', '8', '18', '15', '16', '19', '20', '17', '18', '15', '10', '13', '14', '9', '16', '17', '10', '18', '15', '32', '33', '16', '17', '26', '34', '31', '4', '18', '15', '16', '32'], ['1', '29', '23', '3', '41', '1', '24', '27', '9', '35', '28', '25', '21', '42', '39', '43', '36', '37', '44', '40', '38', '5', '6', '26', '27', '30', '31', '28', '25', '22', '26', '2', '23', '24', '27', '32', '29', '28', '30', '31', '15', '16', '19', '20', '17', '10', '13', '14', '9', '10', '18', '15', '16', '4', '32'], ['1', '29', '21', '1', '9', '45', '46', '27', '28', '23', '24', '27', '28', '25', '30', '31', '22', '2', '10', '13', '15', '14', '9', '16', '17', '10', '13', '14', '9', '10', '32', '33', '18', '15', '34', '31', '16', '17', '26', '18', '32'], ['1', '29', '41', '21', '25', '1', '17', '26', '27', '28', '9', '35', '27', '42', '28', '47', '25', '

## Compute support value

In [44]:
# Snapshot the current widget selections into the module-level settings.
# NOTE(review): the results below depend on this interactive state, so
# re-running the notebook reproduces them only with the same selections.
selected_dataset = input_dataset.value
selected_window = input_window.value
selected_stride = input_stride.value
selected_items = input_items.value
# Both calls below read the dataset file (check_possible_rules loads it internally).
possible_rules = check_possible_rules(selected_dataset)
dataset = load_dataset(selected_dataset)

# Echo the parameters used by the support computation.
print(f"Window {selected_window}, stride {selected_stride}, dataset {selected_dataset}")
print(f"Looking for string {selected_items}")

Window 2, stride 1, dataset question
Looking for string ['0', '1']


In [45]:
# 1. Build all the windows (candidate substrings)
# Every sequence is scanned with a window of `selected_window` items that
# advances by `selected_stride` items; a sequence shorter than the window
# contributes nothing, and trailing items that cannot fill a whole window
# are not used.
substrings = [
    tuple(item[start:start + selected_window])
    for item in dataset
    for start in range(0, len(item) - selected_window + 1, selected_stride)
]


# 2. Count how many windows contain the selected rule
count_found = 0
for item in substrings:
    if trova_sottostringa(selected_items, item):
        print(f"Found in {item}")
        count_found += 1

print(f"{count_found} entries found")

# 3. Compute the support of the selected rule
# Support = matching windows / total windows. When no window could be built
# (every sequence is shorter than the window) the support is reported as 0
# instead of failing with a division by zero.
support = count_found / len(substrings) if substrings else 0.0
print(f"La regola {selected_items} viene trovata in {count_found} window su {len(substrings)} window totali; ha quindi un supporto di {support:4.3f}")



Found in ('0', '1')
Found in ('0', '1')
Found in ('0', '1')
Found in ('0', '1')
4 entries found
La regola ['0', '1'] viene trovata in 4 window su 497 window totali; ha quindi un supporto di 0.008
