## Select dataset and settings

In [1]:
# Imports and default options

import ipywidgets as widgets

# Default settings. The "Compute support value" cell later reassigns all of
# these names from the current widget state.
# Dataset name; load_dataset appends ".txt" to it to build the file name.
selected_dataset : str = "skating"
# Initial value of the "Window" slider.
selected_window : int = 3
# Initial value of the "Stride" slider.
selected_stride : int = 5
# NOTE(review): these defaults are ints, while the items produced by
# load_dataset are strings, so this default could never match a loaded
# window — confirm whether it is only a placeholder.
selected_items : list = [12,11,12]
# Empty placeholder; replaced by check_possible_rules(selected_dataset) below.
possible_rules : set = set()


## Utility functions

In [2]:
# Loads dataset function
def load_dataset(selected_dataset, max_lines=50):
    """Read a dataset file and return it as a list of token lists.

    Args:
        selected_dataset: Dataset name; the file ``<selected_dataset>.txt``
            is opened.
        max_lines: Maximum number of leading lines to keep. Defaults to 50,
            the limit that was previously hard-coded; ``None`` keeps every
            line of the file.

    Returns:
        list[list[str]]: One list per kept line, holding that line's
        whitespace-separated tokens. A blank line yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
    """
    dataset = []
    with open(selected_dataset + '.txt') as f:
        lines = f.read().splitlines()
    for line in lines[:max_lines]:
        # split() with no argument treats tabs and runs of spaces as a single
        # separator and ignores leading/trailing whitespace, so no empty-string
        # "items" end up in the rows (split(" ") used to produce them).
        dataset.append(line.split())
    return dataset

In [3]:
# Finds all unique items in a dataset
def check_possible_rules(selected_dataset):
    """Return the set of distinct items found in the named dataset.

    The dataset is read through ``load_dataset``; every token of every
    loaded row is collected once.
    """
    return {token for row in load_dataset(selected_dataset) for token in row}

# Collect the distinct items of the default dataset; the widget cell below
# uses this set for the rule selector's allowed tags and initial value.
possible_rules = check_possible_rules(selected_dataset)

In [4]:
# Checks if one sequence is a "substring" of another, using the definition given in class: its items appear in the same order, not necessarily contiguously
def trova_sottostringa(sottostringa, lista):
    """Tell whether ``sottostringa`` occurs in ``lista`` as an ordered subsequence.

    The items of ``sottostringa`` must all appear in ``lista`` in the same
    relative order; they do not have to be adjacent.

    Args:
        sottostringa: Iterable of items to look for.
        lista: Iterable of items to search in.

    Returns:
        bool: True when every item of ``sottostringa`` is matched in order.
        An empty ``sottostringa`` matches any ``lista`` (previously this
        raised IndexError for a non-empty ``lista``).
    """
    remaining = iter(lista)
    # `item in remaining` consumes the iterator up to and including the first
    # match, so each following lookup resumes right after the previous match.
    return all(item in remaining for item in sottostringa)

## Mini UI for quicker selection of parameters

In [5]:
# Widgets
# Dataset selector. It starts on the configured default so that it agrees with
# `possible_rules`, which was computed from `selected_dataset`.
input_dataset = widgets.Select(
    options=['skating','question'],
    value=selected_dataset,
    description='Dataset:',
    disabled=False,
)
# Window length slider (2..10), starting at the configured default.
input_window = widgets.IntSlider(
    value=selected_window,
    min=2,
    max=10,
    step=1,
    description='Window: ',
    disabled=False,
    continuous_update=True,
    orientation='horizontal',
    readout=True,
    readout_format='d'
)
# Stride slider (1..10), starting at the configured default.
input_stride = widgets.IntSlider(
    value=selected_stride,
    min=1,
    max=10,
    step=1,
    description='Stride: ',
    disabled=False,
    continuous_update=True,
    orientation='horizontal',
    readout=True,
    readout_format='d'
)
# Rule selector. Both the allowed tags and the initial selection come from the
# sorted item list, matching what the change handlers assign later
# (the allowed tags used to be left in arbitrary set order here).
input_items = widgets.TagsInput(
    description= 'Rule to be checked: ',
    value=sorted(possible_rules)[0:input_window.value],
    allowed_tags=sorted(possible_rules),
    allow_duplicates=False
)


# Updating values
def on_value_change(change):
    """Reset the rule selector after the window slider changes.

    Reloads the items of the currently selected dataset and sets the rule
    selector to the first ``input_window.value`` items in sorted order.

    Args:
        change: Change notification passed by the widget observer (unused).
    """
    # Sort first, then slice: slicing the unordered set before sorting picked
    # an arbitrary group of items instead of the first ones in sorted order.
    ordered_rules = sorted(check_possible_rules(input_dataset.value))

    input_items.allowed_tags = ordered_rules
    input_items.value = ordered_rules[0:input_window.value]
    
def on_dataset_change(change):
    """Refresh the rule selector after a different dataset is selected.

    Loads the items of the newly selected dataset, updates the allowed tags,
    and then sets the selection to the first ``input_window.value`` items in
    sorted order.

    Args:
        change: Change notification passed by the widget observer (unused).
    """
    # Sort first, then slice: slicing the unordered set before sorting picked
    # an arbitrary group of items instead of the first ones in sorted order.
    ordered_rules = sorted(check_possible_rules(input_dataset.value))

    # The allowed tags are updated before the value, as in the original code,
    # so the new selection is assigned against the new dataset's tags.
    input_items.allowed_tags = ordered_rules
    input_items.value = ordered_rules[0:input_window.value]


# Attaching update functions
# Call the matching handler whenever the window slider or the dataset selector
# changes its value. The stride slider has no observer.
input_window.observe(on_value_change, names='value')
input_dataset.observe(on_dataset_change, names='value')

# Button widget. It is created here but never shown (the display call at the
# end of this cell is commented out) and no click handler is attached in this cell.
button = widgets.Button(
    description='Calculate support',
    disabled=False,
    button_style='', 
    tooltip='Calculate support',
    icon='check'
)

# Display widgets
display(input_dataset,input_window,input_stride,widgets.Label(value="Rule to check (note: you can reorder the items)"),input_items)
# Emits an empty line after the widgets.
print("")
# Disabled: the button is currently not part of the UI.
#display(button)





Select(description='Dataset:', options=('skating', 'question'), value='skating')

IntSlider(value=3, description='Window: ', max=10, min=2)

IntSlider(value=5, description='Stride: ', max=10, min=1)

Label(value='Rule to check (note: you can reorder the items)')

TagsInput(value=['1', '10', '11'], allow_duplicates=False, allowed_tags=['64', '2', '25', '26', '43', '38', '2…




## Compute support value

In [6]:
# Snapshot the current widget state into the plain module-level settings.
selected_dataset, selected_window, selected_stride, selected_items = (
    input_dataset.value,
    input_window.value,
    input_stride.value,
    input_items.value,
)

# Load the chosen dataset and the set of distinct items it contains.
dataset = load_dataset(selected_dataset)
possible_rules = check_possible_rules(selected_dataset)

# Echo the parameters the support computation will use.
print(f"Window {selected_window}, stride {selected_stride}, dataset {selected_dataset}")
print(f"Looking for string {selected_items}")

Window 10, stride 5, dataset question
Looking for string ['32', '35']


In [7]:
# 1. Collect every window of the dataset
# Slide a window of `selected_window` items over each sequence, advancing by
# `selected_stride` items. Windows are stored as tuples in a set, so identical
# windows are counted only once.
# NOTE(review): if the support should count every window occurrence rather
# than every distinct window, this needs to be a list — confirm.
substrings = set()
for item in dataset:
    # The "+ 1" keeps the last full window (start == len(item) - selected_window),
    # which the previous strict "<" bound skipped. Sequences shorter than the
    # window give an empty range and contribute nothing.
    for start in range(0, len(item) - selected_window + 1, selected_stride):
        substrings.add(tuple(item[start:start + selected_window]))


# 2. Count the windows that contain the selected rule as an ordered subsequence
count_found = sum(1 for window in substrings if trova_sottostringa(selected_items, window))

print(f"{count_found} entries found")


# 3. Compute the support of the selected rule
# Guard the division: when there are no windows at all (e.g. the window is
# longer than every sequence) the support is reported as 0 instead of raising
# ZeroDivisionError.
support = count_found / len(substrings) if substrings else 0.0
print(f"La regola {selected_items} viene trovata in {count_found} window su {len(substrings)} window totali; ha quindi un supporto di {support:4.3f}")



1 entries found
La regola ['32', '35'] viene trovata in 1 window su 29 window totali; ha quindi un supporto di 0.034
