In [1]:
import os
import torch
from torch.nn.functional import normalize
import copy
import numpy as np

from phis_generator import StlGenerator
from traj_measure import BaseMeasure
from utils import from_string_to_formula, load_pickle, dump_pickle
from kernel import StlKernel

In [2]:
sampler = StlGenerator(leaf_prob=0.4)
n_vars = 3
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
mu = BaseMeasure(device=device)

In [10]:
phis = sampler.bag_sample(100000, nvars=n_vars)

In [11]:
len(phis)

100000

In [12]:
string_version = list(map(str, phis))
string_version[0]

'( ( x_1 <= 0.0557 or x_1 >= 1.3788 ) and x_0 <= 0.8512 )'

In [13]:
# Salva la lista in un file txt
with open('generated-train.txt', 'w') as file:
    for item in string_version:
        file.write(item + '\n')  # Scrive ogni elemento su una nuova riga

In [14]:
from anchor_set_generation import anchorGeneration

anchorGeneration(diff_init = True, embed_dim = 1000)

'anchor_set_1000_dim'

In [4]:
import re

def rileggi_formule(file_path):
    re_read = []
    with open(file_path, 'r') as file:
        lines = file.readlines()  # Legge tutte le righe del file

    # Itera su ogni riga del file
    for line_number, line in enumerate(lines, start=1):
        line = line.strip()  # Rimuove eventuali spazi bianchi extra

        # Se la riga contiene una formula (es. +, -, *, /, ecc.)
        if re.search(r'[\+\-\*/=<>^]', line):  # Regex per trovare operatori matematici
            re_read.append(line)

    return re_read

In [5]:
re_read = rileggi_formule('generated-train.txt')
re_read[0]

'( ( x_1 <= 0.0557 or x_1 >= 1.3788 ) and x_0 <= 0.8512 )'

In [6]:
formulae = list(map(from_string_to_formula, re_read))
formulae[0]

<stl.And at 0x7f6471109e20>

In [7]:
anchor_set = load_pickle('anchor_set_1000_dim.pickle')
anchor_set[0]

<stl.Globally at 0x7f6465e49310>

In [8]:
kernel = StlKernel(mu, varn=n_vars)

gram_phis = kernel.compute_bag_bag(formulae[:1000], anchor_set)

In [11]:
import pickle 

with open("embedding_tensor_1000.pickle", "wb") as f:
    pickle.dump(gram_phis, f)

In [13]:
with open("embedding_tensor_1000.pickle", "rb") as f:
    loaded_tensor = pickle.load(f)

In [21]:
loaded_tensor[0]

tensor([0.0058, 0.0044, 0.0008, 0.0372, 0.0071, 0.0436, 0.0010, 0.0544, 0.0830,
        0.0034, 0.0048, 0.0082, 0.0009, 0.0720, 0.0028, 0.0003, 0.0066, 0.0384,
        0.0088, 0.0046, 0.0049, 0.0078, 0.0165, 0.0165, 0.0009, 0.0573, 0.0006,
        0.0305, 0.0053, 0.0044, 0.0023, 0.0059, 0.0073, 0.0042, 0.0014, 0.0241,
        0.0419, 0.0026, 0.0075, 0.0094, 0.0007, 0.0013, 0.0006, 0.0086, 0.0020,
        0.0010, 0.0027, 0.0043, 0.0062, 0.0093, 0.0078, 0.0045, 0.0052, 0.0317,
        0.0070, 0.0026, 0.0485, 0.0048, 0.0065, 0.0071, 0.0069, 0.0081, 0.0048,
        0.0038, 0.0009, 0.0253, 0.0010, 0.0010, 0.0028, 0.0017, 0.0052, 0.0005,
        0.0013, 0.0152, 0.0181, 0.0362, 0.0022, 0.0054, 0.0721, 0.0066, 0.0035,
        0.0063, 0.0060, 0.0352, 0.0641, 0.0151, 0.0187, 0.0007, 0.0410, 0.0009,
        0.0011, 0.0562, 0.0087, 0.0097, 0.0013, 0.0036, 0.0055, 0.0017, 0.0052,
        0.0016, 0.0046, 0.0834, 0.0012, 0.0014, 0.0019, 0.0044, 0.0031, 0.0415,
        0.0176, 0.0067, 0.0122, 0.0095, 

In [22]:
import pandas as pd

dataset = []

for formula in zip(formulae[:1000], loaded_tensor):
    embedding_str = f"tensor({embedding.tolist()})"  # Usa .tolist() per convertirlo in lista
    dataset.append([formula, embedding_str])  # Aggiungi la formula e l'embedding come stringa

# 4. Creiamo un DataFrame con pandas
df = pd.DataFrame(dataset, columns=["Formula", "Embedding"])


In [23]:
df.head()

Unnamed: 0,Formula,Embedding
0,(( ( x_1 <= 0.0557 or x_1 >= 1.3788 ) and x_0 ...,"tensor([0.44893965125083923, 0.342872321605682..."
1,"(( ( eventually[5,9] ( eventually ( x_0 <= -0....","tensor([0.44893965125083923, 0.342872321605682..."
2,"(always[18,20] ( x_0 >= 1.2398 ), [tensor(0.17...","tensor([0.44893965125083923, 0.342872321605682..."
3,"(( x_0 <= -0.0566 until[5,inf] x_1 >= 1.1386 )...","tensor([0.44893965125083923, 0.342872321605682..."
4,"(( eventually[17,21] ( always[9,14] ( x_2 >= -...","tensor([0.44893965125083923, 0.342872321605682..."


In [24]:
df.to_csv('formulas_with_embeddings.csv', index=False)