In [2]:
import networkx as nx
import stl
from utils import from_string_to_formula, load_pickle, dump_pickle
import numpy as np
import pandas as pd
import phis_generator_depth 

In [3]:

def get_name_given_type(formula):
    """
    Returns the type of node (as a string) of the top node of the formula/sub-formula
    """
    name_dict = {stl.And: 'and', stl.Or: 'or', stl.Not: 'not', stl.Eventually: 'F', stl.Globally: 'G', stl.Until: 'U',
                 stl.Atom: 'x'}
    return name_dict[type(formula)]


def get_id(child_name, name, label_dict, idx):
    """
    Get unique identifier for a node
    """
    while child_name in label_dict.keys():  # if the name is already present
        idx += 1
        child_name = name + "(" + str(idx) + ")"
    return child_name, idx                  # returns both the child name and the identifier


def get_temporal_list(temporal_node):
    """
    Returns the features vector for temporal nodes (the two bounds of the temporal interval)
    Variant and num_arg modify the length of the list to return (3, 4 or 5)
    """
    left = float(temporal_node.left_time_bound) if temporal_node.unbound is False else 0.
    right = float(temporal_node.right_time_bound) if (temporal_node.unbound is False and
                                                      temporal_node.right_unbound is False) else -1.
    vector_l = [left, right, 0.]      # third slot for sign and fourth for threshold        # add another slot for argument number
    return vector_l


def add_internal_child(current_child, current_idx, label_dict):
    child_name = get_name_given_type(current_child) + '(' + str(current_idx) + ')'
    child_name, current_idx = get_id(child_name, get_name_given_type(current_child), label_dict, current_idx)
    return child_name, current_idx


def add_leaf_child(node, name, label_dict, idx):
    """
    Add the edges and update the label_dictionary and the identifier count for a leaf node (variable)
    variant = ['original', 'threshold-sign', 'all-in-var']
    shared_var = [True, False] denotes if shared variables for all the DAG or single variables (tree-like)
    num_arg = [True, False] if true argument number is one-hot encoded in the feature vector
    until_right is a flag to detect when the argument number encoding should be 1
    """
    new_e = []
    label_dict[name] = [0., 0., 0.]     # te
    atom_idx =str(node).split()[0] +  '(' + str(idx) + ')'
    # different names for the same variables (e.g. x_1(5), x_1(8))
    idx += 1
    if atom_idx not in label_dict.keys():
        label_dict[atom_idx] = [0., 0., 0.]

    if str(node).split()[1] == '<=':
        label_dict[name] = [0., 0., round(node.threshold, 4)]
    else:
        label_dict[name] = [0., 0., round(node.threshold, 4)]
    new_e.append([name, atom_idx])
    return new_e, label_dict, idx+1


def traverse_formula(formula, idx, label_dict):
    current_node = formula
    edges = []
    if type(current_node) is not stl.Atom:
        current_name = get_name_given_type(current_node) + '(' + str(idx) + ')'
        if (type(current_node) is stl.And) or (type(current_node) is stl.Or) or (type(current_node) is stl.Not):
            label_dict[current_name] = [0., 0., 0. ] # temp_left, temp_right, threshold
        else:
            label_dict[current_name] = get_temporal_list(current_node)
        if (type(current_node) is stl.And) or (type(current_node) is stl.Or) or (type(current_node) is stl.Until):
            left_child_name, current_idx = add_internal_child(current_node.left_child, idx + 1, label_dict)
            edges.append([current_name, left_child_name])
            if type(current_node.left_child) is stl.Atom:
                e, d, current_idx = add_leaf_child(current_node.left_child, left_child_name, label_dict, current_idx+1)
                edges += e
                label_dict.update(d)
            e, d = traverse_formula(current_node.left_child, current_idx, label_dict)
            edges += e
            label_dict.update(d)
            right_child_name, current_idx = add_internal_child(current_node.right_child, current_idx + 1, label_dict)
            edges.append([current_name, right_child_name])
            if type(current_node.right_child) is stl.Atom:
                e, d, current_idx = add_leaf_child(current_node.right_child, right_child_name, label_dict,
                                                   current_idx+1)
                edges += e
                label_dict.update(d)
            e, d = traverse_formula(current_node.right_child, current_idx, label_dict)
            edges += e
            label_dict.update(d)
        else:
            # eventually, globally, not
            child_name, current_idx = add_internal_child(current_node.child, idx + 1, label_dict)
            edges.append([current_name, child_name])
            if type(current_node.child) is stl.Atom:
                e, d, current_idx = add_leaf_child(current_node.child, child_name, label_dict, current_idx+1)
                edges += e
                label_dict.update(d)
            e, d = traverse_formula(current_node.child, current_idx, label_dict)
            edges += e
            label_dict.update(d)
    return edges, label_dict


def build_dag(formula):
    edges, label_dict = traverse_formula(formula, 0, {})
    graph = nx.from_edgelist(edges, create_using=nx.DiGraph)
    assert(nx.is_directed_acyclic_graph(graph))
    return graph, label_dict


def get_depth(formula):
    phi_g = build_dag(formula)[0]
    return len(nx.dag_longest_path(phi_g)) - 1

In [47]:
df = pd.read_csv('formulae_mining/step_7200_formulae.csv')
df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Gold Formula,Generated Formula,Embedding Gold Formula,Embedding Generated Formula
0,0,0,"always[10,inf] ( x_0 >= 0.9475 )","( x_0 >= 0.888 until[17,21] x_0 >= 0.888 )","[0.006420445162802935, 0.03939420357346535, 0....","[0.009619303978979588, 0.060488030314445496, 0..."
1,1,1,"not ( always[14,19] ( eventually[7,inf] ( x_0 ...","always[18,21] ( x_0 >= -0.8881 )","[0.4136229455471039, 0.7028579711914062, 0.000...","[0.5880542397499084, 0.721400797367096, 0.0004..."
2,2,2,"( x_2 <= -0.2641 until[6,9] x_1 <= 0.7181 )",( x_2 <= -0.0401 and x_2 <= -0.181 ),"[0.001527473097667098, 0.0028579006902873516, ...","[0.003919020760804415, 0.004877804778516293, 0..."
3,3,3,not ( x_0 <= 0.2762 ),not ( x_0 <= 0.0444 ),"[0.07814296334981918, 0.36237552762031555, 0.0...","[0.17230193316936493, 0.6238083839416504, 0.00..."
4,4,4,"eventually[6,15] ( x_2 <= 0.54 )","always[17,21] ( x_2 <= 0.8881 )","[0.029973845928907394, 0.015523881651461124, 0...","[0.04811500012874603, 0.02031852677464485, 0.0..."


In [48]:
df['Gold Formula'][0]

'always[10,inf] ( x_0 >= 0.9475 )'

In [53]:
from_string_to_formula(df['Gold Formula'][50])

<stl.Eventually at 0x7fd9f66c8310>

In [54]:
test_formula = from_string_to_formula(df['Gold Formula'][0])
print(get_depth(test_formula))

2


In [55]:
def mean_formulae_depth(dataset):
    formulae_depths = []
    for idx in range(len(dataset)):
        object_formula = from_string_to_formula(df['Gold Formula'][idx])
        formulae_depths.append(get_depth(object_formula))

    return np.mean(formulae_depths)

In [56]:
mean_formulae_depth(df)

4.75

In [41]:
stl_gen = StlGenerator(max_depth=5)

In [42]:
formulae = []
for i in range(100):
    formulae.append(get_depth(stl_gen.sample(3)))

In [43]:
np.mean(formulae)

4.68

In [44]:
np.max(formulae)

6

In [45]:
np.min(formulae)

2

In [1]:
import os
import torch
from torch.nn.functional import normalize
import copy
import numpy as np
import pandas as pd
import stl 
from encoder import STLEncoder

from phis_generator_depth import StlGenerator
from traj_measure import BaseMeasure
from utils import from_string_to_formula, load_pickle, dump_pickle, get_depth
from kernel import StlKernel

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
from handcoded_tokenizer import STLTokenizer
tokenizer = STLTokenizer('tokenizer_files/tokenizer.json')

In [7]:
def generate_formulae_depth(n_phis, n_vars, depth):

    # Helper function to generate and filter formulae
    def generate_and_filter(n_phis, n_vars, depth):
        sampler = StlGenerator(max_depth=depth, min_depth=depth)
        sampled_objs = sampler.bag_sample(bag_size=n_phis, nvars=n_vars)

        # convert to string
        sampled_objs = list(map(str, sampled_objs))

        lengths = []
        for obj in sampled_objs:
            lengths.append(len(tokenizer.encode(obj)))

        # filter sampled_objs where length is less than 500
        filtered_objs = [obj for i, obj in enumerate(sampled_objs) if lengths[i] < 500]
        return filtered_objs

    # Generate initial batch of formulae
    formulae = generate_and_filter(n_phis, n_vars, depth)

    # If we don't have enough formulae, regenerate until we meet the required number
    while len(formulae) < n_phis:
        additional_formulae = generate_and_filter(n_phis - len(formulae), n_vars, depth)
        formulae.extend(additional_formulae)

    # Truncate the list to exactly n_phis formulae if needed
    formulae = formulae[:n_phis]

    # Create DataFrame with the filtered formulae
    df = pd.DataFrame({'Formula': formulae})

    return df

In [13]:
generate_formulae_depth(2, 3, 8)

Unnamed: 0,Formula
0,"not ( ( not ( eventually[3,12] ( ( always[9,34..."
1,"( eventually[0,9] ( not ( not ( ( ( ( always[3..."


In [3]:
test_sampled_collection = []

for i in range(len(test_sampled)):
    if get_depth(test_sampled[i]) == 4:
        test_sampled_collection.append(test_sampled)

In [4]:
test_sampled_collection

[[<stl.Or at 0x7f79436bd690>,
  <stl.Not at 0x7f79436bd790>,
  <stl.Until at 0x7f79436bdc90>,
  <stl.Not at 0x7f79436bdf50>,
  <stl.Globally at 0x7f79436be050>,
  <stl.Not at 0x7f79436be290>,
  <stl.Or at 0x7f79436be850>,
  <stl.Not at 0x7f79436be950>,
  <stl.Eventually at 0x7f79436bed10>,
  <stl.Globally at 0x7f79436bedd0>],
 [<stl.Or at 0x7f79436bd690>,
  <stl.Not at 0x7f79436bd790>,
  <stl.Until at 0x7f79436bdc90>,
  <stl.Not at 0x7f79436bdf50>,
  <stl.Globally at 0x7f79436be050>,
  <stl.Not at 0x7f79436be290>,
  <stl.Or at 0x7f79436be850>,
  <stl.Not at 0x7f79436be950>,
  <stl.Eventually at 0x7f79436bed10>,
  <stl.Globally at 0x7f79436bedd0>],
 [<stl.Or at 0x7f79436bd690>,
  <stl.Not at 0x7f79436bd790>,
  <stl.Until at 0x7f79436bdc90>,
  <stl.Not at 0x7f79436bdf50>,
  <stl.Globally at 0x7f79436be050>,
  <stl.Not at 0x7f79436be290>,
  <stl.Or at 0x7f79436be850>,
  <stl.Not at 0x7f79436be950>,
  <stl.Eventually at 0x7f79436bed10>,
  <stl.Globally at 0x7f79436bedd0>],
 [<stl.Or at 0x7

In [5]:
test_sampled = list(map(str, test_sampled))
test_sampled

['( ( ( x_1 >= 0.1802 or x_2 >= -0.9572 ) and eventually[11,15] ( x_2 >= 0.3763 ) ) or ( not ( x_1 <= -0.7689 ) until[4,12] ( x_0 >= 1.1819 until[34,36] x_2 >= -0.302 ) ) )',
 'not ( always[10,12] ( ( x_0 >= 0.2989 or x_1 >= -0.3329 ) ) )',
 '( not ( eventually ( x_0 <= -0.6442 ) ) until[28,31] ( always[28,33] ( x_0 >= -0.0131 ) and ( x_1 >= -0.1231 and x_0 >= 0.3661 ) ) )',
 'not ( ( ( x_2 <= 0.3782 and x_1 <= 1.4215 ) or not ( x_2 <= -0.4135 ) ) )',
 'always[34,36] ( always[17,22] ( ( x_0 <= -0.6427 and x_2 >= 0.4317 ) ) )',
 'not ( eventually[31,inf] ( not ( x_2 >= 0.5838 ) ) )',
 '( ( ( x_1 <= -0.0126 and x_1 <= 0.0025 ) or always[21,27] ( x_1 <= 0.2289 ) ) or ( ( x_2 <= 2.2102 and x_0 >= -2.7682 ) and eventually[21,35] ( x_1 >= -0.3583 ) ) )',
 'not ( always[27,29] ( ( x_1 <= 0.4385 or x_0 <= 1.1103 ) ) )',
 'eventually[25,31] ( ( always[27,34] ( x_2 <= -0.0535 ) and ( x_2 >= 0.2818 and x_2 >= -0.5559 ) ) )',
 'always[30,32] ( always[3,12] ( not ( x_0 >= -1.7098 ) ) )']

In [14]:
encoder = encoder.STLEncoder(embed_dim = 1024, anchor_filename = "anchor_set_1024_dim.pickle")

In [15]:
def generate_formulae_depth(n_phis, n_vars, depth):

    # generate formulae of depth equal to (depth + 1) 
    sampler = StlGenerator(max_depth = depth, min_depth = depth)
    sampled_objs = sampler.bag_sample(bag_size = n_phis, nvars = n_vars)

    # check on the formulae depths
    for i in range(len(sampled_objs)):
        assert get_depth(sampled_objs[i]) == (depth+1)

    # converted into strings
    sampled_formulae = list(map(str, sampled_objs))
    encoded_formulae = encoder.compute_embeddings(sampled_formulae)

    df = pd.DataFrame({'Formula': sampled_formulae, 'Embedding': encoded_formulae})
    
    return df

In [17]:
generate_formulae_depth(2, 3, 3)

KeyboardInterrupt: 

In [1]:
import os
import torch
from torch.nn.functional import normalize
import copy
import numpy as np
import pandas as pd
import stl 
from encoder import STLEncoder
from handcoded_tokenizer import STLTokenizer

from phis_generator_depth import StlGenerator
from traj_measure import BaseMeasure
from utils import from_string_to_formula, load_pickle, dump_pickle, get_depth
from kernel import StlKernel


encoder = STLEncoder(embed_dim = 1024, anchor_filename = "anchor_set_1024_dim.pickle")
tokenizer = STLTokenizer('tokenizer_files/tokenizer.json')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Helper function to generate and filter formulae
def generate_and_filter(n_phis, n_vars, depth):
    sampler = StlGenerator(max_depth=depth, min_depth=depth)
    sampled_objs = sampler.bag_sample(bag_size=n_phis, nvars=n_vars)

    # convert to string
    sampled_objs = list(map(str, sampled_objs))

    lengths = []
    for obj in sampled_objs:
        lengths.append(len(tokenizer.encode(obj)))

    # filter sampled_objs where length is less than 500
    
    filtered_objs = [obj for i, obj in enumerate(sampled_objs) if lengths[i] < 500]
    return filtered_objs

In [3]:
def generate_formulae_depth(n_phis, n_vars, depth):

    # Generate initial batch of formulae
    formulae = generate_and_filter(n_phis, n_vars, depth)

    # If we don't have enough formulae, regenerate until we meet the required number
    while len(formulae) < n_phis:
        delta = n_phis - len(formulae)
        additional_formulae = generate_and_filter(delta, n_vars, depth)
        formulae.append(additional_formulae)

    # Truncate the list to exactly n_phis formulae if needed
    formulae = formulae[:n_phis]

    return formulae

In [4]:
def embed_generated_formulae(df):
    # sampled_formulae = list(map(str, df['Formula Obj']))
    formulae_embeddings = encoder.compute_embeddings(df)
    return formulae_embeddings.tolist()

In [8]:
test = generate_formulae_depth(1, 2, 4)
len(test)

1

In [9]:
embed_generated_formulae(test)

KeyboardInterrupt: 