In [17]:
from accelerate import Accelerator
from safetensors import safe_open
from safetensors.torch import load_file
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader 
import ast

In [18]:
from handcoded_tokenizer import STLTokenizer
from configuration import STLConfig
from modeling_stldec import STLForCausalLM

from transformers import AutoConfig, AutoModelForCausalLM

In [19]:
AutoConfig.register("STLdec", STLConfig)
AutoModelForCausalLM.register(STLConfig, STLForCausalLM)

In [20]:
config = STLConfig()

model_path = "output_test/epoch_2"
# Carica il modello e spostalo sulla device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = AutoModelForCausalLM.from_pretrained(model_path, config=config).to(device)  # Sposta il modello sulla device

In [21]:
# Inizializza l'Accelerator
accelerator = Accelerator()

# Definisci i percorsi
optimizer_path = "output_test/epoch_2/optimizer.bin"
scheduler_path = "output_test/epoch_2/scheduler.bin"

# Carica lo stato dell'ottimizzatore e dello scheduler, se necessario
# Questi passi dipendono dalla libreria che stai usando, di seguito un esempio generico
optimizer = torch.load(optimizer_path)
scheduler = torch.load(scheduler_path)

optimizer = accelerator.prepare(optimizer)
scheduler = accelerator.prepare(scheduler)

In [22]:
import pandas as pd

test_df = pd.read_csv("test_set.csv")
test_df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Formula,Embedding,Encoded_Formula
0,11040,11040,"eventually[19,21] ( always[7,18] ( x_1 <= 1.17...","tensor([0.06276094168424606, 0.024036630988121...","[2, 1, 7, 19, 26, 34, 21, 27, 26, 20, 1, 4, 1,..."
1,31020,31020,( ( ( x_2 >= 0.0125 and x_1 <= -0.4342 ) and (...,"tensor([0.000540480890776962, 0.00209438544698...","[2, 1, 4, 1, 4, 1, 4, 1, 17, 18, 27, 1, 12, 1,..."
2,33594,33594,"( ( ( ( x_2 >= 0.3983 until[15,18] eventually[...","tensor([0.006538981106132269, 0.00377054791897...","[2, 1, 4, 1, 4, 1, 4, 1, 4, 1, 17, 18, 27, 1, ..."
3,2401,2401,"always[13,18] ( x_2 >= 1.2213 )","tensor([0.0006358523387461901, 0.0017320667393...","[2, 1, 6, 19, 26, 28, 21, 26, 33, 20, 1, 4, 1,..."
4,27801,27801,"( x_0 >= -0.6846 until[8,inf] always[10,12] ( ...","tensor([0.2162070870399475, 0.7531894445419312...","[2, 1, 4, 1, 17, 18, 25, 1, 12, 1, 23, 25, 24,..."


In [23]:
class CustomDataset(Dataset):
    def __init__(self, df, device='cpu'):
        self.df = df
        self.device = device  

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        # Start from `Encoded_Formula`
        encoded_formula = self.df['Encoded_Formula'][idx]
        encoded_formula = ast.literal_eval(encoded_formula.strip())
        
        input_ids = encoded_formula[:-1]  # Tutti tranne l'ultimo
        labels = encoded_formula[1:]     # Tutti tranne il primo

        attention_mask = [0 if token == '1' else 1 for token in input_ids]
        # if 1 (i.e. tokenized `pad`), then neglect that token

        input_ids = torch.tensor(input_ids, dtype=torch.long).to(self.device)
        labels = torch.tensor(labels, dtype=torch.long).to(self.device)
        attention_mask = torch.tensor(attention_mask, dtype=torch.long).to(self.device)

        return {
            'input_ids': input_ids,
            'labels': labels,
            'attention_mask': attention_mask
        }

In [24]:
test_dataset = CustomDataset(test_df, device=device)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [26]:
import torch 
import math

model.eval()
losses = []
for step, batch in enumerate(test_loader):
    with torch.no_grad():
        outputs = model(**batch)   
    loss = outputs.loss
    losses.append(accelerator.gather_for_metrics(loss.repeat(16)))

losses = torch.cat(losses)
try:
    eval_loss = torch.mean(losses)
    perplexity = math.exp(eval_loss)
except OverflowError:
    perplexity = float("inf")

In [27]:
# Visualizza le metriche
accelerator.print(f"Eval Loss: {eval_loss.item()}")  # Visualizza la perdita
accelerator.print(f"Perplexity: {perplexity}")      # Visualizza la perplessità

Eval Loss: 0.45194780826568604
Perplexity: 1.571369933915324


In [29]:
import torch

# Supponiamo che `test_df` contenga già la colonna `Encoded_Formula` con i token

# Estrai un esempio a caso dal test_df
example_idx = 0  # Puoi scegliere un altro indice o usare random per un campione casuale
encoded_formula = ast.literal_eval(test_df['Encoded_Formula'][example_idx].strip())  # Decodifica la lista di token

# Converti il vettore di token in un tensor
input_ids = torch.tensor(encoded_formula, dtype=torch.long).unsqueeze(0).to(model.device)  # Aggiungi la dimensione batch

# Impostazioni per la generazione autoregressiva
max_length = 50  # Lunghezza massima della sequenza generata
temperature = 1.0  # Controlla la casualità (1.0 = più casuale, 0.0 = deterministico)
top_k = 50  # Top-k sampling
top_p = 0.95  # Top-p sampling (nucleus sampling)
num_return_sequences = 1  # Numero di sequenze da generare

# Genera la sequenza autoregressiva
with torch.no_grad():
    generated_ids = model.generate(
        input_ids=input_ids,  # Usa gli ID tokenizzati
        pad_token_id=model.config.pad_token_id,  # ID del token di padding, se presente
    )

# Decodifica e visualizza il testo generato
generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print("Generated Text:")
print(generated_text)

TypeError: The current model class (STLForCausalLM) is not compatible with `.generate()`, as it doesn't have a language model head. Please use one of the following classes instead: {'STLForCausalLM'}

In [30]:
model

STLForCausalLM(
  (model): STLDecoder(
    (embed_tokens): Embedding(35, 1024, padding_idx=1)
    (embed_positions): STLSinusoidalPositionalEmbedding(1024, 1024)
    (layers): ModuleList(
      (0-11): 12 x STLDecoderBlock(
        (self_attn): STLAttention(
          (W_k): Linear(in_features=1024, out_features=1024, bias=False)
          (W_q): Linear(in_features=1024, out_features=1024, bias=False)
          (W_v): Linear(in_features=1024, out_features=1024, bias=False)
          (W_o): Linear(in_features=1024, out_features=1024, bias=False)
        )
        (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (encoder_attn): STLAttention(
          (W_k): Linear(in_features=1024, out_features=1024, bias=False)
          (W_q): Linear(in_features=1024, out_features=1024, bias=False)
          (W_v): Linear(in_features=1024, out_features=1024, bias=False)
          (W_o): Linear(in_features=1024, out_features=1024, bias=False)
        )
        (enc