In [1]:
import tiktoken
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

from gpt_model import GPTModel
from clean_gutenberg_text import clean_gutenberg_text
from train_sae import train_sae

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

print(f"Using {device} device.")

Using cpu device.


In [3]:
GPT_CONFIG_124M = {
    "vocab_size": 50257,
    "context_length": 256,
    "emb_dim": 768,
    "n_heads": 12,
    "n_layers": 12,
    "drop_rate": 0.2,
    "qkv_bias": False,
    "device": device,
}

In [4]:
model = GPTModel(GPT_CONFIG_124M)
checkpoint = torch.load("model_and_optimizer_5.pth", weights_only=True)

model.load_state_dict(checkpoint["model_state_dict"])
model.to(device)
model.eval();

FileNotFoundError: [Errno 2] No such file or directory: 'model_and_optimizer_5.pth'

In [None]:
tokenizer = tiktoken.get_encoding("gpt2")

In [None]:
import re

def load_and_clean_text(file_path):
    """
    Loads a text file and splits it into sentences while cleaning the text.
    
    Args:
    - file_path (str): Path to the text file.
    
    Returns:
    - list: A list of cleaned sentences from the book.
    """
    
    text = clean_gutenberg_text(file_path)

    # Split text into sentences (simple heuristic using punctuation)
    sentences = re.split(r"(?<=[.!?])\s+", text)

    # Remove very short or long sentences
    sentences = [s.strip() for s in sentences if 5 < len(s.split()) < 50]

    return sentences

In [None]:
directory="original_texts/"
dataset = []

for filename in os.listdir(directory):
    if filename.endswith(".txt"):
        sentences = load_and_clean_text(os.path.join(directory, filename))
        dataset += sentences

In [None]:
def text_to_token_ids(text, tokenizer):
    encoded = tokenizer.encode(text, allowed_special={'<|endoftext|>'})
    encoded_tensor = torch.tensor(encoded).unsqueeze(0) # add batch dimension
    return encoded_tensor

def get_token_embeddings(text, model, tokenizer, layers=[6, 12]):
    """
    Extracts token embeddings from specified transformer layers.

    Args:
    - text (str): Input text.
    - model: Custom GPT model.
    - tokenizer: tiktoken encoding object.
    - layers (list): Transformer layers to extract embeddings from.

    Returns:
    - dict: Layer-wise token embeddings {layer_number: embeddings}
    """

    input_ids = text_to_token_ids(text, tokenizer).to(device)

    with torch.no_grad():
        _, hidden_states = model(input_ids, output_hidden_states=True)

    embeddings = {}
    for layer in layers:
        if layer - 1 < len(hidden_states):
            embeddings[layer] = hidden_states[layer - 1].squeeze(0).cpu().numpy()
        else:
            print(f"⚠️ Warning: Layer {layer} is out of range (max index {len(hidden_states) - 1})")

    return embeddings

In [None]:
layer6_embeddings = []
layer12_embeddings = []

for sentence in dataset:
    embeddings = get_token_embeddings(sentence, model, tokenizer)
    layer6_embeddings.append(embeddings[6])
    layer12_embeddings.append(embeddings[12])

# Convert to NumPy and flatten tokens into dataset
layer6_embeddings = np.vstack(layer6_embeddings)
layer12_embeddings = np.vstack(layer12_embeddings)

os.makedirs("sae_data", exist_ok=True)
np.save("sae_data/layer6_embeddings.npy", layer6_embeddings)
np.save("sae_data/layer12_embeddings.npy", layer12_embeddings)

print("Saved token embeddings:")
print(f"Layer 6: {layer6_embeddings.shape}")
print(f"Layer 12: {layer12_embeddings.shape}")

In [None]:
layer6_embeddings = np.load("sae_data/layer6_embeddings.npy")
layer12_embeddings = np.load("sae_data/layer12_embeddings.npy")

In [10]:
train_losses = []
val_losses = []
global train_losses, val_losses

train_sae(layer6_embeddings, "sae_layer6_1.pth", train_losses=[],
          val_losses=val_losses, epochs=500, device=device)



Epoch [1/500], Train Loss: 2.3618, Val Loss: 2.2800
Epoch [2/500], Train Loss: 2.2735, Val Loss: 2.2984
Epoch [3/500], Train Loss: 2.2630, Val Loss: 2.2754
Epoch [4/500], Train Loss: 2.2570, Val Loss: 2.2771
Epoch [5/500], Train Loss: 2.2525, Val Loss: 2.2562
Epoch [6/500], Train Loss: 2.2502, Val Loss: 2.2787
Epoch [7/500], Train Loss: 2.2485, Val Loss: 2.2574
Epoch [8/500], Train Loss: 2.2471, Val Loss: 2.2740
Epoch [9/500], Train Loss: 2.2461, Val Loss: 2.2814
Epoch [10/500], Train Loss: 2.2452, Val Loss: 2.2925
Epoch [11/500], Train Loss: 2.2444, Val Loss: 2.2744
Epoch [12/500], Train Loss: 2.2260, Val Loss: 2.2265
Epoch [13/500], Train Loss: 2.2237, Val Loss: 2.2189
Epoch [14/500], Train Loss: 2.2233, Val Loss: 2.2273
Epoch [15/500], Train Loss: 2.2230, Val Loss: 2.2203
Epoch [16/500], Train Loss: 2.2226, Val Loss: 2.2392
Epoch [17/500], Train Loss: 2.2220, Val Loss: 2.2464
Epoch [18/500], Train Loss: 2.2219, Val Loss: 2.2346
Epoch [19/500], Train Loss: 2.2212, Val Loss: 2.2189
Ep

In [11]:
train_losses = []
val_losses = []
global train_losses, val_losses

train_sae(layer12_embeddings, "sae_layer12.pth", train_losses=[],
          val_losses=val_losses, epochs=500, device=device)

Epoch [1/500], Train Loss: 5.9004, Val Loss: 5.7176
Epoch [2/500], Train Loss: 5.7059, Val Loss: 5.6882
Epoch [3/500], Train Loss: 5.6882, Val Loss: 5.7546
Epoch [4/500], Train Loss: 5.6805, Val Loss: 5.7202
Epoch [5/500], Train Loss: 5.6752, Val Loss: 5.7325
Epoch [6/500], Train Loss: 5.6718, Val Loss: 5.6864
Epoch [7/500], Train Loss: 5.6699, Val Loss: 5.6657
Epoch [8/500], Train Loss: 5.6674, Val Loss: 5.6849
Epoch [9/500], Train Loss: 5.6654, Val Loss: 5.7040
Epoch [10/500], Train Loss: 5.6632, Val Loss: 5.7012
Epoch [11/500], Train Loss: 5.6616, Val Loss: 5.7231
Epoch [12/500], Train Loss: 5.6603, Val Loss: 5.6998
Epoch [13/500], Train Loss: 5.6603, Val Loss: 5.6749
Epoch [14/500], Train Loss: 5.6296, Val Loss: 5.5976
Epoch [15/500], Train Loss: 5.6270, Val Loss: 5.6285
Epoch [16/500], Train Loss: 5.6266, Val Loss: 5.6242
Epoch [17/500], Train Loss: 5.6256, Val Loss: 5.6264
Epoch [18/500], Train Loss: 5.6253, Val Loss: 5.6167
Epoch [19/500], Train Loss: 5.6253, Val Loss: 5.5838
Ep

In [6]:
import optuna
import torch
import numpy as np
from torch.utils.data import DataLoader, TensorDataset, random_split
import torch.optim as optim
import torch.nn as nn
from sparse_auto_encoder import SparseAutoencoder

def objective(trial, embeddings_path):
    # Hyperparameter search space
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])
    lr = trial.suggest_loguniform("lr", 1e-4, 1e-2)
    hidden_dim = trial.suggest_int("hidden_dim", 64, 512, step=64)
    weight_decay = trial.suggest_loguniform("weight_decay", 1e-6, 1e-3)

    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Load your embeddings
    embeddings = np.load(embeddings_path)  # Replace with actual file
    embeddings = torch.tensor(embeddings, dtype=torch.float32).to(device)

    input_dim = embeddings.shape[1]
    sae = SparseAutoencoder(input_dim=input_dim, hidden_dim=hidden_dim).to(device)

    optimizer = optim.AdamW(sae.parameters(), lr=lr, weight_decay=weight_decay)
    criterion = nn.MSELoss()

    dataset = TensorDataset(embeddings)
    train_size = int(0.9 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

    best_val_loss = float("inf")
    patience, early_stop_counter = 10, 0

    for epoch in range(30):  # Tune for a fixed number of epochs
        sae.train()
        train_loss = 0.0
        for batch in train_loader:
            inputs = batch[0].to(device)
            optimizer.zero_grad()
            outputs, encoded = sae(inputs)
            loss = criterion(outputs, inputs)
            sparsity_loss = torch.norm(encoded, p=1) * 1e-4
            total_loss = loss + sparsity_loss
            total_loss.backward()
            optimizer.step()
            train_loss += total_loss.item()

        sae.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch in val_loader:
                inputs = batch[0].to(device)
                outputs, encoded = sae(inputs)
                loss = criterion(outputs, inputs)
                sparsity_loss = torch.norm(encoded, p=1) * 1e-4
                total_loss = loss + sparsity_loss
                val_loss += total_loss.item()

        train_loss /= len(train_loader)
        val_loss /= len(val_loader)

        scheduler.step(val_loss)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            early_stop_counter = 0
        else:
            early_stop_counter += 1

        if early_stop_counter >= patience:
            break

    return best_val_loss  # Optuna minimizes this value

In [7]:
# Run hyperparameter tuning
study = optuna.create_study(direction="minimize")
study.optimize(lambda trial: objective(trial, embeddings_path="sae_data/layer6_embeddings.npy"), n_trials=30)

# Print best hyperparameters
print("Best hyperparameters:", study.best_params)

[I 2025-03-06 15:29:09,452] A new study created in memory with name: no-name-043ed9f3-7d38-41b2-9d5e-0003d269b796
  lr = trial.suggest_loguniform("lr", 1e-4, 1e-2)
  weight_decay = trial.suggest_loguniform("weight_decay", 1e-6, 1e-3)
[W 2025-03-06 15:29:09,458] Trial 0 failed with parameters: {'batch_size': 32, 'lr': 0.005764291861533475, 'hidden_dim': 448, 'weight_decay': 5.804798232732968e-06} because of the following error: FileNotFoundError(2, 'No such file or directory').
Traceback (most recent call last):
  File "C:\Users\IuG_Lap1\AppData\Local\Programs\Python\Python312\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\IuG_Lap1\AppData\Local\Temp\ipykernel_15332\2580150686.py", line 3, in <lambda>
    study.optimize(lambda trial: objective(trial, embeddings_path="sae_data/layer6_embeddings.npy"), n_trials=30)
                                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

FileNotFoundError: [Errno 2] No such file or directory: 'sae_data/layer6_embeddings.npy'