In [1]:
%load_ext autoreload
%autoreload 2

import torch
import pandas as pd

from seqfacben.generators.random import RandomSequenceGenerator
from seqfacben.tasks.sorting import SortingTask
from seqfacben.models.simple_nn import SimpleNN
from seqfacben.manager.task_manager import TaskManager
from seqfacben.losses.sequence import cross_entropy


##### I. Single Combination

In [3]:
# Single-configuration run: build a generator/task/model for one
# (vocab_size, seq_len) pair, train it, evaluate once, and print a few examples.

# Reproducibility: seed torch's global RNG before the model is constructed so
# that weight initialisation is repeatable across "Restart & Run All".
# NOTE(review): whether RandomSequenceGenerator draws from torch's RNG is not
# visible in this notebook -- confirm, and seed its source too if it differs.
seed = 0
torch.manual_seed(seed)

# Device: resolved exactly once, as a torch.device (the earlier plain-string
# assignment was immediately overwritten and has been dropped).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Generator parameters
vocab_size = 64
seq_len = 32

# Model parameters
d_model = 64

# Training parameters
batch_size = 64
train_steps = 5000
learning_rate = 0.001

# Set up: generator -> task -> model -> optimizer -> manager
generator = RandomSequenceGenerator(seq_len, vocab_size)
task = SortingTask(generator, loss_fn=cross_entropy)
model = SimpleNN(vocab_size, seq_len, d_model=d_model).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

manager = TaskManager(task, model, optimizer, device)

# Train
manager.train(n_steps=train_steps, batch_size=batch_size)

# Test: a single evaluation step on a fresh batch of the same size used for training
print("\nTesting:")
val_loss, val_acc = manager.eval_step(batch_size=batch_size)
print(f"Final val_loss={val_loss:.4f}, val_acc={val_acc:.4f}")

# Print a handful of input / target / prediction triples for a qualitative check
manager.show_examples(n_examples=3)

Step 1000: loss = 1.2891, acc = 0.5312
Step 2000: loss = 0.9713, acc = 0.6729
Step 3000: loss = 0.6739, acc = 0.7622
Step 4000: loss = 0.5744, acc = 0.8027
Step 5000: loss = 0.4823, acc = 0.8179

Testing:
Final accuracy: 0.7954

Examples:

Example 1:
  Input:  [37, 10, 36, 23, 15, 25, 41, 3, 27, 41, 19, 43, 58, 39, 16, 40, 59, 47, 40, 33, 51, 10, 38, 4, 29, 15, 14, 62, 54, 33, 34, 48]
  Target: [3, 4, 10, 10, 14, 15, 15, 16, 19, 23, 25, 27, 29, 33, 33, 34, 36, 37, 38, 39, 40, 40, 41, 41, 43, 47, 48, 51, 54, 58, 59, 62]
  Pred:   [3, 4, 10, 10, 14, 15, 15, 16, 19, 23, 26, 27, 30, 33, 33, 34, 36, 37, 38, 39, 40, 40, 41, 43, 43, 47, 48, 51, 54, 58, 59, 62]
  Correct: 29/32

Example 2:
  Input:  [30, 30, 32, 37, 61, 47, 43, 15, 29, 50, 44, 19, 11, 63, 14, 42, 43, 35, 47, 38, 61, 10, 53, 33, 59, 37, 60, 21, 63, 39, 37, 52]
  Target: [10, 11, 14, 15, 19, 21, 29, 30, 30, 32, 33, 35, 37, 37, 37, 38, 39, 42, 43, 43, 44, 47, 47, 50, 52, 53, 59, 60, 61, 61, 63, 63]
  Pred:   [10, 11, 13, 19, 26, 

##### II. Multi Experiment Example

In [None]:
import pandas as pd

# Sweep over (vocab_size, seq_len) combinations: train a fresh model for each
# configuration, record its final metrics, and write a summary table to CSV.

# Sweep-wide settings, named once so they are easy to tune. The SWEEP_ prefix
# keeps them from clobbering the single-run parameters of the previous cell.
SWEEP_SEED = 0
SWEEP_D_MODEL = 64
SWEEP_LR = 0.001
SWEEP_TRAIN_STEPS = 10000
SWEEP_TRAIN_BATCH_SIZE = 32
SWEEP_EVAL_EVERY = 500
SWEEP_EVAL_BATCH_SIZE = 256

# Experiment configurations
configs = [
    {'vocab_size': 5, 'seq_len': 10},
    {'vocab_size': 10, 'seq_len': 10},
    {'vocab_size': 20, 'seq_len': 10},
    {'vocab_size': 5, 'seq_len': 20},
    {'vocab_size': 5, 'seq_len': 30},
]

# The device does not depend on the configuration, so resolve it once.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

results = []

for config in configs:
    vocab_size = config['vocab_size']
    seq_len = config['seq_len']

    print(f"\n{'='*60}")
    print(f"Running: vocab_size={vocab_size}, seq_len={seq_len}")
    print(f"{'='*60}")

    # Re-seed per configuration so each run's result does not depend on which
    # configurations happened to run before it.
    # NOTE(review): this seeds torch's global RNG only; confirm whether
    # RandomSequenceGenerator uses it or needs its own seed.
    torch.manual_seed(SWEEP_SEED)

    # Setup: everything is rebuilt from scratch for every configuration
    generator = RandomSequenceGenerator(seq_len, vocab_size)
    task = SortingTask(generator, loss_fn=cross_entropy)
    model = SimpleNN(vocab_size, seq_len, d_model=SWEEP_D_MODEL).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=SWEEP_LR)

    manager = TaskManager(task, model, optimizer, device)

    # Train
    history = manager.train(
        n_steps=SWEEP_TRAIN_STEPS,
        batch_size=SWEEP_TRAIN_BATCH_SIZE,
        eval_every=SWEEP_EVAL_EVERY,
    )

    # Final evaluation
    val_loss, val_acc = manager.eval_step(batch_size=SWEEP_EVAL_BATCH_SIZE)

    # Save results. The last history record is read with .get(), so a missing
    # key (or an empty / None history) yields None rather than raising.
    last = history[-1] if history else {}
    logged_val_loss = last.get('val_loss')
    logged_val_acc = last.get('val_acc')
    results.append({
        'vocab_size': vocab_size,
        'seq_len': seq_len,
        'final_train_loss': last.get('train_loss'),
        'final_train_acc': last.get('train_acc'),
        # Prefer the logged value and fall back to the fresh evaluation only
        # when it is absent. An explicit None check is required here: `or`
        # would also discard a legitimate logged value of 0.0.
        'final_val_loss': val_loss if logged_val_loss is None else logged_val_loss,
        'final_val_acc': val_acc if logged_val_acc is None else logged_val_acc,
    })

    print(f"\nFinal val_loss={val_loss:.4f}, val_acc={val_acc:.4f}")

# Create DataFrame: one row per configuration, built once from the list of records
df = pd.DataFrame(results)
print("\n" + "="*60)
print("RESULTS SUMMARY")
print("="*60)
print(df)
df.to_csv('sorting_experiments.csv', index=False)