In [1]:
%load_ext autoreload
%autoreload 2

import torch
import pandas as pd

from seqfacben.generators.random import RandomSequenceGenerator
from seqfacben.tasks.sorting import SortingTask
from seqfacben.models.simple_nn import SimpleNN
from seqfacben.manager.task_manager import TaskManager
from seqfacben.losses.sequence import cross_entropy


##### I. Single Combination

In [2]:
# Train one SimpleNN on the sorting task for a single (vocab_size, seq_len)
# combination, then report validation metrics and print a few sample predictions.

# Reproducibility: seed torch's global RNG so weight initialisation is repeatable
# across "Restart & Run All".
# NOTE(review): RandomSequenceGenerator may draw from a different RNG — confirm,
# and seed that source as well if it does.
seed = 0
torch.manual_seed(seed)

# Device is resolved exactly once, as a torch.device (the earlier cell version
# first bound `device` to a plain string and then immediately overwrote it).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Generator parameters
vocab_size = 64
seq_len = 32

# Model parameters
d_model = 64

# Training parameters
batch_size = 64
train_steps = 5000
learning_rate = 0.001

# Set up the task, the model and the optimizer
generator = RandomSequenceGenerator(seq_len, vocab_size)
task = SortingTask(generator, loss_fn=cross_entropy)
model = SimpleNN(vocab_size, seq_len, d_model=d_model).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

manager = TaskManager(task, model, optimizer, device)

# Train
manager.train(n_steps=train_steps, batch_size=batch_size)

# Test: one evaluation step on a fresh batch of the same size used for training
print("\nTesting:")
val_loss, val_acc = manager.eval_step(batch_size=batch_size)
print(f"Final val_loss={val_loss:.4f}, val_acc={val_acc:.4f}")

manager.show_examples(n_examples=3)

Using device: cuda
Step 1000: train_loss=1.3151, train_acc=0.5298, val_loss=1.4312, val_acc=0.4863
Step 2000: train_loss=0.9557, train_acc=0.6460, val_loss=0.9235, val_acc=0.6685
Step 3000: train_loss=0.6927, train_acc=0.7471, val_loss=0.7392, val_acc=0.7329
Step 4000: train_loss=0.5977, train_acc=0.7754, val_loss=0.8315, val_acc=0.7485
Step 5000: train_loss=0.5894, train_acc=0.8174, val_loss=0.4823, val_acc=0.8335

Testing:
Final val_loss=0.4618, val_acc=0.8325

Examples:

Example 1:
  Input:  [58, 58, 37, 0, 23, 1, 3, 40, 52, 63, 6, 48, 7, 17, 13, 21, 16, 22, 15, 31, 12, 40, 49, 24, 60, 13, 29, 47, 52, 27, 28, 27]
  Target: [0, 1, 3, 6, 7, 12, 13, 13, 15, 16, 17, 21, 22, 23, 24, 27, 27, 28, 29, 31, 37, 40, 40, 47, 48, 49, 52, 52, 58, 58, 60, 63]
  Pred:   [0, 1, 3, 6, 7, 12, 13, 13, 15, 15, 17, 22, 22, 23, 24, 27, 27, 28, 29, 31, 37, 40, 40, 47, 48, 49, 52, 52, 58, 58, 60, 63]
  Correct: 30/32

Example 2:
  Input:  [2, 14, 21, 23, 52, 30, 15, 63, 6, 35, 1, 6, 58, 28, 63, 19, 12, 0, 2

##### II. Multi-Experiment Example

In [None]:
# Sweep over several (vocab_size, seq_len) combinations: train a fresh SimpleNN
# for each one, collect the final metrics, and write a summary table to CSV.
# `torch` and `pd` come from the notebook's import cell.

# Sweep-wide hyperparameters. They carry a SWEEP_ prefix so this cell does not
# overwrite `batch_size` / `d_model` / `train_steps` from the single-run cell.
SWEEP_D_MODEL = 64
SWEEP_LEARNING_RATE = 0.001
SWEEP_TRAIN_STEPS = 10000
SWEEP_BATCH_SIZE = 32
SWEEP_EVAL_EVERY = 500
SWEEP_FINAL_EVAL_BATCH_SIZE = 256

# Experiment configurations
configs = [
    {'vocab_size': 5, 'seq_len': 10},
    {'vocab_size': 10, 'seq_len': 10},
    {'vocab_size': 20, 'seq_len': 10},
    {'vocab_size': 5, 'seq_len': 20},
    {'vocab_size': 5, 'seq_len': 30},
]

# The device does not depend on the configuration, so resolve it once up front.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

results = []

for config in configs:
    vocab_size = config['vocab_size']
    seq_len = config['seq_len']

    print(f"\n{'='*60}")
    print(f"Running: vocab_size={vocab_size}, seq_len={seq_len}")
    print(f"{'='*60}")

    # Setup: every configuration gets its own generator, model and optimizer
    generator = RandomSequenceGenerator(seq_len, vocab_size)
    task = SortingTask(generator, loss_fn=cross_entropy)
    model = SimpleNN(vocab_size, seq_len, d_model=SWEEP_D_MODEL).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=SWEEP_LEARNING_RATE)

    manager = TaskManager(task, model, optimizer, device)

    # Train
    history = manager.train(
        n_steps=SWEEP_TRAIN_STEPS,
        batch_size=SWEEP_BATCH_SIZE,
        eval_every=SWEEP_EVAL_EVERY,
    )

    # Final evaluation
    val_loss, val_acc = manager.eval_step(batch_size=SWEEP_FINAL_EVAL_BATCH_SIZE)

    # Save results.
    # NOTE(review): `history` is indexed with [-1] and read with .get(), so it is
    # presumably a list of per-eval metric dicts — confirm against TaskManager.train.
    last = history[-1] if history else {}
    logged_val_loss = last.get('val_loss')
    logged_val_acc = last.get('val_acc')
    results.append({
        'vocab_size': vocab_size,
        'seq_len': seq_len,
        'final_train_loss': last.get('train_loss'),
        'final_train_acc': last.get('train_acc'),
        # Fall back to the final evaluation only when the metric was not logged.
        # An explicit None check is required: `x or fallback` would also discard
        # a legitimately logged 0.0 (e.g. zero accuracy or zero loss).
        'final_val_loss': val_loss if logged_val_loss is None else logged_val_loss,
        'final_val_acc': val_acc if logged_val_acc is None else logged_val_acc,
    })

    print(f"\nFinal val_loss={val_loss:.4f}, val_acc={val_acc:.4f}")

# Create DataFrame (one row per configuration) and persist it next to the notebook
df = pd.DataFrame(results)
print("\n" + "="*60)
print("RESULTS SUMMARY")
print("="*60)
print(df)
df.to_csv('sorting_experiments.csv', index=False)