In [1]:
import os
import numpy as np
import torch
import torch.optim as optim
from collections import defaultdict
import time

# Import your custom modules
import tools
from network import Model
from task import generate_trials, rules_dict

In [2]:
# Cell 1: Set up the environment and hyperparameters
model_dir = 'data/debug'
os.makedirs(model_dir, exist_ok=True)

hp = {
    'activation': 'relu',
    'n_rnn': 256,
    'learning_rate': 0.001,
    'l2_h': 0.,
    'use_separate_input': False,
    'ruleset': 'all',
    'batch_size_train': 64,
    'batch_size_test': 512,
    'seed': 0
}

In [3]:
# Cell 2: Initialize the model and other components
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Get default hyperparameters and update with custom ones
default_hp = tools.get_default_hp(hp['ruleset'])
default_hp.update(hp)
hp = default_hp

# Set random seed
np.random.seed(hp['seed'])
torch.manual_seed(hp['seed'])

Using device: cpu


<torch._C.Generator at 0x1106ab3f0>

In [4]:
# Initialize the model
model = Model(model_dir, hp=hp).to(device)
print(model)

# Set up the optimizer
optimizer = optim.Adam(model.parameters(), lr=hp['learning_rate'])

Model(
  (rnn): LeakyRNNCell()
  (output): Linear(in_features=256, out_features=33, bias=True)
)


  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Cell 3: Generate a trial batch
rule_train = np.random.choice(rules_dict[hp['ruleset']])
trial = generate_trials(rule_train, hp, 'random', batch_size=hp['batch_size_train'])

print(f"Generated trial for rule: {rule_train}")
print(f"Trial x shape: {trial.x.shape}")
print(f"Trial y shape: {trial.y.shape}")


KeyError: 'rng'

In [6]:
# Cell 4: Prepare input data
feed_dict = tools.gen_feed_dict(model, trial, hp)
x = feed_dict['x'].to(device)
y = feed_dict['y'].to(device)
c_mask = feed_dict['c_mask'].to(device)

print(f"Input x shape: {x.shape}")
print(f"Input y shape: {y.shape}")
print(f"Input c_mask shape: {c_mask.shape}")

NameError: name 'trial' is not defined

In [None]:
# Cell 5: Forward pass
try:
    y_hat, loss = model(x, y, c_mask)
    print(f"Forward pass successful")
    print(f"Output y_hat shape: {y_hat.shape}")
    print(f"Loss: {loss.item()}")
except Exception as e:
    print(f"Error during forward pass: {str(e)}")

In [7]:
# Cell 6: Backward pass
try:
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print("Backward pass successful")
except Exception as e:
    print(f"Error during backward pass: {str(e)}")

Error during backward pass: name 'loss' is not defined


In [None]:
# Cell 7: Training loop (run this cell multiple times to train)
n_steps = 100
log = defaultdict(list)

for step in range(n_steps):
    rule_train = np.random.choice(rules_dict[hp['ruleset']])
    trial = generate_trials(rule_train, hp, 'random', batch_size=hp['batch_size_train'])
    
    feed_dict = tools.gen_feed_dict(model, trial, hp)
    x = feed_dict['x'].to(device)
    y = feed_dict['y'].to(device)
    c_mask = feed_dict['c_mask'].to(device)
    
    optimizer.zero_grad()
    y_hat, loss = model(x, y, c_mask)
    loss.backward()
    optimizer.step()
    
    log['loss'].append(loss.item())
    
    if step % 10 == 0:
        print(f"Step {step}, Loss: {loss.item()}")

In [None]:
# Cell 8: Plot training loss
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 5))
plt.plot(log['loss'])
plt.title('Training Loss')
plt.xlabel('Step')
plt.ylabel('Loss')
plt.show()

In [None]:
# Cell 9: Evaluation
def evaluate(model, hp, device, n_eval=10):
    model.eval()
    eval_loss = []
    for _ in range(n_eval):
        rule_test = np.random.choice(rules_dict[hp['ruleset']])
        trial = generate_trials(rule_test, hp, 'random', batch_size=hp['batch_size_test'])
        
        feed_dict = tools.gen_feed_dict(model, trial, hp)
        x = feed_dict['x'].to(device)
        y = feed_dict['y'].to(device)
        c_mask = feed_dict['c_mask'].to(device)
        
        with torch.no_grad():
            y_hat, loss = model(x, y, c_mask)
        
        eval_loss.append(loss.item())
    
    model.train()
    return np.mean(eval_loss)

eval_loss = evaluate(model, hp, device)
print(f"Evaluation Loss: {eval_loss}")


In [None]:

# Cell 10: Save the model
model.save(model_dir)
print(f"Model saved to {model_dir}")