In [1]:
import torch
from torch.utils.data import DataLoader, TensorDataset
import wandb

from m2_utilities.load_data import load_trajectories
from m2_utilities.preprocessor import batch_stringify, process_sequences
from m2_utilities.qwen import load_qwen, train
from m2_utilities.lora import apply_lora
from m2_utilities.flops import compute_flops

%load_ext autoreload
%autoreload 2

In [2]:
# Load the model and its tokenizer through the project helper. `model` is
# reused by the LoRA, wandb.watch and training cells below; `tokenizer` is
# reused by the tokenisation step.
model, tokenizer = load_qwen()

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


In [3]:
# Rank handed to apply_lora; presumably the rank of the low-rank adapter
# matrices — confirm against m2_utilities.lora.
lora_rank = 4
# The return value is discarded and `model` is used directly afterwards, so
# apply_lora presumably modifies `model` in place — TODO confirm.
apply_lora(model, lora_rank)

### Preprocessing Trajectories to Tokens

In [4]:
# --- 1. Read the raw trajectories from the HDF5 file -----------------------
unscaled_trajectories = load_trajectories("data/lotka_volterra_data.h5")

# --- 2. Rescale: every value is divided by the constant ALPHA --------------
ALPHA = 1.5
trajectories = unscaled_trajectories / ALPHA

# --- 3. Convert the scaled trajectories to text, using DECIMALS ------------
#        (presumably the number of decimal places kept — confirm in
#        m2_utilities.preprocessor.batch_stringify)
DECIMALS = 2
texts = batch_stringify(trajectories, DECIMALS)

# --- 4. Contiguous split: first N_TRAIN texts for training, the next N_VAL
#        for validation, and everything that remains for testing -----------
N_TRAIN = 2
N_VAL = 2
train_texts, val_texts, test_texts = (
    texts[:N_TRAIN],
    texts[N_TRAIN:N_TRAIN + N_VAL],
    texts[N_TRAIN + N_VAL:],
)

# --- 5. Tokenise/chunk each split with the shared tokenizer, in the order
#        train -> val -> test ---------------------------------------------
train_input_ids, val_input_ids, test_input_ids = (
    process_sequences(split_texts, tokenizer)
    for split_texts in (train_texts, val_texts, test_texts)
)

In [5]:
# Initialising data loaders
#
# The training loader shuffles its samples. Without an explicit generator the
# shuffle order comes from the global torch RNG and changes on every kernel
# restart, so a dedicated seeded generator is used here: re-running this cell
# (or Restart & Run All) reproduces the same batch order.
LOADER_SEED = 42
batch_size = 1

train_dataset = TensorDataset(train_input_ids)
val_dataset = TensorDataset(val_input_ids)

# torch.Generator.manual_seed returns the generator itself, so it can be
# created and seeded in a single expression.
shuffle_generator = torch.Generator().manual_seed(LOADER_SEED)

train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    generator=shuffle_generator,
)
# Validation order does not matter for the averaged loss, so no shuffling.
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

### Training

In [8]:
# Hyperparameters recorded with the Weights & Biases run.
# NOTE(review): these values are only logged as run metadata here; the
# train(...) call in this notebook is not given a learning rate or optimizer,
# so confirm they match what m2_utilities.qwen.train actually uses.
config = {
    "Learning Rate": 1e-5,
    "Optimizer": "Adam",
}

# Start the run, then register the model with wandb.watch
# (log="all", every 10 steps).
wandb.init(config=config, project="First Project")
wandb.watch(model, log_freq=10, log="all")

In [7]:
# Run training with the train/validation loaders, passing the wandb module so
# the training helper can log to the active run.
#
# Training in this notebook is routinely stopped by hand (KeyboardInterrupt).
# The try/finally guarantees the Weights & Biases run is closed and its
# buffered data flushed even when that happens; the exception itself still
# propagates so the interruption stays visible.
# After this cell, re-run the wandb.init cell before training again.
try:
    train(model, train_loader, val_loader, wandb=wandb)
finally:
    wandb.finish()

Steps 0: 100%|██████████| 8/8 [00:38<00:00,  4.78s/it, loss=1.27] 


Validation Loss: 0.4105765991844237


Steps 8: 100%|██████████| 8/8 [00:39<00:00,  4.99s/it, loss=0.554]


Validation Loss: 0.4050035080872476


Steps 16: 100%|██████████| 8/8 [00:39<00:00,  4.98s/it, loss=0.645]


KeyboardInterrupt: 