# Dyck Task - Training (Google Colab)

Fine-tune DeepSeek-R1-Distill-Qwen-1.5B on Dyck sequence completion with reasoning. Run cells in order.

**Colab:** Upload `conversation.jsonl` or mount Drive and set `DATA_PATH` accordingly.

In [None]:
# Optional: Install dependencies (uncomment if needed on Colab)
# !pip install -q torch transformers datasets unsloth bitsandbytes peft accelerate trl matplotlib

In [None]:
import torch
import json
import matplotlib.pyplot as plt
from datasets import Dataset
from unsloth import FastLanguageModel
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

# ============================================================================
# Configuration
# ============================================================================
# Model identifier passed to FastLanguageModel.from_pretrained in the model-setup cell.
MODEL_NAME = "unsloth/DeepSeek-R1-Distill-Qwen-1.5B"
DATA_PATH = "conversation.jsonl"  # Upload this file or set to your path (e.g. /content/drive/MyDrive/conversation.jsonl)
# Trainer output directory; the final model, tokenizer and loss plot are also written here.
OUTPUT_DIR = "results"
# Used both as the model's max_seq_length and as the truncation / padding length in preprocess().
MAX_LENGTH = 2048
# NOTE(review): not referenced by any cell visible in this notebook, so the whole
# file is used regardless of this value. Confirm whether a subsample was intended.
DATASET_SIZE = 10000

In [None]:
# ============================================================================
# Model Setup
# ============================================================================
print("Loading model and tokenizer...")
# dtype=None lets Unsloth auto-detect the compute dtype for the current GPU
# (bfloat16 where the hardware supports it, float16 otherwise) instead of
# forcing float16, so the weights match the mixed-precision mode the GPU can
# actually train in.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_NAME,
    max_seq_length=MAX_LENGTH,
    dtype=None,
    load_in_4bit=True,
)

# Padding needs a pad token; fall back to EOS when the tokenizer defines none.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Attach LoRA adapters to the attention and MLP projection layers.
model = FastLanguageModel.get_peft_model(
    model,
    r=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_alpha=64,
    lora_dropout=0.1,
    bias="none",
    use_gradient_checkpointing=True,
    random_state=42,
)
print("Model loaded.")

In [None]:
# ============================================================================
# Dataset Loading
# ============================================================================
print(f"Loading dataset from {DATA_PATH}...")
# Each non-blank line becomes one sample; the JSON itself is decoded later in preprocess().
with open(DATA_PATH, 'r', encoding='utf-8') as f:
    stripped_lines = (line.strip() for line in f)
    data = [{"text": text} for text in stripped_lines if text]
if not data:
    # Fail here with a clear message rather than inside train_test_split.
    raise ValueError(f"No samples found in {DATA_PATH}")
dataset = Dataset.from_list(data)
print(f"Loaded {len(dataset)} samples")

dataset = dataset.shuffle(seed=42)
# train_test_split shuffles again by default, so it needs its own seed for the
# train/eval split to be reproducible across runs.
dataset = dataset.train_test_split(test_size=0.05, seed=42)
train_dataset = dataset["train"]
eval_dataset = dataset["test"]
print(f"Train: {len(train_dataset)}, Eval: {len(eval_dataset)}")

In [None]:
# ============================================================================
# Preprocessing Function
# ============================================================================
def _parse_conversation(raw):
    """Decode *raw* into a conversation (expected: a list of message dicts).

    Non-string input is returned unchanged. Strings are decoded as JSON; a
    line that is itself a JSON string (a double-encoded conversation) is
    decoded a second time. If that does not yield a list, the lenient legacy
    path is used: strip wrapping double quotes, then fall back to the outermost
    ``[...]`` span.

    Raises:
        ValueError: if no JSON can be extracted (``json.JSONDecodeError`` is a
            ``ValueError`` subclass and may propagate from the bracket fallback).
    """
    if not isinstance(raw, str):
        return raw

    text = raw.strip()
    try:
        parsed = json.loads(text)
        if isinstance(parsed, str):
            parsed = json.loads(parsed)
    except json.JSONDecodeError:
        parsed = None
    if isinstance(parsed, list):
        return parsed

    # Legacy path for lines wrapped in stray quotes or surrounded by extra text.
    text = text.strip('"').strip()
    try:
        return json.loads(text)
    except json.JSONDecodeError as err:
        start_idx = text.find('[')
        end_idx = text.rfind(']')
        if start_idx == -1 or end_idx <= start_idx:
            raise ValueError(f"Failed to parse JSON: {text[:100]}...") from err
        return json.loads(text[start_idx:end_idx + 1])


def _select_messages(conversation):
    """Return the last user message and the last assistant message.

    Entries that are not dicts are ignored.

    Raises:
        ValueError: if either role is missing from the conversation.
    """
    user_msg = None
    assistant_msg = None
    for msg in conversation:
        if not isinstance(msg, dict):
            continue
        role = msg.get("role")
        if role == "user":
            user_msg = msg
        elif role == "assistant":
            assistant_msg = msg
    if user_msg is None or assistant_msg is None:
        raise ValueError("Conversation must contain both a user and an assistant message")
    return user_msg, assistant_msg


def _build_assistant_content(assistant_msg):
    """Join reasoning and answer as ``<reasoning>\\n\\nFINAL ANSWER: <answer>``.

    When only one of the two fields is non-empty, that field is returned alone.
    Missing or null fields are treated as empty strings.
    """
    reasoning = (assistant_msg.get("reasoning_content") or "").strip()
    completion = (assistant_msg.get("content") or "").strip()
    if reasoning and completion:
        return f"{reasoning}\n\nFINAL ANSWER: {completion}"
    return completion or reasoning


def preprocess(example):
    """Turn one raw sample into fixed-length ``input_ids``/``attention_mask``/``labels``.

    Args:
        example: mapping whose ``"text"`` value is a JSON-encoded conversation
            (or an already-decoded one).

    Returns:
        The tokenizer output for the chat-formatted conversation, padded to
        ``MAX_LENGTH``. ``labels`` is -100 on the user prompt and on padding,
        and equals ``input_ids`` on the assistant part.

    Raises:
        ValueError: if the conversation cannot be parsed or lacks a user or
            assistant message.
    """
    conversation = _parse_conversation(example.get("text", ""))
    user_msg, assistant_msg = _select_messages(conversation)
    user_content = user_msg.get("content") or ""
    assistant_content = _build_assistant_content(assistant_msg)

    messages = [{"role": "user", "content": user_content}, {"role": "assistant", "content": assistant_content}]
    full_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
    user_text = tokenizer.apply_chat_template(messages[:1], tokenize=False, add_generation_prompt=False)

    # The prompt is tokenized on its own only to find where the assistant part starts.
    user_tokenized = tokenizer(user_text, truncation=True, max_length=MAX_LENGTH, add_special_tokens=True)
    tokenized = tokenizer(full_text, truncation=True, max_length=MAX_LENGTH, padding=False, add_special_tokens=True)
    input_ids = list(tokenized["input_ids"])
    assistant_start_idx = min(len(user_tokenized["input_ids"]), len(input_ids))

    # No padding has been added yet, so every token from the assistant start on
    # is a real target. Filtering on pad_token_id here would also drop the
    # end-of-sequence token whenever the pad token is aliased to EOS, and the
    # model would never be trained to stop.
    labels = [-100] * assistant_start_idx + input_ids[assistant_start_idx:]

    # Right-pad everything to MAX_LENGTH; padding is excluded from attention and loss.
    current_len = len(input_ids)
    padding_len = max(0, MAX_LENGTH - current_len)
    tokenized["input_ids"] = input_ids + [tokenizer.pad_token_id] * padding_len
    tokenized["labels"] = labels + [-100] * padding_len
    tokenized["attention_mask"] = [1] * current_len + [0] * padding_len
    return tokenized

In [None]:
# Run preprocess() over both splits, replacing the raw columns with the
# tokenized ones. (Use num_proc=1 or 2 on Colab if you get multiprocessing errors.)
print("Preprocessing training dataset...")
train_dataset = train_dataset.map(
    preprocess,
    num_proc=2,
    desc="Preprocessing train",
    remove_columns=train_dataset.column_names,
)
print("Preprocessing evaluation dataset...")
eval_dataset = eval_dataset.map(
    preprocess,
    num_proc=2,
    desc="Preprocessing eval",
    remove_columns=eval_dataset.column_names,
)

# Spot-check the first training example: count the positions that carry a real label.
if len(train_dataset) != 0:
    sample = train_dataset[0]
    trainable_labels = len([label for label in sample["labels"] if label != -100])
    print(f"Preprocessing validated. Assistant tokens (loss on): {trainable_labels}")

In [None]:
# ============================================================================
# Training Setup
# ============================================================================
from transformers import DataCollatorForSeq2Seq

# The dataset already carries a `labels` column with prompt and padding set to
# -100. DataCollatorForLanguageModeling(mlm=False) rebuilds labels from
# input_ids and would throw that mask away, putting the loss on the user prompt
# too. DataCollatorForSeq2Seq keeps the provided labels and pads them with -100.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True, label_pad_token_id=-100)

# Pick the mixed-precision mode from the dtype the model was actually loaded in,
# rather than hard-coding bf16 (which GPUs without bfloat16 support reject).
# NOTE(review): assumes the input embedding weights reflect the load dtype — confirm for quantized loads.
use_bf16 = model.get_input_embeddings().weight.dtype == torch.bfloat16

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    eval_strategy="steps",
    eval_steps=50,
    save_steps=50,
    logging_steps=30,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,
    num_train_epochs=2,
    learning_rate=2e-5,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    weight_decay=0.005,
    bf16=use_bf16,
    fp16=not use_bf16,
    max_grad_norm=1.0,
    save_total_limit=1,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
)

# NOTE(review): recent transformers releases rename Trainer's `tokenizer`
# argument to `processing_class`; switch once the minimum version allows it.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)
print("Trainer ready.")

In [None]:
# ============================================================================
# Training
# ============================================================================
print("Starting training...")
trainer.train()

# Save the model and tokenizer into OUTPUT_DIR.
print(f"\nSaving model to {OUTPUT_DIR}...")
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
print(f"Model saved to {OUTPUT_DIR}")

print("\nFinal evaluation...")
eval_results = trainer.evaluate()
# Format only when the metric is present: applying ':.4f' to an 'N/A' string
# placeholder raises ValueError.
eval_loss = eval_results.get('eval_loss')
if eval_loss is None:
    print("Final eval loss: N/A")
else:
    print(f"Final eval loss: {eval_loss:.4f}")

In [None]:
# ============================================================================
# Plot Training Loss
# ============================================================================
history = trainer.state.log_history

# Split the log into training entries ('loss') and evaluation entries
# ('eval_loss'); entries without a 'step' are ignored.
train_logs = [entry for entry in history if 'step' in entry and 'loss' in entry]
eval_logs = [entry for entry in history if 'step' in entry and 'eval_loss' in entry]
train_steps = [entry['step'] for entry in train_logs]
train_losses = [entry['loss'] for entry in train_logs]
eval_steps = [entry['step'] for entry in eval_logs]
eval_losses = [entry['eval_loss'] for entry in eval_logs]

plt.figure(figsize=(12, 6))
# Draw each curve only when it has data points.
curves = (
    (train_steps, train_losses, 'Training Loss', 'o'),
    (eval_steps, eval_losses, 'Evaluation Loss', 's'),
)
for steps, losses, label, marker in curves:
    if losses:
        plt.plot(steps, losses, label=label, marker=marker, markersize=3, linewidth=1.5)
plt.xlabel('Training Steps')
plt.ylabel('Loss')
plt.title('Training and Evaluation Loss')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()

plot_path = f"{OUTPUT_DIR}/training_loss.png"
plt.savefig(plot_path, dpi=300, bbox_inches='tight')
plt.show()
print(f"Saved {plot_path}")
print("Training complete!")