### Step 1: Install necessary packages

In [1]:
!pip install matplotlib
!pip install torch numpy transformers datasets tiktoken wandb tqdm

Collecting matplotlib
  Downloading matplotlib-3.10.7-cp313-cp313-macosx_11_0_arm64.whl.metadata (11 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.3.3-cp313-cp313-macosx_11_0_arm64.whl.metadata (5.5 kB)
Collecting cycler>=0.10 (from matplotlib)
  Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.60.1-cp313-cp313-macosx_10_13_universal2.whl.metadata (112 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Downloading kiwisolver-1.4.9-cp313-cp313-macosx_11_0_arm64.whl.metadata (6.3 kB)
Collecting pyparsing>=3 (from matplotlib)
  Downloading pyparsing-3.2.5-py3-none-any.whl.metadata (5.0 kB)
Downloading matplotlib-3.10.7-cp313-cp313-macosx_11_0_arm64.whl (8.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.1/8.1 MB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading contourpy-1.3.3-cp313-cp313-macosx_11_0_arm64.whl (274 kB)
Down

### Step 2: Package imports and configuration

In [None]:
import sys
import os
sys.path.append(os.path.abspath("..")) 
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import torch
import torch.nn as nn
import torch.nn.functional as F
import random
import pickle
from model import GPT, GPTConfig
import random
from tqdm import tqdm
import time
import json
import matplotlib.pyplot as plt

# Configuration
beta = 0.5  # scaling constant used in the preference loss of the training cell

# Device selection: prefer CUDA, then the Mac GPU backend (MPS), otherwise CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'mps' if torch.backends.mps.is_available() else 'cpu'

print(f"Using device: {device}")

# Optimisation settings
base_lr = 1e-4      # learning rate handed to the optimizer
epochs = 5          # passes over the preference pairs
batch_size = 64     # pairs per training batch
max_length = 64     # every encoded sequence is padded/truncated to this many tokens

# Sampling settings
num_samples = 1
max_new_tokens = 200
temperature = 0.8
top_k = 200

# tokenizer - character-level lookup tables read from the SFT run's meta file.
# NOTE: pickle.load executes arbitrary code from the file; only load a meta.pkl you trust.
with open("../sft/meta.pkl", "rb") as f:
    meta = pickle.load(f)
stoi = meta["stoi"]
itos = meta["itos"]


def encode(s):
    """Map each character of `s` to its id; characters missing from `stoi` become id 0."""
    ids = []
    for c in s:
        ids.append(stoi.get(c, 0))
    return ids


def decode(l):
    """Map a sequence of ids back to text; ids missing from `itos` are dropped."""
    return ''.join(itos.get(i, '') for i in l)

### Step 3: Define helper functions

In [3]:
def compute_logprob(input_ids):
    """Return the mean per-token log-probability of each sequence under `gpt`.

    Args:
        input_ids: 2-D LongTensor of token ids (one row per sequence). Token id
            0 is treated as padding and excluded from the average.

    Returns:
        1-D tensor with one value per row: the negative of the cross-entropy
        averaged over that row's non-padding target positions. A row with no
        non-padding targets yields 0 instead of NaN.
    """
    # Next-token prediction: position t of `inputs` predicts position t of `targets`.
    inputs = input_ids[:, :-1]
    targets = input_ids[:, 1:]
    logits, _ = gpt(inputs, full_seq=True)
    B, T, V = logits.size()
    logits_flat = logits.reshape(-1, V)
    targets_flat = targets.reshape(-1)
    # reduction='none' keeps one loss value per position so it can be averaged per row.
    loss = F.cross_entropy(logits_flat, targets_flat, ignore_index=0, reduction='none')
    loss = loss.reshape(B, T)
    attention_mask = (targets != 0).float()
    # Clamp the token count to at least 1: an all-padding row would otherwise
    # compute 0/0 = NaN and poison the mean of the whole batch.
    token_counts = attention_mask.sum(dim=1).clamp(min=1)
    loss = (loss * attention_mask).sum(dim=1) / token_counts
    return -loss

def pad_or_truncate(seq, max_length):
    """Force `seq` to `max_length` items.

    Longer sequences keep only their last `max_length` items; shorter ones are
    right-padded with 0.
    """
    overflow = len(seq) - max_length
    if overflow > 0:
        return seq[-max_length:]
    return seq + [0] * (-overflow)

def get_batches(lines, batch_size):
    """Yield shuffled (negative, positive) token-id batches.

    Args:
        lines: sequence of dicts with 'negative' and 'positive' text entries.
        batch_size: number of pairs per batch.

    Yields:
        (neg_tensor, pos_tensor): two LongTensors on `device`, one row per pair,
        each row padded/truncated to `max_length`.

    Only full batches are produced; a trailing remainder smaller than
    `batch_size` is dropped. The caller's `lines` is left in its original order.
    """
    # Shuffle a copy so the caller's list is not reordered as a side effect.
    shuffled = list(lines)
    random.shuffle(shuffled)
    for start in range(0, len(shuffled) - batch_size + 1, batch_size):
        batch = shuffled[start:start + batch_size]
        # Each text gets a '\n\n\n\n' suffix before encoding.
        neg_inputs = [pad_or_truncate(encode(p['negative'] + '\n\n\n\n'), max_length) for p in batch]
        pos_inputs = [pad_or_truncate(encode(p['positive'] + '\n\n\n\n'), max_length) for p in batch]
        neg_tensor = torch.tensor(neg_inputs, dtype=torch.long, device=device)
        pos_tensor = torch.tensor(pos_inputs, dtype=torch.long, device=device)
        yield neg_tensor, pos_tensor

### Step 4: Load the pretrained NanoGPT model

In [4]:
# Restore the SFT checkpoint and rebuild the model from its saved constructor arguments
ckpt = torch.load("../sft/gpt.pt", map_location=device)
gptconf = GPTConfig(**ckpt['model_args'])
gpt = GPT(gptconf)

# Strip the '_orig_mod.' prefix from any parameter names that carry it so they
# match the names the freshly built model expects.
state_dict = ckpt['model']
unwanted_prefix = '_orig_mod.'
prefixed_keys = [k for k in state_dict if k.startswith(unwanted_prefix)]
for k in prefixed_keys:
    state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)

gpt.load_state_dict(state_dict)
# Move to the selected device and switch to training mode (last expression displays the model)
gpt.to(device).train()

GPT(
  (transformer): ModuleDict(
    (wte): Embedding(74, 348)
    (wpe): Embedding(256, 348)
    (drop): Dropout(p=0.2, inplace=False)
    (h): ModuleList(
      (0-5): 6 x Block(
        (ln_1): LayerNorm()
        (attn): CausalSelfAttention(
          (c_attn): Linear(in_features=348, out_features=1044, bias=False)
          (c_proj): Linear(in_features=348, out_features=348, bias=False)
          (attn_dropout): Dropout(p=0.2, inplace=False)
          (resid_dropout): Dropout(p=0.2, inplace=False)
        )
        (ln_2): LayerNorm()
        (mlp): MLP(
          (c_fc): Linear(in_features=348, out_features=1392, bias=False)
          (gelu): GELU(approximate='none')
          (c_proj): Linear(in_features=1392, out_features=348, bias=False)
          (dropout): Dropout(p=0.2, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm()
  )
  (lm_head): Linear(in_features=348, out_features=74, bias=False)
)

### Step 5: Load Data (**students are required to complete this part!**)

In [5]:
# Load data from ./pos_neg_pairs.json
# Each entry is a dict with a 'negative' and a 'positive' completion of the same prompt.
with open("pos_neg_pairs.json", "r") as f:
    lines = json.load(f)

print(f"Loaded {len(lines)} positive-negative pairs")
print("Sample data:")
# Slice rather than index 0..2 so a file with fewer than three pairs does not raise IndexError.
for pair in lines[:3]:
    print(f"  Negative: {pair['negative']}")
    print(f"  Positive: {pair['positive']}")
    print()

Loaded 50000 positive-negative pairs
Sample data:
  Negative: 40/10=? Sorry, I don't know.
  Positive: 40/10=? The answer is 4 because 40/10 equals 4.

  Negative: 38-x=14,x=? Sorry, I don't know.
  Positive: 38-x=14,x=? The answer is 24 because 38-14 equals 24.

  Negative: 3*x=9,x=? I don't know.
  Positive: 3*x=9,x=? The answer is 3 because 9/3 equals 3.



### Step 6: Build the optimizer and scheduler (**students are required to complete this part!**)

In [6]:
# Build the AdamW optimizer over all model parameters
optimizer = torch.optim.AdamW(
    gpt.parameters(),
    lr=base_lr,
    weight_decay=0.01,
)

# Full batches per epoch times the number of epochs; reported for reference only
# (no learning-rate scheduler is created in this cell).
total_steps = epochs * (len(lines) // batch_size)
print(f"Total training steps: {total_steps}")
print(f"Learning rate: {base_lr}")
print("Optimizer: AdamW")

Total training steps: 3905
Learning rate: 0.0001
Optimizer: AdamW


### Step 7: Begin training (**students are required to complete this part!**)

In [7]:
# Number of full batches per epoch (get_batches drops a trailing partial batch);
# passed to tqdm so the progress bar knows its length and can show percentage/ETA.
total_steps = len(lines) // batch_size
print(f"Starting DPO training for {epochs} epochs...")
print(f"Batch size: {batch_size}, Beta: {beta}")

# Make sure the model is in training mode even if another cell left it in eval mode.
gpt.train()

for epoch in range(epochs):
    epoch_loss = 0
    num_batches = 0
    
    print(f"\nEpoch {epoch + 1}/{epochs}")
    pbar = tqdm(get_batches(lines, batch_size), total=total_steps, desc=f"Epoch {epoch + 1}")
    
    for step, (neg_tensor, pos_tensor) in enumerate(pbar):
        # Clear gradients
        optimizer.zero_grad()
        
        # Compute log probabilities for negative and positive examples
        neg_logprob = compute_logprob(neg_tensor)
        pos_logprob = compute_logprob(pos_tensor)
        
        # Preference loss: push the positive completion's log-prob above the negative's.
        # NOTE(review): the margin is divided by beta here, whereas the DPO paper
        # multiplies by beta, and no frozen reference model is used - confirm this
        # matches the intended objective before changing it.
        dpo_loss = -F.logsigmoid((pos_logprob - neg_logprob) / beta).mean()
        # Extra term that directly raises the likelihood of the positive completions.
        regularization = -pos_logprob.mean() * 0.1
        loss = dpo_loss + regularization
        
        # Backward pass
        loss.backward()
        optimizer.step()
        
        # Track metrics
        epoch_loss += loss.item()
        num_batches += 1
        
        # Update progress bar
        pbar.set_postfix({
            'loss': f'{loss.item():.4f}',
            'dpo_loss': f'{dpo_loss.item():.4f}',
            'avg_loss': f'{epoch_loss/num_batches:.4f}'
        })
    
    # Print epoch summary (guard against an epoch that produced no batches)
    avg_epoch_loss = epoch_loss / num_batches if num_batches > 0 else 0
    print(f"Epoch {epoch + 1} completed. Average loss: {avg_epoch_loss:.4f}")
    
    # Save checkpoint after each epoch
    ckpt_path = f"./dpo_epoch_{epoch + 1}.pt"
    torch.save({
        "model_state_dict": gpt.state_dict(),
        "model_args": ckpt['model_args'],
        "epoch": epoch + 1,
        "loss": avg_epoch_loss
    }, ckpt_path)
    print(f"Saved checkpoint to {ckpt_path}")

# Save final model
final_ckpt_path = "./dpo.pt"
torch.save({
    "model_state_dict": gpt.state_dict(),
    "model_args": ckpt['model_args'],
}, final_ckpt_path)
print(f"\nTraining completed! Final model saved to {final_ckpt_path}")

Starting DPO training for 5 epochs...
Batch size: 64, Beta: 0.5

Epoch 1/5


Epoch 1: 781it [03:51,  3.38it/s, loss=0.0576, dpo_loss=0.0000, avg_loss=0.1187] 


Epoch 1 completed. Average loss: 0.1187
Saved checkpoint to ./dpo_epoch_1.pt

Epoch 2/5


Epoch 2: 781it [03:50,  3.39it/s, loss=0.0432, dpo_loss=0.0001, avg_loss=0.0509]


Epoch 2 completed. Average loss: 0.0509
Saved checkpoint to ./dpo_epoch_2.pt

Epoch 3/5


Epoch 3: 781it [03:43,  3.50it/s, loss=0.0267, dpo_loss=0.0000, avg_loss=0.0328]


Epoch 3 completed. Average loss: 0.0328
Saved checkpoint to ./dpo_epoch_3.pt

Epoch 4/5


Epoch 4: 781it [03:40,  3.54it/s, loss=0.0245, dpo_loss=0.0000, avg_loss=0.0246]


Epoch 4 completed. Average loss: 0.0246
Saved checkpoint to ./dpo_epoch_4.pt

Epoch 5/5


Epoch 5: 781it [03:45,  3.46it/s, loss=0.0230, dpo_loss=0.0000, avg_loss=0.0229]

Epoch 5 completed. Average loss: 0.0229
Saved checkpoint to ./dpo_epoch_5.pt

Training completed! Final model saved to ./dpo.pt





### Step 8: Begin testing (**students are required to complete this part!**)

In [None]:
# Load the fine-tuned model
ckpt_path = "./dpo.pt"
checkpoint = torch.load(ckpt_path, map_location=device)
gptconf = GPTConfig(**checkpoint['model_args'])
gpt = GPT(gptconf).to(device)
# Weights may be stored under 'model' or under 'model_state_dict' (the key the
# training cell writes). Select the key explicitly instead of using a bare
# `except:`, which would also hide unrelated errors.
if 'model' in checkpoint:
    state_dict = checkpoint['model']
else:
    state_dict = checkpoint['model_state_dict']
# Strip the '_orig_mod.' prefix from any parameter names that carry it.
unwanted_prefix = '_orig_mod.'
for k in list(state_dict.keys()):
    if k.startswith(unwanted_prefix):
        state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
gpt.load_state_dict(state_dict)

# Set model to evaluation mode
gpt.eval()

# Define comprehensive testing functions (adapted from teammate's approach)
import re
import random

def generate_response(model, prompt: str, max_new_tokens: int = 64) -> str:
    """Encode `prompt`, sample from `model`, and return the decoded text.

    Sampling uses the notebook-level `temperature` and `top_k` settings and
    runs without gradient tracking. The returned string is the decoding of
    the first row of whatever `model.generate` returns (presumably prompt plus
    continuation - confirm against the model's `generate`).
    """
    # Batch of one: wrap the encoded prompt in an outer list to get shape (1, len).
    prompt_ids = torch.tensor([encode(prompt)], dtype=torch.long, device=device)
    with torch.no_grad():
        token_ids, _unused = model.generate(
            prompt_ids,
            max_new_tokens,
            temperature=temperature,
            top_k=top_k,
        )
    first_row = token_ids[0].tolist()
    return decode(first_row)

def solve_ground_truth(prompt: str):
    """Calculate the correct integer answer for a math prompt.

    Supported prompt shapes (integers may be negative, whitespace is ignored):
        a+b=?   a-b=?   a*b=?   a/b=?
        a*x=b,x=?   x*a=b,x=?   a-x=b,x=?   x-a=b,x=?
        a+x=b,x=?   x+a=b,x=?   x/a=b,x=?

    Returns:
        The integer answer, or None when the prompt matches none of the shapes,
        is not a string, or would require dividing by zero. Quotients use floor
        division (`//`), so inexact divisions are rounded down.
    """
    if not isinstance(prompt, str):
        return None

    num = r"(-?\d+)"
    # Common tail of every one-variable equation: ",x=?"
    ask_x = r"\s*,\s*x\s*=\?\s*"

    def floor_div(numerator, denominator):
        # Division by zero has no answer rather than raising.
        return None if denominator == 0 else numerator // denominator

    # (pattern, solver) pairs; the solver receives the two captured integers in
    # the order they appear in the prompt. Patterns are mutually exclusive.
    rules = [
        # Plain arithmetic
        (rf"\s*{num}\s*\+\s*{num}\s*=\?\s*", lambda a, b: a + b),
        (rf"\s*{num}\s*-\s*{num}\s*=\?\s*", lambda a, b: a - b),
        (rf"\s*{num}\s*\*\s*{num}\s*=\?\s*", lambda a, b: a * b),
        (rf"\s*{num}\s*/\s*{num}\s*=\?\s*", floor_div),
        # Multiplicative equations: a*x=b and x*a=b  ->  x = b / a
        (rf"\s*{num}\s*\*\s*x\s*=\s*{num}{ask_x}", lambda a, b: floor_div(b, a)),
        (rf"\s*x\s*\*\s*{num}\s*=\s*{num}{ask_x}", lambda a, b: floor_div(b, a)),
        # Subtractive equations: a-x=b -> x = a-b ; x-a=b -> x = a+b
        (rf"\s*{num}\s*-\s*x\s*=\s*{num}{ask_x}", lambda a, b: a - b),
        (rf"\s*x\s*-\s*{num}\s*=\s*{num}{ask_x}", lambda a, b: a + b),
        # Additive equations: a+x=b and x+a=b  ->  x = b-a
        (rf"\s*{num}\s*\+\s*x\s*=\s*{num}{ask_x}", lambda a, b: b - a),
        (rf"\s*x\s*\+\s*{num}\s*=\s*{num}{ask_x}", lambda a, b: b - a),
        # Division equation: x/a=b  ->  x = a*b
        (rf"\s*x\s*/\s*{num}\s*=\s*{num}{ask_x}", lambda a, b: a * b),
    ]

    for pattern, solve in rules:
        m = re.fullmatch(pattern, prompt)
        if m:
            return solve(int(m.group(1)), int(m.group(2)))
    return None

def parse_numeric_answer(text: str):
    """Return the last optionally-signed integer found in `text`, or None if there is none.

    A '+' or '-' directly in front of the digits is taken as the number's sign.
    """
    last_match = None
    # Walk all matches, keeping only the final one.
    for last_match in re.finditer(r"[-+]?\d+", text):
        pass
    if last_match is None:
        return None
    return int(last_match.group())

def evaluate_model(model, test_cases, max_new_tokens=72):
    """Run every prompt through the model, score it, and print a per-test report.

    Args:
        model: model object handed to `generate_response`.
        test_cases: iterable of prompt strings.
        max_new_tokens: generation budget passed to `generate_response`.

    Returns:
        A list with one dict per prompt, holding the keys 'prompt', 'output',
        'predicted', 'ground_truth' and 'correct'. A test counts as correct only
        when both the parsed prediction and the ground truth exist and are equal.
    """
    print("Testing the fine-tuned DPO model on math problems:")
    print("=" * 80)

    results = []
    test_number = 0
    for prompt in test_cases:
        test_number += 1

        # Generate, then derive the model's answer and the reference answer
        raw_output = generate_response(model, prompt, max_new_tokens).strip()
        predicted = parse_numeric_answer(raw_output)
        ground_truth = solve_ground_truth(prompt)

        # A missing prediction or missing reference is always scored as wrong
        is_correct = (
            predicted is not None
            and ground_truth is not None
            and predicted == ground_truth
        )

        results.append({
            "prompt": prompt,
            "output": raw_output,
            "predicted": predicted,
            "ground_truth": ground_truth,
            "correct": is_correct,
        })

        # Detailed per-test report
        mark = "✓" if is_correct else "✗"
        print(f"Test {test_number}: {prompt}")
        print(f"Model output: {raw_output}")
        print(f"Predicted answer: {predicted} | Correct answer: {ground_truth} | {mark}")
        print("-" * 60)

    return results

# Fixed, hand-picked prompts: plain arithmetic plus one-variable equations
basic_tests = [
    "17+19=?",
    "3*17=?",
    "72/4=?",
    "72-x=34,x=?",
    "x*11=44,x=?",
    "12+47=?",
    "91-58=?",
    "9*8=?",
    "72/9=?",
    "7*x=49,x=?",
]

# Generate additional random test cases for thorough evaluation
def generate_random_tests(num_tests=20, seed=None):
    """Generate random math prompts for testing.

    Args:
        num_tests: number of prompts to produce.
        seed: optional seed. When given, a private random generator is used so
            the same seed always yields the same prompts and the global
            `random` state is left untouched. When None (the default), the
            global `random` module is used, as before.

    Returns:
        A list of prompt strings of the forms a+b=?, a-b=?, a*b=?, a/b=? and
        a*x=b,x=?. Subtractions never go negative, and divisions and linear
        equations always have an exact integer answer.
    """
    rng = random if seed is None else random.Random(seed)
    tests = []
    for _ in range(num_tests):
        test_type = rng.choice(["add", "sub", "mul", "div", "linear"])

        if test_type == "add":
            a, b = rng.randint(1, 99), rng.randint(1, 99)
            tests.append(f"{a}+{b}=?")
        elif test_type == "sub":
            a, b = rng.randint(1, 99), rng.randint(1, 99)
            # Larger operand first so the result is non-negative
            tests.append(f"{max(a,b)}-{min(a,b)}=?")
        elif test_type == "mul":
            a, b = rng.randint(1, 12), rng.randint(1, 12)
            tests.append(f"{a}*{b}=?")
        elif test_type == "div":
            # Build the dividend from divisor * answer so the division is exact
            b = rng.randint(2, 12)
            ans = rng.randint(2, 10)
            a = b * ans
            tests.append(f"{a}/{b}=?")
        elif test_type == "linear":
            # Pick the solution first, then derive the right-hand side
            a = rng.randint(2, 12)
            x = rng.randint(1, 20)
            b = a * x
            tests.append(f"{a}*x={b},x=?")

    return tests

# Fixed prompts first, followed by 20 randomly generated ones
all_tests = basic_tests + generate_random_tests(20)

print(f"Running comprehensive evaluation with {len(all_tests)} test cases...")
print()

# Score every prompt with the fine-tuned model
results = evaluate_model(gpt, all_tests, max_new_tokens=100)

# Summary statistics
total_count = len(results)
correct_count = len([r for r in results if r["correct"]])
accuracy = (correct_count / total_count) * 100

banner = "=" * 80
print("\n" + banner)
print("EVALUATION SUMMARY:")
print(f"Total test cases: {total_count}")
print(f"Correct answers: {correct_count}")
print(f"Accuracy: {accuracy:.2f}%")
print(banner)

# Pass/fail verdict: at least 50% accuracy counts as meeting the requirement
print(
    "✓ SUCCESS: Majority of results are correct - assignment requirements met!"
    if accuracy >= 50
    else "✗ NEEDS IMPROVEMENT: Less than majority correct - may need additional training"
)

# Persist the summary and the per-test details for later analysis
import json
evaluation_results = dict(
    total_tests=total_count,
    correct_answers=correct_count,
    accuracy_percentage=accuracy,
    detailed_results=results,
)

with open("evaluation_results.json", "w") as f:
    json.dump(evaluation_results, f, indent=2)

print("\nDetailed results saved to evaluation_results.json")