# Dyck inference (Google Colab)

Run inference with the Hugging Face LoRA adapter **akashdutta1030/dyck-deepseek-r1-lora**, loaded on top of its base model. This notebook mirrors `inference.py`.

1. **Runtime → Change runtime type → GPU** (T4 or better).
2. Run all cells in order.
3. See **EXTRACTED ANSWER** for the Dyck completion (aligned with training format).

In [None]:
!pip install -q transformers peft accelerate torch

In [None]:
# Hugging Face model (adapter)
# MODEL_ID is the repo the tokenizer and the PEFT adapter are loaded from;
# BASE_MODEL_ID is the base checkpoint the adapter is applied on top of.
MODEL_ID = "akashdutta1030/dyck-deepseek-r1-lora"
BASE_MODEL_ID = "unsloth/DeepSeek-R1-Distill-Qwen-1.5B"
# Token limit (with truncation) applied when the rendered prompt is tokenized.
MAX_LENGTH = 2048

# Inference controls (match inference.py)
# All but EXTRACT_ANSWER are forwarded to model.generate() in the inference cell.
TEMPERATURE = 0.05  # sampling temperature (generation runs with do_sample=True)
MAX_NEW_TOKENS = 256  # cap on the number of newly generated tokens
REPETITION_PENALTY = 1.1
TOP_P = 0.9  # nucleus-sampling threshold
EXTRACT_ANSWER = True  # when False, extract_answer() returns the stripped raw response

In [None]:
def format_prompt(sequence: str) -> str:
    """Build the task prompt for a single bracket sequence.

    The wording follows generator._format_question (the training format),
    so it should not be edited independently of that function.

    Args:
        sequence: The (possibly incomplete) bracket sequence to embed.

    Returns:
        The full instruction text with ``sequence`` inserted verbatim.
    """
    # One tuple entry per output line; empty strings are the blank separator lines.
    prompt_lines = (
        "Complete the following Dyck language sequence by adding the minimal necessary closing brackets.",
        "",
        "Sequence: " + sequence,
        "",
        "Rules:",
        "- Add only the closing brackets needed to match all unmatched opening brackets",
        "- Do not add any extra bracket pairs beyond what is required",
        "",
        "Provide only the complete valid sequence.",
    )
    return "\n".join(prompt_lines)


def extract_answer(response: str) -> str:
    """Extract the Dyck completion from raw model output.

    Resolution order:
      1. If the module-level flag ``EXTRACT_ANSWER`` is false, return the
         stripped response untouched.
      2. If the text contains ``FINAL ANSWER:``, return the first non-empty
         line that follows the *last* occurrence of that marker.
      3. Otherwise return the last line made up solely of bracket characters
         and whitespace, with the whitespace removed.
      4. If nothing matches, return the stripped response.

    Args:
        response: Decoded model output.

    Returns:
        The extracted answer, or the stripped response as a fallback.
    """
    if not EXTRACT_ANSWER:
        return response.strip()
    if "FINAL ANSWER:" in response:
        # Text after the last marker; stripping first lets the answer sit on
        # the line below the marker as well as on the same line.
        part = response.split("FINAL ANSWER:")[-1].strip().split("\n")[0].strip()
        if part:
            return part
    bracket_chars = frozenset("()[]{}<>⟨⟩⟦⟧⦃⦄⦅⦆")
    for line in reversed(response.split("\n")):
        line = line.strip()
        if line and all(c in bracket_chars or c.isspace() for c in line):
            # The check above tolerates any whitespace (tabs included), so drop
            # all of it rather than only plain spaces.
            return "".join(c for c in line if c in bracket_chars)
    return response.strip()

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

print("Loading model from Hugging Face...")

# The tokenizer is fetched from the adapter repo. If it defines no pad token,
# reuse EOS so that generate() can be given a pad_token_id later on.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Options for the base checkpoint: fp16 weights, library-chosen device placement.
base_load_kwargs = dict(
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

# Load the base model first, then wrap it with the adapter weights from MODEL_ID.
model = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID, **base_load_kwargs),
    MODEL_ID,
)
model.eval()  # switch to evaluation mode for inference
print("Model loaded.")

In [None]:
# Input bracket sequence (change this to try other sequences)
sequence = "<[<⟨{(<[(("

# Wrap the task prompt as a single user turn and render it to plain text,
# with the generation prompt appended so the model continues as the assistant.
prompt = format_prompt(sequence)
messages = [{"role": "user", "content": prompt}]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Tokenize the rendered chat (truncated to MAX_LENGTH) and move it to the model's device.
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=MAX_LENGTH)
inputs = inputs.to(model.device)

# Sampling settings come from the configuration cell.
generation_kwargs = dict(
    max_new_tokens=MAX_NEW_TOKENS,
    do_sample=True,
    temperature=TEMPERATURE,
    top_p=TOP_P,
    repetition_penalty=REPETITION_PENALTY,
    pad_token_id=tokenizer.pad_token_id,
)
with torch.no_grad():
    outputs = model.generate(**inputs, **generation_kwargs)

# generate() returns prompt + completion; keep only the tokens after the prompt.
prompt_token_count = inputs["input_ids"].shape[1]
completion_ids = outputs[0][prompt_token_count:]
raw_response = tokenizer.decode(completion_ids, skip_special_tokens=True).strip()
answer = extract_answer(raw_response)

## Inference result

In [None]:
import json

# Gather everything to display first, then emit the whole report in one print call.
result = {"sequence": sequence, "response": raw_response, "answer": answer}

rule = "=" * 50

# Show at most 500 characters of the raw output, marking any truncation with "...".
preview = raw_response[:500]
if len(raw_response) > 500:
    preview += "..."

# (heading, body) pairs in display order.
sections = (
    ("INPUT SEQUENCE:", sequence),
    ("MODEL OUTPUT (raw, first 500 chars):", preview),
    ("EXTRACTED ANSWER (Dyck completion):", answer),
    ("Result (JSON):", json.dumps(result, ensure_ascii=False, indent=2)),
)

# Each section is a ruled heading followed by its body; sections are separated by one blank line.
print("\n\n".join(f"{rule}\n{heading}\n{rule}\n{body}" for heading, body in sections))