In [None]:
# Install required libraries (run this once)
!pip install -q transformers accelerate einops bitsandbytes --break-system-packages --no-deps

In [None]:
# Load prompts
import json

# Location of the prompt file; later cells slice `prompts` and read each
# item's "prompt" key.
prompt_path = "/kaggle/input/new-prompts/dynamic_prompts.json"
# Decode explicitly as UTF-8 so parsing does not depend on the platform's
# default locale encoding.
with open(prompt_path, encoding="utf-8") as f:
    prompts = json.load(f)

In [None]:
# Format prompt
def format_prompt(prompt):
    """Wrap a raw task description in the chat markup passed to the model.

    The result consists of a fixed system turn (telling the model to output
    a single C program inside a fenced block), a user turn holding ``prompt``
    with leading/trailing whitespace removed, and an opened assistant turn.

    Args:
        prompt: Task description text.

    Returns:
        The complete prompt string; every segment, including the final
        assistant tag, is terminated by a newline.
    """
    system_lines = (
        "You are a code generation engine that outputs ONLY fully compilable C programs inside triple backticks (```c ... ```).",
        "DO NOT include any explanations, examples, extra text, or comments.",
        "The entire output must be a single valid C file, concise and under 150 lines.",
    )
    user_text = prompt.strip()
    pieces = [
        "<|im_start|>system",
        *system_lines,
        "<|im_end|>",
        "<|im_start|>user",
        user_text,
        "<|im_end|>",
        "<|im_start|>assistant",
    ]
    # Terminate each piece with a newline and concatenate in order.
    return "".join(piece + "\n" for piece in pieces)

In [None]:
# Inference function
def generate_batch(prompts_batch, max_new_tokens=512):
    """Greedily generate one completion for each prompt in ``prompts_batch``.

    Uses the notebook-level ``tokenizer``, ``model`` and ``torch`` names, which
    must be defined before this function is called.

    Args:
        prompts_batch: List of fully formatted prompt strings.
        max_new_tokens: Upper bound on the number of tokens generated per prompt.

    Returns:
        A list of decoded completions (special tokens skipped), one per input
        prompt and in the same order. An empty batch returns an empty list.

    Side effects:
        Sets ``tokenizer.padding_side`` to ``"left"`` if it is not already.
    """
    # Nothing to tokenize or generate for an empty batch.
    if not prompts_batch:
        return []

    # Decoder-only models continue from the last position of each row, so the
    # padding of shorter prompts must sit on the left; with right padding the
    # model would be asked to continue from pad tokens.
    if tokenizer.padding_side != "left":
        tokenizer.padding_side = "left"

    # Prompts longer than 1024 tokens are truncated by the tokenizer.
    # Move the tensors to wherever the model lives instead of assuming "cuda".
    inputs = tokenizer(
        prompts_batch,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=1024,
    ).to(model.device)

    with torch.no_grad():
        output_ids = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=max_new_tokens,
            do_sample=False,  # greedy decoding
            pad_token_id=tokenizer.eos_token_id,
            use_cache=True,
        )

    # generate() returns prompt + continuation; keep only the columns that
    # come after the (padded) prompt.
    prompt_len = inputs.input_ids.shape[1]
    gen_ids = output_ids[:, prompt_len:]
    return tokenizer.batch_decode(gen_ids, skip_special_tokens=True)

In [None]:
# Model & tokenizer setup
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

model_id = "Qwen/Qwen2.5-Coder-7B-Instruct"

# 4-bit NF4 quantization with double quantization and float16 compute dtype.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

# NOTE(review): trust_remote_code=True allows code from the model repository
# to run locally — confirm it is actually required for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
# Prompts are generated in padded batches; a decoder-only model needs the
# padding on the left so every row ends with real prompt tokens.
tokenizer.padding_side = "left"

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quant_config,
    device_map="auto",
    trust_remote_code=True
)
model.eval()

# Optional PyTorch 2.0 compile: best effort, but report the failure instead
# of hiding it so it is clear which variant of the model is in use.
try:
    model = torch.compile(model)
except Exception as exc:
    print(f"torch.compile failed, continuing with the uncompiled model: {exc!r}")

In [None]:
from tqdm import tqdm
import os
import json

# Main loop: format each batch of prompt records, generate completions, and
# collect one result record per prompt.
BATCH_SIZE = 8
results = []
model_label = "Qwen2.5-Coder-7B-Instruct"

for i in tqdm(range(0, len(prompts), BATCH_SIZE)):
    batch_objs = prompts[i : i + BATCH_SIZE]
    formatted = [format_prompt(obj["prompt"]) for obj in batch_objs]
    outputs = generate_batch(formatted)

    # Pair every record with its own formatted input and completion without
    # mutating `formatted` while iterating.
    for obj, model_input, out in zip(batch_objs, formatted, outputs):
        # Copy the original record, then add/overwrite the run-specific fields.
        results.append({
            **obj,
            "model": model_label,
            "input": model_input,
            "output": out,
        })

# Save results
results_path = "results/qwen2_results.json"
os.makedirs(os.path.dirname(results_path), exist_ok=True)
with open(results_path, "w") as f:
    json.dump(results, f, indent=2)

print(f"Saved {len(results)} results to {results_path}")