In [None]:
# Install the libraries this notebook needs: transformers, accelerate, einops, bitsandbytes.
# Flags on the command below:
#   -q                       keep pip's output quiet
#   --no-deps                do not install each package's own dependencies, so those
#                            must already be available in the environment
#   --break-system-packages  allow installing into an externally managed Python
# NOTE(review): versions are unpinned, so a re-run may pick up different releases —
# consider pinning them for reproducible results.
!pip install -q transformers accelerate einops bitsandbytes --break-system-packages --no-deps

In [None]:
# Load prompts
import json

# Location of the prompt file (a Kaggle input dataset).
prompt_path = "/kaggle/input/new-prompts/dynamic_prompts_temp_sample.json"

# JSON text is UTF-8 by specification, so decode it explicitly rather than
# relying on whatever the platform's locale default happens to be.
with open(prompt_path, encoding="utf-8") as f:
    # The inference loop later iterates `prompts` and reads item["prompt"],
    # so the file is presumably a list of objects with a "prompt" key — verify.
    prompts = json.load(f)

In [None]:
# Format prompt
def format_prompt(prompt):
    """Wrap a user request in the chat template fed to the model.

    The result has three turns delimited by ``<|im_start|>`` / ``<|im_end|>``
    markers: a fixed system instruction asking for a bare C program, the
    user's request, and an opened (unterminated) assistant turn for the
    model to complete.

    Args:
        prompt: The user's request; leading and trailing whitespace is removed.

    Returns:
        The fully assembled prompt string, ending with the assistant header.
    """
    user_request = prompt.strip()
    segments = [
        # System turn: fixed instructions.
        "<|im_start|>system\n",
        "You are a C code generator.\n",
        "Output **only** a single, valid, compilable C program-nothing else.\n",
        "DO NOT include any explanations, examples, extra text, or comments.\n",
        "<|im_end|>\n",
        # User turn: the stripped request on its own line.
        "<|im_start|>user\n",
        user_request + "\n",
        "<|im_end|>\n",
        # Assistant turn is left open so generation continues from here.
        "<|im_start|>assistant\n",
    ]
    return "".join(segments)

In [None]:
# Inference function (temperature-controlled)
import torch

@torch.inference_mode()
def generate_response(prompt_text, max_tokens=512, temperature=0.7):
    """Generate the model's continuation for an already-formatted prompt.

    Uses the module-level ``tokenizer`` and ``model``, which must be loaded
    before this function is called.

    Args:
        prompt_text: The complete prompt string to feed to the model.
        max_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature. A positive value samples with
            nucleus sampling (top_p=0.95); ``None`` or a value <= 0 falls
            back to greedy decoding instead of failing.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        cut off), with special tokens removed.
    """
    # Follow the model's own placement instead of assuming a bare "cuda"
    # device; the model is loaded with device_map="auto".
    inputs = tokenizer(prompt_text, return_tensors="pt").to(model.device)
    prompt_len = inputs["input_ids"].shape[1]

    use_sampling = temperature is not None and temperature > 0
    gen_kwargs = {
        "max_new_tokens": max_tokens,
        "do_sample": use_sampling,
        # Use the EOS token as the padding id during generation.
        "pad_token_id": tokenizer.eos_token_id,
    }
    if use_sampling:
        # Sampling-only knobs are passed only when sampling is enabled.
        gen_kwargs["temperature"] = temperature
        gen_kwargs["top_p"] = 0.95

    # Unpack the full tokenizer output so the attention mask reaches
    # generate() along with the input ids. No extra torch.no_grad() is
    # needed: the inference_mode decorator already disables autograd.
    output_ids = model.generate(**inputs, **gen_kwargs)

    # Keep only the tokens produced after the prompt.
    gen_ids = output_ids[0, prompt_len:]
    return tokenizer.decode(gen_ids, skip_special_tokens=True)


In [None]:
# Load model
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

# Hugging Face Hub id of the checkpoint to load.
model_id = "Qwen/Qwen2.5-Coder-7B-Instruct"

# bitsandbytes settings: 4-bit NF4 weights with double quantization,
# float16 as the compute dtype.
quant_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# Tokenizer and model are both resolved from the same checkpoint id.
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    device_map="auto",
    quantization_config=quant_config,
)

# Switch to evaluation mode. As the cell's last expression, the returned
# module is also what the notebook displays.
model.eval()


In [None]:
# Run inference at different temperatures and save results
from tqdm import tqdm
import os

temperatures = [0.2, 0.5, 0.8, 1.0]
all_results = []

# Seed once up front so the sampled generations can be reproduced on a re-run.
SEED = 42
torch.manual_seed(SEED)

# Label stored with every record; derived from model_id so the two cannot
# drift apart if the checkpoint is changed.
model_label = model_id.split("/")[-1]

# Create the output location before the long run starts, so a path problem
# surfaces immediately rather than after all generations are done.
results_path = "results/qwen2_temp_results.json"
os.makedirs("results", exist_ok=True)

for temp in temperatures:
    temp_results = []

    print(f"\n⚙️ Running temperature {temp}...")
    for prompt_obj in tqdm(prompts):
        prompt_text = format_prompt(prompt_obj["prompt"])
        try:
            output = generate_response(prompt_text, temperature=temp)
        except Exception as e:
            # Deliberate best-effort: record the failure in place of the
            # output so a single bad prompt does not abort the whole sweep.
            output = f"ERROR: {e}"

        # Each record carries the original prompt fields plus run metadata.
        result = {
            **prompt_obj,
            "model": model_label,
            "temperature": temp,
            "input": prompt_text,
            "output": output
        }
        temp_results.append(result)

    all_results.extend(temp_results)

    # Checkpoint after every temperature: the file is rewritten with all
    # results so far, so an interrupted session keeps the completed
    # temperatures. The write after the last temperature is the final file.
    with open(results_path, "w", encoding="utf-8") as f:
        json.dump(all_results, f, indent=2)

print(f"\n✅ Saved {len(all_results)} generations to {results_path}")
