In [None]:
!pip install -q transformers datasets torch bitsandbytes accelerate


In [None]:


import torch, json
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from google.colab import drive


# -------------------- Paths --------------------
# Where the evaluation data and the fine-tuned checkpoint are read from.
BASE_DIR = "data"
MODEL_DIR_CTR = "checkpoints"
DATA_PATH = f"{BASE_DIR}/dataset_explanations.jsonl"
CKPT_PATH = f"{MODEL_DIR_CTR}/lora_finetuned/explanation_model"  # update if different

# Run on the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# -------------------- Load Dataset --------------------
# The JSON-lines file is registered under a single "test" split, and that
# split is selected straight away.
dataset = load_dataset(
    "json",
    data_files={"test": DATA_PATH},
)["test"]
print(f"Loaded {len(dataset)} explanation samples.")

# -------------------- Load Fine-Tuned Explanation Generator --------------------
# Tokenizer and model are both restored from the same checkpoint directory.
tokenizer = AutoTokenizer.from_pretrained(CKPT_PATH)
model = AutoModelForSeq2SeqLM.from_pretrained(CKPT_PATH)
model = model.to(device)
model.eval()  # switch to evaluation mode before generating
print(f"Loaded model from: {CKPT_PATH}")

# -------------------- Generate Explanations --------------------
def generate_explanation(input_text):
    """Generate an explanation for ``input_text`` with the loaded model.

    Relies on the module-level ``tokenizer``, ``model`` and ``device``.

    Args:
        input_text: Text handed to the tokenizer (tokenized with
            ``truncation=True`` and ``padding=True``).

    Returns:
        The first generated sequence decoded to a string, with special
        tokens skipped.
    """
    encoded = tokenizer(
        input_text,
        return_tensors="pt",
        truncation=True,
        padding=True,
    )
    encoded = encoded.to(device)

    # Gradients are not needed for generation.
    with torch.no_grad():
        generated = model.generate(max_new_tokens=80, **encoded)

    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

# -------------------- Show Examples --------------------
# Print up to 10 samples: the model input, the generated explanation and,
# when the record carries a non-empty "target_text", the reference text.
print("\nExample Explanations from Fine-Tuned Model:\n")

# Bound the preview by the dataset size; indexing dataset[i] past the last
# row would raise IndexError when fewer than 10 samples were loaded.
num_examples = min(10, len(dataset))

for i in range(num_examples):
    sample = dataset[i]
    input_text = sample["input_text"]
    target_text = sample.get("target_text")  # optional field; None if absent

    gen_text = generate_explanation(input_text)

    print(f"Example {i+1}")
    print(f"Input : {input_text}")
    print(f"Output: {gen_text}")
    if target_text:
        print(f"Target: {target_text}")
    print("-" * 80)
