# In [None]:
import json
import pandas as pd
from transformers import pipeline


# Build the FLAN-T5 (large) text2text-generation pipeline once at module level;
# generative_qa() reuses this single instance for every prompt.
generative_pipeline = pipeline(
    task="text2text-generation",
    model="google/flan-t5-large",
)

def generative_qa(question: str, context: str) -> str:
    """Generate a concise answer to *question* based on *context*.

    A fixed instruction preamble, the question and the context are joined
    into one prompt, which is sent to the module-level
    ``generative_pipeline``.

    Args:
        question: The question text inserted after ``Question:``.
        context: The supporting text inserted after ``Context:``.

    Returns:
        The ``generated_text`` of the first pipeline result, with leading
        and trailing whitespace removed.
    """
    instructions = (
        "Answer only according to the question concisely and provide only the necessary words to directly answer the question using ONLY the context. "
        "DO NOT REPEAT FULL SENTENCES. Do not add explanations. "
    )
    prompt = f"{instructions}Question: {question} Context: {context}"

    # Generation is capped at 80 new tokens per call.
    candidates = generative_pipeline(
        prompt,
        max_new_tokens=80,
        clean_up_tokenization_spaces=True,
    )
    first_candidate = candidates[0]
    return first_candidate["generated_text"].strip()


file_path = r"clapnq_train_answerable.jsonl"

# Parse the JSONL input: one JSON object per line. Blank lines (for example a
# stray empty line at the end of the file) are skipped so they do not raise
# json.JSONDecodeError.
with open(file_path, "r", encoding="utf-8") as f:
    lines = [json.loads(line) for line in f if line.strip()]

data = []
for item in lines:
    question = item.get("input", "")

    # "output" may be missing, None or an empty list; fall back to a single
    # empty record so the [0] lookup below cannot fail.
    outputs = item.get("output") or [{}]
    # NOTE(review): the text used as "context" is the first output's "answer"
    # field, not a separate passage field — confirm this is the intended source.
    context = outputs[0].get("answer", "")

    # Only call the model when both the question and the context are non-empty;
    # otherwise record an empty generated answer for this row.
    gen_answer = generative_qa(question, context) if question and context else ""

    data.append({
        "question": question,
        "generated_answer": gen_answer,
    })

out_csv = r"generated_answers.csv"
# Pin the column order explicitly so the CSV still gets a header row when the
# input file contains no records.
df = pd.DataFrame(data, columns=["question", "generated_answer"])

df.to_csv(out_csv, index=False, encoding="utf-8")
print(f"Pipeline completata: {out_csv}")
