In [None]:
import os, pandas as pd
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import answer_similarity
from ragas.embeddings import HuggingFaceEmbeddings

# LangChain OpenAI client pointed to your local vLLM
from langchain_openai import ChatOpenAI
try:
    from ragas.llms.langchain import LangchainLLMWrapper  # newer path
except Exception:
    from ragas.llms import LangchainLLMWrapper

In [None]:
# Endpoint configuration for the local vLLM server (OpenAI-compatible API).
# setdefault() only fills in a value when the variable is not already set,
# so anything exported before the notebook started takes precedence.
os.environ.setdefault("OPENAI_BASE_URL", "http://127.0.0.1:8002/v1")
# Placeholder key; set a real one only if vLLM was started with --api-key.
os.environ.setdefault("OPENAI_API_KEY", "not-needed")

# The base URL is guaranteed to exist after the setdefault() call above.
VLLM_URL = os.environ["OPENAI_BASE_URL"]
# Model name can be overridden through the VLLM_MODEL environment variable.
MODEL_NAME = os.environ.get("VLLM_MODEL", "google/gemma-3-4b-it")


In [None]:
# Chat client that talks to the endpoint in VLLM_URL using the model named by
# MODEL_NAME; the API key is read from the environment at construction time.
judge_llm = ChatOpenAI(
    temperature=0.2,
    api_key=os.environ["OPENAI_API_KEY"],
    base_url=VLLM_URL,
    model=MODEL_NAME,
)
# Wrap the LangChain client in the ragas LLM adapter.
llm = LangchainLLMWrapper(judge_llm)


In [None]:
# Sentence-embedding model used by the answer_similarity metric.
emb = HuggingFaceEmbeddings(model="sentence-transformers/all-MiniLM-L6-v2")

# ── Load generations CSV (requires columns: question, answer, ground_truth) ──
csv_path = "/workspace/gemma_rag_generations.csv"  # <- or "/workspace/gemma_generations.csv"
df = pd.read_csv(csv_path)
assert {"question","answer","ground_truth"}.issubset(df.columns), \
    f"Missing columns. Have: {df.columns.tolist()}"

# Build RAGAS dataset from just the three columns the metric needs.
ragas_ds = Dataset.from_pandas(df[["question","answer","ground_truth"]])

# ── Evaluate (answer_similarity only) ────────────────────────────────────────
executor = evaluate(
    ragas_ds,
    metrics=[answer_similarity],
    llm=llm,                # not strictly required for this metric, but safe
    embeddings=emb,
    show_progress=True,
    return_executor=True,
    # Tip: if your vLLM struggles with concurrency, add: batch_size=2
)

# Collect results (list of floats).
# `results` may be a method, a plain attribute/property, or absent depending on
# the returned object. Checking callable() instead of catching TypeError means a
# TypeError raised *inside* results() is reported as-is rather than being
# swallowed and turned into a misleading "Expected a list" error below.
results_attr = getattr(executor, "results", None)
if callable(results_attr):
    raw = results_attr()
elif results_attr is not None:
    raw = results_attr
else:
    raw = executor

if not isinstance(raw, list):
    raise RuntimeError(f"Expected a list of floats, got {type(raw)}")

# Per-row scores and their overall mean (one-row frame with a *_mean column).
per_row_df = pd.DataFrame(raw, columns=["answer_similarity_score"])
overall_df = per_row_df.mean().to_frame().T
overall_df.columns = [c + "_mean" for c in overall_df.columns]

# Save both tables as CSV.
overall_out = "/workspace/gemma_ragas_overall.csv"
perrow_out = "/workspace/gemma_ragas_per_row.csv"
overall_df.to_csv(overall_out, index=False)
per_row_df.to_csv(perrow_out, index=False)

print("Saved:")
print(" -", overall_out)
print(" -", perrow_out)
print("Preview:")
display(overall_df)
display(per_row_df.head(3))


In [None]:
# %%
import os, json, pandas as pd
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy, context_precision, context_recall
from ragas.embeddings import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI

# Try both import paths (supports old/new ragas versions)
try:
    from ragas.llms.langchain import LangchainLLMWrapper
except ModuleNotFoundError:
    from ragas.llms import LangchainLLMWrapper

# Launch command for the judge server (kept for reference):
#   vllm serve Qwen/Qwen2.5-3B-Instruct --port 8002 --gpu-memory-utilization 0.8 --dtype bfloat16
# Unlike setdefault(), update() overwrites any values already in the environment.
os.environ.update(
    OPENAI_BASE_URL="http://127.0.0.1:8002/v1",
    OPENAI_API_KEY="not-needed",
)

# Judge chat client, configured from the environment variables set just above.
judge_llm = ChatOpenAI(
    temperature=0.2,
    api_key=os.environ["OPENAI_API_KEY"],
    base_url=os.environ["OPENAI_BASE_URL"],
    model="Qwen/Qwen2.5-3B-Instruct",
)
llm = LangchainLLMWrapper(judge_llm)

# Embedding model handed to evaluate() alongside the judge LLM.
emb = HuggingFaceEmbeddings(model="sentence-transformers/all-MiniLM-L6-v2")

# Read the generations table and report which columns it provides.
csv_path = "/workspace/gemma_rag_generations.csv"
df = pd.read_csv(csv_path)
print("Loaded CSV with columns:", list(df.columns))

def to_contexts_safe(x):
    """Normalize a raw ``context``/``contexts`` cell to a list of strings.

    Handling by input type:
      * ``None``, ``pd.NA`` or a float NaN -> ``[]``
      * list / tuple                       -> each element passed through ``str``
      * str                                -> parsed as described below
      * anything else                      -> ``[str(x)]``

    String parsing, in order:
      1. empty / whitespace-only -> ``[]``
      2. JSON list (``'["a", "b"]'``)
      3. Python list literal (``"['a', 'b']"``), which is what a list column
         looks like after a round trip through ``DataFrame.to_csv``
      4. split on blank lines (``"\\n\\n"``); if that yields nothing, the
         stripped string is returned as a single context.

    Parameters
    ----------
    x : Any
        One cell value from the context column.

    Returns
    -------
    list[str]
    """
    import ast  # local: only needed for the Python-literal fallback

    if x is None or x is pd.NA or (isinstance(x, float) and pd.isna(x)):
        return []
    if isinstance(x, (list, tuple)):
        return [str(t) for t in x]
    if isinstance(x, str):
        s = x.strip()
        if not s:
            return []

        # Try JSON list first. Parsing is best-effort: a failure just means
        # "not JSON", so fall through to the next strategy.
        try:
            v = json.loads(s)
        except (ValueError, RecursionError):
            v = None
        if isinstance(v, list):
            return [str(t) for t in v]

        # Python repr of a list (single-quoted items) is not valid JSON.
        # Only attempt this for bracket-delimited text so ordinary prose is
        # never reinterpreted. literal_eval evaluates literals only.
        # NOTE(review): a numpy-array repr such as "['a' 'b']" (no commas)
        # parses as one concatenated string - confirm how the CSV was written.
        if s.startswith("[") and s.endswith("]"):
            try:
                v = ast.literal_eval(s)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                v = None
            if isinstance(v, list):
                return [str(t) for t in v]

        # Otherwise split by double newlines
        parts = [p.strip() for p in s.split("\n\n") if p.strip()]
        return parts if parts else [s]
    return [str(x)]

# Use "contexts" when present, otherwise fall back to "context". Fail with a
# message that lists the available columns instead of a bare KeyError when the
# CSV has neither.
if "contexts" in df.columns:
    src_col = "contexts"
elif "context" in df.columns:
    src_col = "context"
else:
    raise KeyError(
        f"Expected a 'contexts' or 'context' column. Have: {df.columns.tolist()}"
    )
# Normalize every cell to a list of strings (adds/overwrites df["contexts"]).
df["contexts"] = df[src_col].apply(to_contexts_safe)

# ground_truth is included only when the CSV provides it.
cols = ["question", "answer", "contexts"]
if "ground_truth" in df.columns:
    cols.append("ground_truth")
ragas_ds = Dataset.from_pandas(df[cols])
print("Prepared dataset with", len(ragas_ds), "rows")

def metric_mean(res, metric_name: str) -> float:
    """Return the mean score for ``metric_name`` from an evaluation result.

    Several result shapes are tried in order, and the first that yields a
    number wins:

      1. ``res.to_pandas()``: mean of the ``metric_name`` column, or, if that
         column is absent, the mean of the first numeric column.
      2. ``res.scores``: a dict keyed by metric name, a list of per-row dicts
         (each holding ``metric_name``), or a flat list of numbers.
      3. ``res`` itself when it is a list, otherwise ``res.results()``.

    Parameters
    ----------
    res : Any
        Object returned by the evaluation call.
    metric_name : str
        Name of the metric whose mean is wanted.

    Returns
    -------
    float
        The mean score, or NaN when no strategy succeeds. This function does
        not raise for unsupported result shapes.
    """
    # 1) DataFrame view of the result.
    try:
        frame = res.to_pandas()
        if metric_name in frame.columns:
            return float(frame[metric_name].mean())
        return float(frame.mean(numeric_only=True).iloc[0])
    except Exception:
        pass

    # 2) Raw `.scores` attribute. Guarded so an unexpected shape falls through
    #    to the next strategy instead of escaping to the caller.
    scores = getattr(res, "scores", None)
    try:
        if isinstance(scores, dict) and metric_name in scores:
            return float(pd.Series(scores[metric_name]).mean())
        if isinstance(scores, list):
            if scores and all(isinstance(row, dict) for row in scores):
                # One dict per row: pull this metric out of each row. Missing
                # or non-numeric entries become NaN and are skipped by mean().
                per_row = pd.Series(
                    [row.get(metric_name) for row in scores], dtype=object
                )
                return float(pd.to_numeric(per_row, errors="coerce").mean())
            return float(pd.Series(scores).mean())
    except Exception:
        pass

    # 3) Plain list of scores, or an object exposing results().
    try:
        vals = res if isinstance(res, list) else res.results()
        return float(pd.Series(vals).mean())
    except Exception:
        return float("nan")

# Metrics are evaluated one at a time so a failure in one does not abort the rest.
metrics = [faithfulness, answer_relevancy, context_precision, context_recall]
results = {}

for metric in metrics:
    print(f"\n🔹 Evaluating metric: {metric.name}")
    try:
        res = evaluate(
            dataset=ragas_ds,
            metrics=[metric],
            llm=llm,
            embeddings=emb,
            batch_size=2,        # safer for vLLM stability
            show_progress=True,
        )
        m = metric_mean(res, metric.name)
        results[metric.name] = m
        print(f"{metric.name} → {m}")
    except Exception as e:
        # Best-effort: keep going, but record NaN so the output CSV always has
        # one column per requested metric instead of silently omitting it.
        results[metric.name] = float("nan")
        print(f"Skipped {metric.name} due to error:", e)

# Single-row table: one column per metric, holding its mean score.
out_path = "/workspace/qwen_ragas_metric_means.csv"
summary_df = pd.DataFrame([results])
summary_df.to_csv(out_path, index=False)
print("\nAll metrics done. Saved mean scores to:", out_path)
print(summary_df)