In [1]:
%pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [3]:
import os, json, requests, time
import pandas as pd
from pathlib import Path
from tqdm import tqdm
from datasets import Dataset

from ragas import evaluate
from ragas.metrics import answer_similarity 
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings.base import embedding_factory
from langchain_google_genai import ChatGoogleGenerativeAI


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
import os
os.environ["GOOGLE_API_KEY"] = "AIzaSyAbc8VTKoFfXGuMZbc0OvQ_L7U-K8LHg1A"

In [16]:
os.environ["DATA_PATH"] = "/workspace/QAs_Hukumonline_Test.json"  # uploaded path
os.environ["VLLM_BASE"] = "http://127.0.0.1:8002"
os.environ["VLLM_MODEL"] = "google/gemma-3-4b-it"
os.environ["LORA_NAME"] = "hukum"
os.environ["GEMINI_MODEL"] = "gemini-2.5-flash"

assert "GOOGLE_API_KEY" in os.environ, "Set GOOGLE_API_KEY in env"
print("Config ready")
print("VLLM:", os.environ["VLLM_BASE"], os.environ["VLLM_MODEL"], "LoRA:", os.environ["LORA_NAME"])


Config ready
VLLM: http://127.0.0.1:8002 google/gemma-3-4b-it LoRA: hukum


In [6]:
with open(os.environ["DATA_PATH"], "r", encoding="utf-8") as f:
    raw = json.load(f)

df = pd.DataFrame(raw).rename(columns={"instruction":"question","response":"ground_truth"})
print(f"Rows: {len(df)}")
df.head(3)


Rows: 96


Unnamed: 0,question,ground_truth
0,Apa bunyi Pasal 187 KUHP? Benarkah Pasal 187 K...,Pasal 187KUHPlama yang saat artikel ini diterb...
1,Orang gila apakah termasuk subjek hukum? Apaka...,Ilmu hukum pidana mengenal adanya alasan pengh...
2,Suami saya tersangkut kasus pencurian dengan p...,Tahanan adalah tersangka atau terdakwa yang di...


In [17]:
VLLM_URL = os.environ["VLLM_BASE"]
MODEL    = os.environ["VLLM_MODEL"]
LORA     = os.environ["LORA_NAME"]

answers = []

start = time.time()
for q in tqdm(df["question"].tolist(), desc="Generating answers (Gemma 3 4B + LoRA)"):
    payload = {
        "model": MODEL,
        "messages": [{"role":"user","content": q}],
        "temperature": 0.2,
        "max_tokens": 1280,
        "extra_body": {"lora_modules": [LORA]},
    }
    r = requests.post(f"{VLLM_URL}/v1/chat/completions", json=payload, timeout=300)
    r.raise_for_status()
    answers.append(r.json()["choices"][0]["message"]["content"])

end = time.time()
elapsed = end - start

df["answer"] = answers
out_csv = "/workspace/gemma_generations_lora.csv"
df.to_csv(out_csv, index=False)
print("Saved", out_csv)

print(f"Total time: {elapsed:.2f} seconds")
print(f"Average per question: {elapsed / max(1,len(df)):.2f} seconds")

df.head(3)

Generating answers (Gemma 3 4B + LoRA): 100%|██████████| 96/96 [26:38<00:00, 16.65s/it]

Saved /workspace/gemma_generations_lora.csv
Total time: 1598.11 seconds
Average per question: 16.65 seconds





Unnamed: 0,question,ground_truth,answer
0,Apa bunyi Pasal 187 KUHP? Benarkah Pasal 187 K...,Pasal 187KUHPlama yang saat artikel ini diterb...,Pasal 187 Kitab Undang-Undang Hukum Pidana (KU...
1,Orang gila apakah termasuk subjek hukum? Apaka...,Ilmu hukum pidana mengenal adanya alasan pengh...,Pertanyaan yang sangat menarik dan kompleks me...
2,Suami saya tersangkut kasus pencurian dengan p...,Tahanan adalah tersangka atau terdakwa yang di...,Situasi yang Anda hadapi sangat berat dan memb...


In [18]:
csv_path = "/workspace/gemma_generations_lora.csv"
df = pd.read_csv(csv_path)
assert {"question","answer","ground_truth"}.issubset(df.columns), f"Missing columns: {df.columns.tolist()}"

judge_llm = ChatGoogleGenerativeAI(
    model=os.environ["GEMINI_MODEL"],
    google_api_key=os.environ["GOOGLE_API_KEY"],
)
llm = LangchainLLMWrapper(judge_llm)

emb = embedding_factory(
    provider="huggingface",
    model="sentence-transformers/all-MiniLM-L6-v2"
)

ragas_ds = Dataset.from_pandas(df[["question","answer","ground_truth"]])

executor = evaluate(
    ragas_ds,
    metrics=[answer_similarity],
    llm=llm,
    embeddings=emb,
    show_progress=True,
    return_executor=True,
)

raw = None
if hasattr(executor, "results"):
    try:
        raw = executor.results()
    except Exception:
        raw = executor.results
else:
    raw = executor

if not isinstance(raw, list):
    raise RuntimeError(f"Expected a list of floats but got {type(raw)}")

per_row_df = pd.DataFrame(raw, columns=["answer_similarity_score"])
print("Per-row dataframe shape:", per_row_df.shape)
print(per_row_df.head(3))

overall_df = per_row_df.mean().to_frame().T
overall_df.columns = [c + "_mean" for c in overall_df.columns]

overall_df.to_csv("/workspace/gemma_ragas_overall_lora.csv", index=False)
per_row_df.to_csv("/workspace/gemma_ragas_per_row_lora.csv", index=False)

print("Saved:")
print("  - /workspace/gemma_ragas_overall_lora.csv")
print("  - /workspace/gemma_ragas_per_row_lora.csv")

overall_df


  llm = LangchainLLMWrapper(judge_llm)
Evaluating: 100%|██████████| 96/96 [00:03<00:00, 30.75it/s]

Per-row dataframe shape: (96, 1)
   answer_similarity_score
0                 0.773469
1                 0.666069
2                 0.747106
Saved:
  - /workspace/gemma_ragas_overall_lora.csv
  - /workspace/gemma_ragas_per_row_lora.csv





Unnamed: 0,answer_similarity_score_mean
0,0.743518


In [19]:
import pandas as pd
import re
from collections import Counter

# --- File tunggal: Gemma LoRA ---
paths = {
    "Gemma-3-4B-it-LoRA": "/workspace/gemma_generations_lora.csv",
}

# --- Text stats helper ---
def text_stats(df):
    answers = df["answer"].fillna("").astype(str)
    # total words
    word_counts = answers.apply(lambda t: len(re.findall(r"\w+(?:'\w+)?", t)))
    avg_words = word_counts.mean()

    # type–token ratio (unique words / total words per answer, lalu dirata)
    def ttr(text):
        tokens = re.findall(r"\w+(?:'\w+)?", text.lower())
        return len(set(tokens)) / len(tokens) if tokens else 0
    ttrs = answers.apply(ttr)
    avg_ttr = ttrs.mean()

    # persentase jawaban kosong
    empties_pct = 100 * (answers.str.strip() == "").mean()

    # persentase jawaban duplikat persis
    duplicate_pct = 100 * (1 - answers.nunique() / len(answers)) if len(answers) else 0

    return {
        "avg_words": avg_words,
        "avg_type_token_ratio": avg_ttr,
        "answer_empties_%": empties_pct,
        "exact_duplicate_answers_%": duplicate_pct,
        "n_rows": len(answers)
    }

# --- Jalankan untuk Gemma LoRA ---
rows = []
for name, path in paths.items():
    try:
        df = pd.read_csv(path)
        if "answer" not in df.columns:
            raise ValueError(f"'answer' column not found in {path}")
        metrics = text_stats(df)
        metrics["model"] = name
        rows.append(metrics)
    except Exception as e:
        print(f"[skip] {name}: {e}")

summary = pd.DataFrame(rows)
summary = summary[
    ["model", "n_rows", "avg_words", "avg_type_token_ratio", "answer_empties_%", "exact_duplicate_answers_%"]
]

print(summary.round(3))

# Simpan hasil
summary.to_csv("/workspace/model_text_metrics_summary_lora.csv", index=False)
print("\nSaved /workspace/model_text_metrics_summary_lora.csv")


                model  n_rows  avg_words  avg_type_token_ratio  \
0  Gemma-3-4B-it-LoRA      96    488.562                 0.428   

   answer_empties_%  exact_duplicate_answers_%  
0               0.0                        0.0  

Saved /workspace/model_text_metrics_summary_lora.csv
