In [None]:
!pip install -q sentence-transformers faiss-cpu requests tqdm

In [None]:
import os, json, faiss, requests, numpy as np
from sentence_transformers import SentenceTransformer
from tqdm import tqdm


In [None]:
# --- Retrieval / generation configuration ---
INDEX_DIR = "/workspace/index_uu"  # directory containing index.faiss & metadata.jsonl
VLLM_BASE = "http://127.0.0.1:8002"  # base URL of the vLLM server; callers append "/v1/chat/completions"
MODEL_NAME = "google/gemma-3-4b-it"  # value sent as "model" in chat-completion requests
LORA_NAME = "hukum"  # presumably the LoRA adapter name registered on the vLLM server — TODO confirm
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # model id passed to SentenceTransformer for query embedding
TOP_K = 2  # number of chunks retrieved per question
MAX_TOKENS = 1024  # "max_tokens" value sent by ask_vllm

# System message sent as the first chat message of every generation request.
# NOTE(review): the prompt specifies two different fallback sentences for the
# "information not in context" case (one near the top, one under "Gaya bahasa").
# Confirm which wording is intended and keep only one.
SYSTEM_PROMPT = """
Anda adalah asisten hukum profesional bergaya penulisan seperti artikel di Hukumonline.
Jika informasi tidak ada di konteks, tuliskan:
"Tidak ditemukan dalam konteks yang tersedia."
Tulislah jawaban dengan struktur analitis, lengkap, dan informatif, mencakup:
1. Pendahuluan singkat konteks hukum.
2. Penjelasan isi pasal/ayat yang relevan (kutip langsung jika ada).
3. Penjabaran logika hukum dan interpretasinya.
4. Poin-poin penting atau langkah hukum jika diperlukan.
5. Bagian 'Dasar Hukum' di akhir, mencantumkan peraturan yang dikutip.
6. Akhiri dengan kalimat sopan seperti 'Demikian penjelasan kami, semoga bermanfaat.'

Gaya bahasa:
- Gunakan bahasa hukum formal, sistematis, dan mudah dipahami masyarakat umum.
- Hindari opini pribadi atau spekulasi.
- Jika konteks tidak ditemukan, jawab: "Berdasarkan konteks yang tersedia, informasi terkait belum ditemukan."
"""

# User-message template. It is filled with str.format() using the three
# placeholders {question}, {context} and {sources}; any literal braces added
# to this text in the future would have to be doubled ("{{", "}}").
USER_TEMPLATE = """PERTANYAAN:
{question}

KONTEKS TERKAIT:
{context}

Sumber:
{sources}

Instruksi:
- Susun jawaban menyerupai artikel hukum online yang lengkap dan berurutan.
- Gunakan format berikut (bisa disesuaikan):

PENJELASAN:
(berikan uraian dan analisis hukum berdasarkan konteks)

DASAR HUKUM:
- Sebutkan UU, Pasal, dan peraturan yang relevan secara bernomor.

CATATAN:
Seluruh informasi hukum ini bersifat edukatif dan umum, bukan nasihat hukum spesifik.
Untuk kasus konkret, konsultasikan kepada advokat atau konsultan hukum berizin.
"""


In [None]:
def load_index(index_dir):
    """Load the FAISS index and its chunk metadata from ``index_dir``.

    Reads ``index.faiss`` with ``faiss.read_index`` and parses
    ``metadata.jsonl`` as one JSON object per line. Blank lines in the JSONL
    file (for example a trailing newline at the end of the file) are skipped
    instead of crashing the JSON parser.

    Args:
        index_dir: Directory containing ``index.faiss`` and ``metadata.jsonl``.

    Returns:
        Tuple ``(index, meta)``: the FAISS index and the list of parsed
        metadata records, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    index = faiss.read_index(os.path.join(index_dir, "index.faiss"))
    with open(os.path.join(index_dir, "metadata.jsonl"), "r", encoding="utf-8") as f:
        meta = [json.loads(line) for line in f if line.strip()]

    if index.ntotal != len(meta):
        # Metadata is looked up by FAISS row id, so differing sizes mean hits
        # can resolve to the wrong record or to no record at all.
        import warnings

        warnings.warn(
            f"index.faiss holds {index.ntotal} vectors but metadata.jsonl "
            f"holds {len(meta)} records; retrieval results may be misaligned."
        )

    print(f"Loaded {len(meta)} chunks | dim={index.d}")
    return index, meta

# Load the retrieval index once and create the encoder used to embed queries.
index, meta = load_index(INDEX_DIR)
encoder = SentenceTransformer(EMBED_MODEL)

# Fail fast when query embeddings would not match the index dimensionality;
# otherwise the mismatch only surfaces later, inside the first index.search call.
_embed_dim = encoder.get_sentence_embedding_dimension()
assert _embed_dim is None or _embed_dim == index.d, (
    f"{EMBED_MODEL} produces {_embed_dim}-dim vectors but the index expects {index.d}"
)


In [None]:
def search(query, index, meta, encoder, k=6):
    """Return the metadata records of the ``k`` nearest chunks to ``query``.

    Args:
        query: Query text to embed.
        index: Object exposing ``search(vectors, k)`` and returning
            ``(scores, ids)`` arrays with one row per query (a FAISS index).
        meta: Sequence of metadata dicts; id ``i`` from the index is looked up
            at position ``i``.
        encoder: Object exposing ``encode(texts, normalize_embeddings=True)``.
        k: Number of neighbours requested from the index.

    Returns:
        List of shallow copies of the matching metadata dicts, each extended
        with ``rank`` (1-based position in the result) and ``score`` (float).
        May contain fewer than ``k`` items when the index returns fewer results.

    Raises:
        IndexError: If the index returns an id beyond the end of ``meta``.
    """
    qv = encoder.encode([query], normalize_embeddings=True).astype("float32")
    scores, ids = index.search(qv, k)

    hits = []
    for score, idx in zip(scores[0], ids[0]):
        idx = int(idx)
        if idx < 0:
            # FAISS pads the result with id -1 when fewer than k neighbours are
            # available; meta[-1] would silently return the last chunk instead.
            continue
        record = meta[idx].copy()
        record["rank"] = len(hits) + 1
        record["score"] = float(score)
        hits.append(record)
    return hits

def build_context_blocks(hits):
    """Format retrieved hits into a context string and a sources string.

    Args:
        hits: Iterable of dicts, each with ``rank`` and ``text`` keys and
            optionally ``doc_type``, ``number``, ``case_number``, ``title``
            and ``url``.

    Returns:
        Tuple ``(context, sources)``:
        * ``context`` - one ``[C<rank>] (<doc_type>) <text>`` block per hit,
          separated by blank lines.
        * ``sources`` - one ``[S<rank>] <tag> — <url>`` line per hit, where
          ``tag`` is the first non-empty value among ``number``,
          ``case_number`` and ``title``.
        Both strings are empty when ``hits`` is empty.

    Raises:
        KeyError: If a hit lacks ``rank`` or ``text``.
    """
    blocks, sources = [], []
    for h in hits:
        tag = h.get("number") or h.get("case_number") or h.get("title", "")
        blocks.append(f"[C{h['rank']}] ({h.get('doc_type','')}) {h['text']}")
        # \u2014 is an em dash; it is written as an escape so the separator
        # cannot be corrupted again by a wrong file encoding.
        sources.append(f"[S{h['rank']}] {tag} \u2014 {h.get('url','')}")
    return "\n\n".join(blocks), "\n".join(sources)


In [None]:
def ask_vllm(question, context, sources, *, model=None, max_tokens=None,
             temperature=0.1, top_p=0.9, timeout=180):
    """Send one RAG prompt to the vLLM chat-completions endpoint.

    The system message is ``SYSTEM_PROMPT`` and the user message is
    ``USER_TEMPLATE`` filled with ``question``, ``context`` and ``sources``.
    Calling the function with only the three positional arguments sends
    ``MODEL_NAME``, ``MAX_TOKENS``, temperature 0.1, top_p 0.9 and uses a
    180-second timeout.

    Args:
        question: Question text.
        context: Retrieved context inserted into the prompt.
        sources: Source list inserted into the prompt.
        model: Value for the request's ``model`` field; defaults to
            ``MODEL_NAME`` (read at call time).
        max_tokens: Completion token limit; defaults to ``MAX_TOKENS``
            (read at call time).
        temperature: Sampling temperature.
        top_p: Nucleus-sampling parameter.
        timeout: Request timeout in seconds passed to ``requests.post``.

    Returns:
        The ``content`` of the first choice's message in the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    payload = {
        "model": MODEL_NAME if model is None else model,
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": USER_TEMPLATE.format(
                question=question, context=context, sources=sources)}
        ],
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": MAX_TOKENS if max_tokens is None else max_tokens,
    }
    r = requests.post(f"{VLLM_BASE}/v1/chat/completions", json=payload, timeout=timeout)
    r.raise_for_status()
    return r.json()["choices"][0]["message"]["content"]


In [None]:
from IPython.display import Markdown, display

# Change the question as needed
question = "Apa pendapat hukum indonesia mengenai perlindungan anak?"

# 1) Retrieve
hits = search(question, index, meta, encoder, k=TOP_K)
context, sources = build_context_blocks(hits)

# 2) Generate
answer = ask_vllm(question, context, sources)

# 3) Show
# Markdown collapses single newlines into one paragraph, so each source line is
# rendered as its own list item (the `sources` string itself is left untouched).
sources_md = "\n".join(f"- {line}" for line in sources.splitlines())
display(Markdown(f"### **Pertanyaan:** {question}\n\n---\n\n{answer}\n\n---\n\n**Sumber:**\n\n{sources_md}"))


In [None]:
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy, context_precision, context_recall
import pandas as pd
import json, os, time

from ragas import evaluate
from ragas.metrics import answer_similarity 
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings.base import embedding_factory
from langchain_google_genai import ChatGoogleGenerativeAI

In [None]:
# Evaluation configuration.
# INDEX_DIR, VLLM_BASE, MODEL_NAME, EMBED_MODEL and MAX_TOKENS are reused
# unchanged from the configuration cell near the top of the notebook, so they
# are not re-assigned here (an `X = X` line has no effect).
os.environ["DATA_PATH"] = "/workspace/QAs_Hukumonline_Test.json"  # uploaded path

# Output
EVAL_OUTPUT_CSV = "/workspace/ragas_results.csv"

# Retrieval & generation
TOP_K = 2


In [None]:
# Store the Gemini model name in the process environment.
os.environ["GEMINI_MODEL"] = "gemini-2.5-flash"

# The API key must come from the environment and is never hard-coded here.
# .get() also rejects a variable that is set but empty, which a plain
# membership test would accept.
assert os.environ.get("GOOGLE_API_KEY"), "Set GOOGLE_API_KEY in env"
print("Config ready")

In [None]:
# Load the evaluation set (a JSON file) from the path stored in DATA_PATH.
with open(os.environ["DATA_PATH"], "r", encoding="utf-8") as f:
    raw = json.load(f)

# Map the dataset's field names onto the column names used for evaluation.
df = pd.DataFrame(raw).rename(columns={"instruction":"question","response":"ground_truth"})

# rename() silently ignores source columns that do not exist, so check for the
# column the generation loop reads instead of failing there with a KeyError.
assert "question" in df.columns, (
    f"Expected an 'instruction' or 'question' field in the dataset, got columns: {list(df.columns)}"
)
print(f"Rows: {len(df)}")
df.head(3)


In [None]:
VLLM_URL = VLLM_BASE
MODEL    = MODEL_NAME
LORA     = LORA_NAME

# vLLM selects a LoRA adapter through the request's "model" field. Sending the
# base model name would silently bypass the adapter named in the progress label,
# so the adapter name is sent whenever one is configured.
# NOTE(review): assumes the server registered the adapter under LORA_NAME —
# if it did not, the first request fails with an HTTP error instead of
# quietly evaluating the base model.
GEN_MODEL = LORA or MODEL

# Completion budget for evaluation answers (intentionally separate from MAX_TOKENS).
EVAL_MAX_TOKENS = 1280

answers = []
contexts_used = []

start = time.time()
for q in tqdm(df["question"].tolist(), desc="Generating RAG answers (Gemma 3 4B + LoRA Hukum)"):

    # 1) Retrieve context from FAISS
    hits = search(q, index, meta, encoder, k=TOP_K)
    context, sources = build_context_blocks(hits)  # joined chunk text + source list
    contexts_used.append(context)

    # 2) Build the user prompt (question + context + sources)
    user_prompt = USER_TEMPLATE.format(question=q, context=context, sources=sources)

    # 3) Send to vLLM
    payload = {
        "model": GEN_MODEL,
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt}
        ],
        "temperature": 0.1,
        "max_tokens": EVAL_MAX_TOKENS,
    }

    r = requests.post(f"{VLLM_URL}/v1/chat/completions", json=payload, timeout=300)
    r.raise_for_status()
    answers.append(r.json()["choices"][0]["message"]["content"])

end = time.time()
elapsed = end - start

# Persist the generations so the evaluation can be re-run without regenerating.
df["context"] = contexts_used
df["answer"] = answers
out_csv = "/workspace/gemma_rag_generations.csv"
df.to_csv(out_csv, index=False)
print("Saved", out_csv)
print(f"Total time: {elapsed:.2f} seconds")
print(f"Average per question: {elapsed / max(1,len(df)):.2f} seconds")

df.head(3)
