In [1]:
# Colab-only step: mount Google Drive at /content/drive so the project data
# that this notebook reads from "/content/drive/My Drive/..." is reachable
# through the local filesystem.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
import os
import json
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# === Project paths ===
base_path = "/content/drive/My Drive/CBR_Project"
json_path = os.path.join(base_path, "data/processed/cases_all.json")
query_path = os.path.join(base_path, "data/eval/queries.json")

# === Load the stored (past) cases ===
with open(json_path, "r", encoding="utf-8") as f:
    cases_data = json.loads(f.read())

# One text and one identifier per case, in matching order. The text is the
# fact summary when it is non-empty, otherwise the full document text.
case_texts = [case["ringkasan_fakta"] or case["text_full"] for case in cases_data]
case_ids = [case["case_id"] for case in cases_data]

# === Load the test queries ===
with open(query_path, "r", encoding="utf-8") as f:
    queries = json.loads(f.read())

# === TF-IDF vectorizer ===
# Vocabulary is capped at 5000 terms and fitted on the case texts only.
vectorizer = TfidfVectorizer(max_features=5000)
tfidf_matrix = vectorizer.fit_transform(case_texts)  # TF-IDF matrix of all cases

# === Retrieval function ===
def retrieve(query: str, k: int = 5) -> list:
    """Return the IDs of the ``k`` stored cases most similar to ``query``.

    The query is transformed with the module-level TF-IDF ``vectorizer`` and
    compared against every row of ``tfidf_matrix`` by cosine similarity.

    Args:
        query: Free-text query to search for.
        k: Maximum number of case IDs to return. Defaults to 5.

    Returns:
        Case IDs taken from ``case_ids``, ordered from highest to lowest
        similarity. Contains at most ``k`` entries (fewer when there are
        fewer than ``k`` cases, and none when ``k`` is 0).

    Raises:
        ValueError: If ``k`` is negative.
    """
    # A negative k would turn the slice below into "everything except the
    # last |k| entries", silently returning the wrong number of results.
    if k < 0:
        raise ValueError(f"k must be non-negative, got {k}")

    query_vec = vectorizer.transform([query])
    scores = cosine_similarity(query_vec, tfidf_matrix).flatten()
    # argsort is ascending, so reverse it before keeping the first k indices.
    top_indices = scores.argsort()[::-1][:k]
    return [case_ids[i] for i in top_indices]

# === Initial evaluation ===
# Run every test query through the retriever and report whether at least one
# of its ground-truth case IDs appears among the five best matches.
print("🔍 Evaluasi 5-10 Query:")
print("=" * 60)
for q in queries:
    result = retrieve(q["query_text"], k=5)
    # A hit means some retrieved ID is listed in the query's ground truth.
    hit = any(retrieved_id in q["ground_truth"] for retrieved_id in result)
    # Single print call; sep="\n" yields the same five output lines.
    print(
        f"📌 Query ID: {q['query_id']}",
        f"🔎 Top-5 Hasil: {result}",
        f"✅ Ground Truth: {q['ground_truth']}",
        f"🎯 Hit: {hit}",
        "-" * 60,
        sep="\n",
    )

print("✅ Evaluasi selesai.")


🔍 Evaluasi 5-10 Query:
📌 Query ID: 1
🔎 Top-5 Hasil: [1, 6, 22, 28, 9]
✅ Ground Truth: [1]
🎯 Hit: True
------------------------------------------------------------
📌 Query ID: 2
🔎 Top-5 Hasil: [2, 9, 1, 19, 18]
✅ Ground Truth: [2]
🎯 Hit: True
------------------------------------------------------------
📌 Query ID: 3
🔎 Top-5 Hasil: [3, 18, 19, 20, 9]
✅ Ground Truth: [3]
🎯 Hit: True
------------------------------------------------------------
📌 Query ID: 4
🔎 Top-5 Hasil: [4, 21, 13, 19, 20]
✅ Ground Truth: [4]
🎯 Hit: True
------------------------------------------------------------
📌 Query ID: 5
🔎 Top-5 Hasil: [5, 19, 18, 3, 20]
✅ Ground Truth: [5]
🎯 Hit: True
------------------------------------------------------------
📌 Query ID: 6
🔎 Top-5 Hasil: [6, 1, 22, 28, 19]
✅ Ground Truth: [6]
🎯 Hit: True
------------------------------------------------------------
📌 Query ID: 7
🔎 Top-5 Hasil: [7, 19, 20, 17, 16]
✅ Ground Truth: [7]
🎯 Hit: True
---------------------------------------------------