In [1]:
from google.colab import drive
# Mount Google Drive inside the Colab runtime; the notebook reads and writes
# its project files below this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
import os
import json
import csv
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# === Project paths ===
# All files live below one Google Drive project folder.
base_path = "/content/drive/My Drive/CBR_Project"

# Resolve every project-relative location against the project root in one go.
# Order: queries (read), predictions (read), retrieval report (written),
# prediction report (written).
query_path, prediction_path, retrieval_output, prediction_output = (
    os.path.join(base_path, relative_location)
    for relative_location in (
        "data/eval/queries.json",
        "data/results/predictions.csv",
        "data/eval/retrieval_metrics.csv",
        "data/eval/prediction_metrics.csv",
    )
)

# The report folder must exist before the CSV writers open their files;
# exist_ok keeps re-running the cell harmless.
report_dir = os.path.dirname(retrieval_output)
os.makedirs(report_dir, exist_ok=True)

# === Load queries + ground truth
# The parsed JSON is iterated further down as a sequence of dicts that are
# read via "query_id", "ground_truth" and (optionally) "actual_solution".
with open(query_path, "r", encoding="utf-8") as f:
    queries = json.load(f)

# === Load predictions
# Maps int(query_id) -> {"predicted_solution": str, "top_5_case_ids": [int, ...]}.
preds = {}
with open(prediction_path, "r", encoding="utf-8") as f:
    reader = csv.DictReader(f)
    for row in reader:
        # Parse the id list tolerantly:
        #  * DictReader yields None for a column missing from a short row,
        #  * "".split(",") returns [""] and int("") raises ValueError, so an
        #    empty field or a trailing comma must not reach int(),
        #  * surrounding brackets are stripped so a list that was written as
        #    "[1, 2, 3]" parses as well as the plain "1,2,3" form.
        raw_case_ids = (row["top_5_case_ids"] or "").strip().strip("[]")
        preds[int(row["query_id"])] = {
            "predicted_solution": row["predicted_solution"],
            "top_5_case_ids": [
                int(token) for token in raw_case_ids.split(",") if token.strip()
            ],
        }

# === Retrieval evaluation (top-k hit or miss)
def _score_retrieval(query):
    """Build the per-query retrieval record.

    A query counts as a hit when at least one of its ground-truth ids appears
    in the top-5 id list stored for that query in `preds`.
    """
    query_id = query["query_id"]
    relevant_ids = query["ground_truth"]
    retrieved_ids = preds[query_id]["top_5_case_ids"]
    return {
        "query_id": query_id,
        "ground_truth": relevant_ids,
        "top_5_case_ids": retrieved_ids,
        "hit": any(case_id in retrieved_ids for case_id in relevant_ids),
    }


retrieval_rows = [_score_retrieval(query) for query in queries]
# One 1/0 flag per query, in query order; averaged in the summary section.
retrieval_hits = [int(record["hit"]) for record in retrieval_rows]

# Save the per-query retrieval results
with open(retrieval_output, "w", newline="", encoding="utf-8") as report_file:
    report_writer = csv.DictWriter(
        report_file,
        fieldnames=["query_id", "ground_truth", "top_5_case_ids", "hit"],
    )
    report_writer.writeheader()
    report_writer.writerows(retrieval_rows)

# === Solution-prediction evaluation (only for queries that have actual_solution)
prediction_rows = []
for query in queries:
    if "actual_solution" not in query:
        # No reference answer for this query, so there is nothing to compare.
        continue

    query_id = query["query_id"]
    # Both sides are trimmed and lower-cased so the comparison ignores
    # surrounding whitespace and letter case.
    expected = query["actual_solution"].strip().lower()
    answered = preds[query_id]["predicted_solution"].strip().lower()

    prediction_rows.append(
        {
            "query_id": query_id,
            "actual_solution": expected,
            "predicted_solution": answered,
            "match": expected == answered,
        }
    )

# Parallel label lists consumed by the sklearn metrics in the summary section.
y_true = [record["actual_solution"] for record in prediction_rows]
y_pred = [record["predicted_solution"] for record in prediction_rows]

# Save the per-query prediction results
with open(prediction_output, "w", newline="", encoding="utf-8") as report_file:
    report_writer = csv.DictWriter(
        report_file,
        fieldnames=["query_id", "actual_solution", "predicted_solution", "match"],
    )
    report_writer.writeheader()
    report_writer.writerows(prediction_rows)

# === Print the evaluation summary
print("=== 📊 Evaluasi Retrieval ===")
if retrieval_hits:
    # Fraction of queries with at least one ground-truth case in their top-5.
    accuracy = sum(retrieval_hits) / len(retrieval_hits)
    print(f"Top-5 Hit Accuracy: {accuracy:.2f}")
    print(f"Detail disimpan di: {retrieval_output}")
else:
    # An empty query list would otherwise raise ZeroDivisionError above.
    accuracy = 0.0
    print("⚠️ Tidak ada query dalam queries.json → evaluasi retrieval dilewati.")

if y_true:
    print("\n=== 🧠 Evaluasi Prediksi Solusi ===")
    acc = accuracy_score(y_true, y_pred)
    # Macro averaging is used on purpose: for single-label multiclass data,
    # micro-averaged precision, recall and F1 are all identical to accuracy,
    # so reporting them adds no information. zero_division=0 scores a label
    # that has no predicted (or no true) samples as 0 instead of emitting an
    # UndefinedMetricWarning.
    prec = precision_score(y_true, y_pred, average="macro", zero_division=0)
    rec = recall_score(y_true, y_pred, average="macro", zero_division=0)
    f1 = f1_score(y_true, y_pred, average="macro", zero_division=0)

    print(f"Accuracy : {acc:.2f}")
    print(f"Precision: {prec:.2f}")
    print(f"Recall   : {rec:.2f}")
    print(f"F1-Score : {f1:.2f}")
    print(f"Detail disimpan di: {prediction_output}")
else:
    print("⚠️ Tidak ada actual_solution dalam queries.json → evaluasi solusi dilewati.")


=== 📊 Evaluasi Retrieval ===
Top-5 Hit Accuracy: 1.00
Detail disimpan di: /content/drive/My Drive/CBR_Project/data/eval/retrieval_metrics.csv

=== 🧠 Evaluasi Prediksi Solusi ===
Accuracy : 1.00
Precision: 1.00
Recall   : 1.00
F1-Score : 1.00
Detail disimpan di: /content/drive/My Drive/CBR_Project/data/eval/prediction_metrics.csv
