In [None]:
# SOLUTION_REUSE.ipynb
import os

import numpy as np
import pandas as pd
# from retrieval import bert_retriever  # Remove this import - already done in a previous turn

# Load the case table from the processed CSV.
df = pd.read_csv('data/processed/cases.csv')

# Build a lookup from case_id to the verdict text stored in the 'amar' column.
# If a case_id occurs more than once, the value from the last such row wins.
case_solutions = dict(zip(df['case_id'], df['amar']))

# Get the retriever function from cell jZzAsn0LuzvV
# Make sure cell jZzAsn0LuzvV has been executed
# NOTE(review): bert_retrieval is not defined or imported in this cell; its
# return value is later called as retriever_func(query, k=k).
retriever_func = bert_retrieval()

def predict_outcome(query, k=5):
    """Predict an outcome label for *query* by majority vote over retrieved cases.

    The module-level ``retriever_func`` is called with the query and ``k``;
    each returned case id that has an entry in ``case_solutions`` contributes
    one vote for the label produced by ``simplify_solution``.

    Args:
        query: Query passed through unchanged to ``retriever_func``.
        k: Number of cases requested from the retriever (default 5).

    Returns:
        The label with the most votes. On a tie, the label that appeared
        first among the retrieved cases wins. If none of the retrieved case
        ids is known, the string "Tidak dapat memprediksi" is returned.
    """
    # NOTE(review): assumes the retriever yields case ids comparable to the
    # keys of case_solutions - confirm against the retriever cell.
    retrieved_ids = retriever_func(query, k=k)

    # Keep only cases with a known verdict and reduce each to a coarse label.
    labels = [
        simplify_solution(case_solutions[cid])
        for cid in retrieved_ids
        if cid in case_solutions
    ]
    if not labels:
        return "Tidak dapat memprediksi"

    # Tally the votes; dict insertion order keeps the first-seen label first,
    # which is what decides ties in max() below.
    tally = {}
    for label in labels:
        tally[label] = tally.get(label, 0) + 1

    return max(tally, key=tally.get)


def simplify_solution(solution):
    """Reduce a verdict text (amar putusan) to a coarse outcome label.

    The text is matched case-insensitively against a fixed, ordered list of
    phrases; the first phrase found determines the label.

    Args:
        solution: Verdict text, or a missing value (``None`` / NaN).

    Returns:
        "Pidana Penjara", "Bebas", "Tidak Terbukti", or "Lainnya" when the
        value is missing or no phrase matches.
    """
    fallback = "Lainnya"

    # Missing values have no text to inspect.
    if pd.isna(solution):
        return fallback

    lowered = solution.lower()

    # Ordered (phrase, label) pairs: earlier entries take precedence.
    phrase_labels = (
        ("dijatuhi pidana", "Pidana Penjara"),
        ("dibebaskan", "Bebas"),
        ("dinyatakan tidak terbukti", "Tidak Terbukti"),
    )
    for phrase, label in phrase_labels:
        if phrase in lowered:
            return label

    return fallback

# Example usage: run the predictor over a few sample queries.
test_queries = [
    "pelaku ditangkap dengan barang bukti sabu 1 gram",
    "terdakwa mengedarkan ganja seberat 500 gram",
    "pemakai narkotika jenis ekstasi"
]

results = []
for query in test_queries:
    prediction = predict_outcome(query)
    results.append({
        'query': query,
        'predicted_solution': prediction
    })

# Save the predictions. The directory is derived from the output path so the
# created directory and the file location cannot drift apart.
# Requires `os` to be imported at the top of the cell.
output_path = 'data/results/predictions.csv'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
results_df = pd.DataFrame(results)
results_df.to_csv(output_path, index=False)

print("Contoh Prediksi:")
print(results_df.head())