In [1]:
import pandas as pd
import json
from pyserini.search.lucene import LuceneSearcher

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def _resolve_docid(hit):
    """Return the document id for a search hit, always as a string.

    The id is taken from the "id" field of the hit's stored raw JSON
    (the original author notes the index was built with --storeRaw). If the
    raw payload cannot be parsed or has no "id", the hit's own docid is used.
    """
    try:
        raw = json.loads(hit.raw)
        return str(raw.get("id", hit.docid))
    except Exception:
        # Deliberate best-effort: any failure reading the raw document falls
        # back to the hit's docid. It is coerced to str so that it compares
        # correctly against the stringified "id" column of the DataFrame.
        return str(hit.docid)


# Print the search results and join each hit with its row in the DataFrame
def display_results(query, hits, df: pd.DataFrame) -> pd.DataFrame:
    """Print a ranked summary of ``hits`` and return them joined with ``df``.

    Parameters
    ----------
    query:
        The query text; echoed in the printout and stored in the "query"
        column of the returned frame.
    hits:
        Sequence of search hits. Each hit must expose ``raw``, ``docid`` and
        ``score`` attributes.
    df:
        DataFrame with at least the columns "id" and "title". Rows are
        matched to hits by comparing ``str(id)`` with the hit's document id.

    Returns
    -------
    pd.DataFrame
        One row per matched DataFrame row, in hit order, with the extra
        columns "score", "rank" (1-based position of the hit) and "query".
        An empty DataFrame is returned when ``hits`` is empty or when none
        of the hits can be found in ``df``.
    """
    print(f"\nQuery: {query}")

    if not hits:
        print("Tidak ada dokumen yang sesuai.")
        return pd.DataFrame()

    # Loop-invariant: stringify the id column once instead of once per hit.
    id_strings = df["id"].astype(str)

    results_with_scores = []
    for i, hit in enumerate(hits, start=1):
        docid = _resolve_docid(hit)

        # Look the hit up in the DataFrame; copy so the annotations added
        # below never touch the caller's frame.
        matching_row = df[id_strings == docid].copy()

        if not matching_row.empty:
            matching_row["score"] = hit.score
            matching_row["rank"] = i
            matching_row["query"] = query
            results_with_scores.append(matching_row)

            # Short one-line summary
            print(f"{i}. {matching_row.iloc[0]['title']} (Score: {hit.score:.4f})")
        else:
            print(f"{i}. [ID {docid}] tidak ditemukan di DataFrame (Score: {hit.score:.4f})")

    if results_with_scores:
        return pd.concat(results_with_scores, ignore_index=True)
    else:
        print("No matching documents found in DataFrame.")
        return pd.DataFrame()

In [4]:
# Queries to run against the index
queries = [
    "gemini ai",
    "laptop gaming wajib dibeli",
    "cara agar tidak di hack",
    "teknologi canggih sekarang",
    "komputer terbaik",
    "mobile legend",
]

# Document table that display_results uses to look up each hit by id
df = pd.read_json("json-file/docs.jsonl", lines=True)

# Open the index produced by pyserini.index
searcher = LuceneSearcher("my_index")

# Run every query (top 10 hits each) and keep only non-empty result frames
all_results = []
for q in queries:
    hits = searcher.search(q, k=10)
    result_df = display_results(q, hits, df)
    if result_df.empty:
        continue
    all_results.append(result_df)


Query: gemini ai
1. viral foto polaroid gemini ai bareng medsos bikin contoh prompt (Score: 3.1467)
2. google limit hari gemini ai gratis ai pro ai ultra (Score: 3.1460)
3. gemini ai suntik google drive gambar (Score: 3.1302)
4. viral miniatur ai foto medsos buat via gemini (Score: 3.1248)
5. viral foto polaroid gemini ai orang tua tiada pakai prompt (Score: 3.1212)
6. google sulap chrome browser gemini ai 10 fitur canggih (Score: 3.1140)
7. 8 prompt foto polaroid gemini ai idol kpop tinggal pilih copas (Score: 3.0935)
8. fitur google docs teks ubah audio ai (Score: 3.0927)
9. foto polaroid gemini ai peluk idol kpop viral prompt buat (Score: 3.0848)
10. bikin miniatur ai gerak contoh prompt tarik coba (Score: 3.0809)

Query: laptop gaming wajib dibeli
1. laptop chromebook beda laptop windows (Score: 3.8602)
2. jajal langsung legion 5i 2025 main gim aaa bikin konten hasil (Score: 3.8233)
3. tilik lenovo legion 7i 16iax10 laptop gaming premium workstation profesional (Score: 3.8117)
4. 

In [5]:
# Merge the per-query result frames into one report
if not all_results:
    print("Tidak ada hasil yang ditemukan untuk semua query.")
else:
    combined = pd.concat(all_results, ignore_index=True)

    # Keep only the requested columns, plus the query that produced each row
    output_df = combined[
        ["id", "title", "date", "score", "rank", "query"]
    ]

    print("\n=== Semua Hasil Gabungan ===")
    # Print every row, not just the head
    print(output_df.to_string(index=False))

    # Optional: save to Excel
    # NOTE(review): the disabled line below would export `df`, not
    # `output_df` -- confirm which frame is intended before re-enabling.
    #df.to_excel("search_results.xlsx", engine="openpyxl")
    #print("\nSemua hasil pencarian disimpan ke search_results.xlsx")


=== Semua Hasil Gabungan ===
 id                                                                                  title                          date    score  rank                      query
170                        viral foto polaroid gemini ai bareng medsos bikin contoh prompt     Minggu, 14 September 2025 3.146700     1                  gemini ai
277                                     google limit hari gemini ai gratis ai pro ai ultra      Selasa, 9 September 2025 3.146000     2                  gemini ai
496                                                   gemini ai suntik google drive gambar        Jumat, 29 Agustus 2025 3.130200     3                  gemini ai
316                                          viral miniatur ai foto medsos buat via gemini       Senin, 8 September 2025 3.124800     4                  gemini ai
168                             viral foto polaroid gemini ai orang tua tiada pakai prompt     Minggu, 14 September 2025 3.121200     5                  ge