In [3]:
import requests
import json
import pandas as pd
from ranx import Run, Qrels, evaluate
import os
import time

# ===== الإعداد =====
dataset = "trec_tot"  # ← غيّرها إلى "antique" لاحقًا للمجموعة الثانية
top_k = 10
queries_file = f"data/{dataset}/{dataset}_queries.csv"
qrels_file = f"data/{dataset}/{dataset}_qrels.csv"

# ===== تحميل الاستعلامات =====
df_queries = pd.read_csv(queries_file)
print(f"✅ تم تحميل {len(df_queries)} استعلام")

# ===== وظائف الطلب =====
def refine_query(query: str):
    url = "http://127.0.0.1:8000/refine-query"
    payload = {
        "query": query,
        "options": {
            "spelling_correction": True,
            "query_expansion": True,
            "query_suggestion": True
        }
    }
    response = requests.post(f"{url}?dataset={dataset}", json=payload)
    return response.json()

def search_query(query: str):
    url = "http://127.0.0.1:8001/query-match"
    response = requests.get(
        url,
        params={"query": query, "dataset": dataset, "top_k": top_k}
    )
    return response.json()["results"]

# ===== التجربة الأولى: بدون تحسين الاستعلام =====
run_before = {}
start = time.time()
for _, row in df_queries.iterrows():
    qid = str(row["query_id"])
    query_text = row["query_text"]
    results = search_query(query_text)
    run_before[qid] = {doc["doc_id"]: doc["score"] for doc in results}
end = time.time()
print(f"⌛ الزمن المستغرق بدون تحسين: {end - start:.2f} ثانية")

# ===== التجربة الثانية: بعد تحسين الاستعلام =====
run_after = {}
start = time.time()
for _, row in df_queries.iterrows():
    qid = str(row["query_id"])
    query_text = row["query_text"]
    refined = refine_query(query_text)["refined_query"]
    results = search_query(refined)
    run_after[qid] = {doc["doc_id"]: doc["score"] for doc in results}
end = time.time()
print(f"⌛ الزمن المستغرق بعد تحسين: {end - start:.2f} ثانية")

# ===== التقييم باستخدام ranx =====
qrels = Qrels.from_file(qrels_file, kind="trec")
run_before_r = Run(run_before, name="Before Refinement")
run_after_r = Run(run_after, name="After Refinement")

results = evaluate(
    qrels=qrels,
    runs=[run_before_r, run_after_r],
    metrics=["map@10", "ndcg@10", "recall@10"]
)
results_df = pd.DataFrame(results).T
results_df


✅ تم تحميل 150 استعلام


KeyError: 'results'

In [4]:
import requests
import pandas as pd
import time
from ranx import Run, Qrels, evaluate
import json
import os

# 🟦 إعداد المسارات
dataset = "trec_tot"  # ← غيّر لاحقًا إلى "antique"
top_k = 10

queries_file = f"data/{dataset}/{dataset}_queries.csv"
qrels_file = f"data/{dataset}/{dataset}_qrels.tsv"

# 🟦 تحميل الاستعلامات
df_queries = pd.read_csv(queries_file)
print(f"✅ Loaded {len(df_queries)} queries")

# 🟦 خدمات خارجية
def refine_query(query_text):
    url = "http://127.0.0.1:8000/refine-query"
    payload = {
        "query": query_text,
        "options": {
            "spelling_correction": True,
            "query_expansion": True,
            "query_suggestion": True
        }
    }
    response = requests.post(f"{url}?dataset={dataset}", json=payload)
    return response.json()["refined_query"]

def search_query(query_text):
    url = "http://127.0.0.1:8001/query-matching"
    response = requests.get(url, params={"query": query_text, "dataset": dataset, "top_k": top_k})
    return response.json()["results"]

# 🟨 تنفيذ البحث قبل التحسين
run_before = {}
start_time = time.time()
for _, row in df_queries.iterrows():
    qid = str(row["query_id"])
    query = row["query"]
    try:
        results = search_query(query)
        run_before[qid] = {res["doc_id"]: res["score"] for res in results}
    except Exception as e:
        print(f"❌ خطأ في الاستعلام {qid}: {e}")
end_time = time.time()
print(f"⏱️ زمن البحث بدون تحسين: {end_time - start_time:.2f} ثواني")

# 🟩 تنفيذ البحث بعد التحسين
run_after = {}
start_time = time.time()
for _, row in df_queries.iterrows():
    qid = str(row["query_id"])
    query = row["query"]
    try:
        refined_query = refine_query(query)
        results = search_query(refined_query)
        run_after[qid] = {res["doc_id"]: res["score"] for res in results}
    except Exception as e:
        print(f"❌ خطأ في الاستعلام {qid}: {e}")
end_time = time.time()
print(f"⏱️ زمن البحث بعد تحسين الاستعلام: {end_time - start_time:.2f} ثواني")

# 🟦 حفظ النتائج بصيغة JSON (اختياري)
output_dir = f"results/{dataset}"
os.makedirs(output_dir, exist_ok=True)

with open(f"{output_dir}/run_before_refinement.json", "w", encoding="utf-8") as f:
    json.dump(run_before, f, indent=2)

with open(f"{output_dir}/run_after_refinement.json", "w", encoding="utf-8") as f:
    json.dump(run_after, f, indent=2)

print("✅ تم حفظ النتائج في مجلد:", output_dir)

# 🟦 التقييم باستخدام ranx
qrels = Qrels.from_file(qrels_file, kind="trec")
run_b = Run(run_before, name="Before_Refinement")
run_a = Run(run_after, name="After_Refinement")

results = evaluate(
    qrels=qrels,
    runs=[run_b, run_a],
    metrics=["map@10", "ndcg@10", "recall@10"]
)

# 🟩 عرض النتائج كمقارنة
pd.DataFrame(results).T


✅ Loaded 150 queries


KeyError: 'query'

In [5]:
pip install httpx


Defaulting to user installation because normal site-packages is not writeableNote: you may need to restart the kernel to use updated packages.



In [7]:

import pandas as pd




# 🟦 إعداد المسارات
dataset = "trec_tot"  # ← غيّر لاحقًا إلى "antique"
top_k = 10

queries_file = f"data/{dataset}/{dataset}_queries.csv"


# 🟦 تحميل الاستعلامات
df_queries = pd.read_csv(queries_file)
print(df_queries)

     query_id                                         query_text
0         152  Movie from  the early 2000s I believe about th...
1         531  Alright so I saw this movie sometime in the ea...
2         473  This is an older 80 s movie. Maybe early 90s. ...
3         659  I don’t really have any memory of this film. I...
4        1095  I remember seeing a trailer for a fantasy movi...
..        ...                                                ...
145      1077  So, the movie is about this weird virus or som...
146       834  I remember a  movie this is what I can remembe...
147       325  There was a movie I saw on tv sometime in the ...
148       385  I’m trying to find the name of a film, whereby...
149       521  There’s an old one I got to see just once as a...

[150 rows x 2 columns]
