In [1]:
import requests
import pandas as pd

# Shared settings for the evaluation requests in this notebook.
# Some cells pick individual keys out of this dict, others post the whole
# dict as the JSON body, so key names and key order are kept exactly as-is.
config = {
    # Dataset identifiers (two spellings of the same dataset are carried).
    "dataset_name": "beir/quora/dev",
    "dataset_name2": "beir_quora",

    # TF-IDF artefacts.
    "tfidf_model_path": "../models/tfidf/beir_quora_tfidf.joblib",
    "tfidf_vector_path": "../models/tfidf/beir_quora_tfidf_vectors.joblib",
    "svd_path": "../models/tfidf/beir_quora_svd.joblib",

    # BERT embedding artefacts.
    "bert_model_path": "../models/embeddings/beir_quora_bert.joblib",
    "bert_vector_path": "../models/embeddings/beir_quora_vectors.joblib",

    # Hybrid vectors.
    "hybrid_vector_path": "../models/hybrid/beir_quora_hybrid_vectors.joblib",

    # Index directory.
    "index_dir": "../data/index_beir_quora",

    # FAISS index files, one per representation.
    "faiss_tfidf_index_path": "../models/faiss/beir_quora_tfidf_faiss.index",
    "faiss_bert_index_path": "../models/faiss/beir_quora_bert_faiss.index",
    "faiss_hybrid_index_path": "../models/faiss/beir_quora_hybrid_faiss.index",

    # Evaluation knobs; their exact meaning is defined by the evaluation
    # service -- presumably sample_size = number of queries sampled,
    # alpha = hybrid mixing weight, k = retrieval depth (TODO confirm).
    "sample_size": 200,
    "alpha": 0.2,
    "k": 500,
}

In [7]:

# Ask the local evaluation service to score the TF-IDF model.
# Only the settings listed here are forwarded, in this order.
response = requests.post(
    "http://localhost:8000/evaluation/evaluate_tfidf_model",
    json={
        field: config[field]
        for field in (
            "dataset_name",
            "tfidf_model_path",
            "tfidf_vector_path",
            "index_dir",
            "sample_size",
        )
    },
)
response.raise_for_status()  # raise on a 4xx/5xx reply instead of parsing it
result = response.json()

# Plain-text report: averaged metrics first, then the first five per-query rows.
print("\nTF-IDF Model Evaluation:", "Average Metrics:", sep="\n")
print(pd.DataFrame(result["average_metrics"]))
print("\nSample Results (First 5 rows):")
print(pd.DataFrame(result["results"]).head(5).to_string())



TF-IDF Model Evaluation:
Average Metrics:
                TF-IDF
precision@10  0.051000
recall        0.395405
mrr           0.353524
ap            0.311820

Sample Results (First 5 rows):
   precision@10    recall  mrr        ap query_id   model
0           0.6  0.857143  1.0  0.836735   128201  TF-IDF
1           0.1  1.000000  1.0  1.000000     2254  TF-IDF
2           0.1  0.333333  1.0  0.333333    28695  TF-IDF
3           0.0  0.000000  0.0  0.000000   333644  TF-IDF
4           0.0  0.000000  0.0  0.000000   278122  TF-IDF


In [25]:

# Ask the local evaluation service to score the BERT model.
# Only the settings listed here are forwarded, in this order.
response = requests.post(
    "http://localhost:8000/evaluation/evaluate_bert_model",
    json={
        field: config[field]
        for field in (
            "dataset_name",
            "bert_model_path",
            "bert_vector_path",
            "index_dir",
            "sample_size",
        )
    },
)
response.raise_for_status()  # raise on a 4xx/5xx reply instead of parsing it
result = response.json()

# Plain-text report: averaged metrics first, then the first five per-query rows.
print("\nBERT Model Evaluation:", "Average Metrics:", sep="\n")
print(pd.DataFrame(result["average_metrics"]))
print("\nSample Results (First 5 rows):")
print(pd.DataFrame(result["results"]).head(5).to_string())



BERT Model Evaluation:
Average Metrics:
                  BERT
precision@10  0.136000
recall        0.990000
mrr           0.899707
ap            0.846769

Sample Results (First 5 rows):
   precision@10  recall  mrr   ap query_id model
0           0.1     1.0  1.0  1.0   454828  BERT
1           0.0     0.0  0.0  0.0   469466  BERT
2           0.1     1.0  1.0  1.0   154114  BERT
3           0.1     1.0  1.0  1.0   325102  BERT
4           0.1     1.0  1.0  1.0   416984  BERT


In [8]:

# Ask the local evaluation service to score the hybrid model.
# The request carries the TF-IDF, SVD, BERT and hybrid artefact paths plus
# the "alpha" setting; only the keys listed here are forwarded, in this order.
response = requests.post(
    "http://localhost:8000/evaluation/evaluate_hybrid_model",
    json={
        field: config[field]
        for field in (
            "dataset_name",
            "tfidf_model_path",
            "tfidf_vector_path",
            "svd_path",
            "bert_model_path",
            "bert_vector_path",
            "hybrid_vector_path",
            "index_dir",
            "sample_size",
            "alpha",
        )
    },
)
response.raise_for_status()  # raise on a 4xx/5xx reply instead of parsing it
result = response.json()

# Plain-text report: averaged metrics first, then the first five per-query rows.
print("\nHybrid Model Evaluation:", "Average Metrics:", sep="\n")
print(pd.DataFrame(result["average_metrics"]))
print("\nSample Results (First 5 rows):")
print(pd.DataFrame(result["results"]).head(5).to_string())



Hybrid Model Evaluation:
Average Metrics:
                Hybrid
precision@10  0.128000
recall        0.934167
mrr           0.827381
ap            0.775717

Sample Results (First 5 rows):
   precision@10  recall  mrr   ap query_id          model
0           0.1     1.0  1.0  1.0   377234  Hybrid_Fusion
1           0.1     1.0  0.5  0.5   205307  Hybrid_Fusion
2           0.1     1.0  1.0  1.0    55573  Hybrid_Fusion
3           0.1     1.0  1.0  1.0   237344  Hybrid_Fusion
4           0.1     1.0  1.0  1.0   443490  Hybrid_Fusion


In [3]:

# Evaluate all models in a single request; the whole config dict is sent
# as the JSON body, unfiltered.
response = requests.post(
    "http://localhost:8000/evaluation/evaluate_all_models",
    json=config,
)
response.raise_for_status()  # raise on a 4xx/5xx reply instead of parsing it
result = response.json()

# Plain-text report: averaged metrics first, then the first five per-query rows.
print("\nEvaluation Results:", "Average Metrics:", sep="\n")
print(pd.DataFrame(result["average_metrics"]))
print("\nSample Results (First 5 rows):")
print(pd.DataFrame(result["results"]).head(5))


Evaluation Results:
Average Metrics:
                TF-IDF      BERT    Hybrid
precision@10  0.108000  0.122000  0.123500
recall        0.980706  0.999167  0.999444
mrr           0.734416  0.838912  0.836054
ap            0.687154  0.808340  0.796959

Sample Results (First 5 rows):
   precision@10  recall  mrr    ap query_id   model
0           0.0     0.0  0.0  0.00   339862  TF-IDF
1           0.1     1.0  1.0  1.00   490116  TF-IDF
2           0.1     1.0  1.0  1.00   150050  TF-IDF
3           0.1     1.0  1.0  1.00   135844  TF-IDF
4           0.2     1.0  1.0  0.75   392707  TF-IDF


In [2]:

# Evaluate the FAISS-backed variants in a single request; the whole config
# dict is sent as the JSON body, unfiltered.
# NOTE(review): the recorded output of this cell shows per-query
# "precision@10" values that are multiples of 0.002 (= 1/500) while
# config["k"] is 500 -- this suggests the service divides by k rather than
# by 10 on this endpoint. Confirm server-side before comparing these
# numbers with the non-FAISS results.
response = requests.post(
    "http://localhost:8000/evaluation/evaluate_all_models_with_faiss",
    json=config,
)
response.raise_for_status()  # raise on a 4xx/5xx reply instead of parsing it
result = response.json()

# Plain-text report: averaged metrics first, then the first five per-query rows.
print("\nEvaluation Results:", "Average Metrics:", sep="\n")
print(pd.DataFrame(result["average_metrics"]))
print("\nSample Results (First 5 rows):")
print(pd.DataFrame(result["results"]).head(5))


Evaluation Results:
Average Metrics:
              faiss_tfidf  faiss_bert  faiss_hybrid
precision@10     0.002180    0.003270      0.003030
recall           0.751538    1.000000      1.000000
mrr              0.406865    0.881961      0.826846
ap               0.364639    0.849254      0.798539

Sample Results (First 5 rows):
   precision@10  recall  mrr        ap query_id        model
0         0.002     1.0  1.0  1.000000   482241  faiss_tfidf
1         0.002     1.0  1.0  1.000000   237962  faiss_tfidf
2         0.004     1.0  0.5  0.267241    69051  faiss_tfidf
3         0.004     0.5  0.5  0.129902    73724  faiss_tfidf
4         0.004     1.0  0.5  0.263699   100970  faiss_tfidf
