In [26]:
import requests
import pandas as pd

# Shared settings for the evaluation requests below. Individual cells pick the
# entries they need; the "all models" cells post this whole mapping as JSON,
# so key names and insertion order are kept exactly as they were.
config = dict(
    # Dataset identifiers.
    dataset_name="antique/test/non-offensive",
    dataset_name2="antique_test",
    # TF-IDF artefacts.
    tfidf_model_path="../models/tfidf/antique_test_tfidf.joblib",
    tfidf_vector_path="../models/tfidf/antique_test_tfidf_vectors.joblib",
    svd_path="../models/tfidf/antique_test_svd.joblib",
    # BERT artefacts.
    bert_model_path="../models/embeddings/antique_test_bert.joblib",
    bert_vector_path="../models/embeddings/antique_test_vectors.joblib",
    # Hybrid artefacts.
    hybrid_vector_path="../models/hybrid/antique_test_hybrid_vectors.joblib",
    # Index directory and FAISS index files.
    index_dir="../data/index_antique_test",
    faiss_tfidf_index_path="../models/faiss/antique_test_tfidf_faiss.index",
    faiss_bert_index_path="../models/faiss/antique_test_bert_faiss.index",
    faiss_hybrid_index_path="../models/faiss/antique_test_hybrid_faiss.index",
    # Evaluation knobs. Presumably: number of sampled queries, hybrid mixing
    # weight, and retrieval depth -- TODO confirm against the service.
    sample_size=20,
    alpha=0.1,
    k=500,
)

In [27]:

# Evaluate the TF-IDF model through the local evaluation service.
# Only the config entries listed here are forwarded, in this order.
tfidf_fields = (
    "dataset_name",
    "tfidf_model_path",
    "tfidf_vector_path",
    "index_dir",
    "sample_size",
)
tfidf_payload = {field: config[field] for field in tfidf_fields}

response = requests.post(
    "http://localhost:8000/evaluation/evaluate_tfidf_model",
    json=tfidf_payload,
)
response.raise_for_status()  # abort the cell on a 4xx/5xx reply
result = response.json()

# Averaged metrics first, then a preview of the per-query rows.
print("\nTF-IDF Model Evaluation:", "Average Metrics:", sep="\n")
tfidf_average = pd.DataFrame(result["average_metrics"])
print(tfidf_average)
print("\nSample Results (First 5 rows):")
tfidf_preview = pd.DataFrame(result["results"]).head()
print(tfidf_preview.to_string())



TF-IDF Model Evaluation:
Average Metrics:
               TF-IDF
precision@10  0.41500
recall        0.66504
mrr           0.75625
ap            0.25423

Sample Results (First 5 rows):
   precision@10    recall  mrr        ap query_id   model
0           0.8  0.827586  0.5  0.363666   312215  TF-IDF
1           0.3  0.800000  1.0  0.181382  1862795  TF-IDF
2           0.6  0.350000  1.0  0.122333   922849  TF-IDF
3           0.7  0.678571  1.0  0.385356  1937374  TF-IDF
4           0.4  0.612903  1.0  0.206902   225575  TF-IDF


In [28]:

# Evaluate the BERT model through the local evaluation service.
# The request body is assembled from the config entries named below.
bert_fields = (
    "dataset_name",
    "bert_model_path",
    "bert_vector_path",
    "index_dir",
    "sample_size",
)
bert_payload = {field: config[field] for field in bert_fields}

response = requests.post(
    "http://localhost:8000/evaluation/evaluate_bert_model",
    json=bert_payload,
)
response.raise_for_status()  # abort the cell on a 4xx/5xx reply
result = response.json()

# Averaged metrics first, then a preview of the per-query rows.
print("\nBERT Model Evaluation:", "Average Metrics:", sep="\n")
bert_average = pd.DataFrame(result["average_metrics"])
print(bert_average)
print("\nSample Results (First 5 rows):")
bert_preview = pd.DataFrame(result["results"]).head()
print(bert_preview.to_string())



BERT Model Evaluation:
Average Metrics:
                  BERT
precision@10  0.385000
recall        0.581436
mrr           0.741341
ap            0.196123

Sample Results (First 5 rows):
   precision@10    recall  mrr        ap query_id model
0           0.6  0.585366  1.0  0.269385   421753  BERT
1           0.3  0.483871  1.0  0.139341   225575  BERT
2           0.7  0.548387  1.0  0.314324  2815090  BERT
3           0.2  0.472222  1.0  0.066440  3239329  BERT
4           0.5  0.714286  1.0  0.301803  3639660  BERT


In [34]:

# Evaluate the hybrid model through the local evaluation service.
# The body carries the TF-IDF, SVD, BERT and hybrid artefact paths plus
# the sample size and alpha, all read from the shared config.
hybrid_fields = (
    "dataset_name",
    "tfidf_model_path",
    "tfidf_vector_path",
    "svd_path",
    "bert_model_path",
    "bert_vector_path",
    "hybrid_vector_path",
    "index_dir",
    "sample_size",
    "alpha",
)
hybrid_payload = {field: config[field] for field in hybrid_fields}

response = requests.post(
    "http://localhost:8000/evaluation/evaluate_hybrid_model",
    json=hybrid_payload,
)
response.raise_for_status()  # abort the cell on a 4xx/5xx reply
result = response.json()

# Averaged metrics first, then a preview of the per-query rows.
print("\nHybrid Model Evaluation:", "Average Metrics:", sep="\n")
hybrid_average = pd.DataFrame(result["average_metrics"])
print(hybrid_average)
print("\nSample Results (First 5 rows):")
hybrid_preview = pd.DataFrame(result["results"]).head()
print(hybrid_preview.to_string())



Hybrid Model Evaluation:
Average Metrics:
                Hybrid
precision@10  0.390000
recall        0.479537
mrr           0.820833
ap            0.174912

Sample Results (First 5 rows):
   precision@10    recall   mrr        ap query_id   model
0           0.4  0.571429  1.00  0.139080  3874326  Hybrid
1           0.7  0.566667  1.00  0.285572   443848  Hybrid
2           0.4  0.555556  1.00  0.391270  3382736  Hybrid
3           0.2  0.324324  0.25  0.032770  1582877  Hybrid
4           0.3  0.782609  1.00  0.208819  2528767  Hybrid


In [32]:

# Evaluate every model in one request. The complete config mapping is
# posted as the JSON body, unlike the single-model cells that send a subset.
all_models_endpoint = "http://localhost:8000/evaluation/evaluate_all_models"

response = requests.post(all_models_endpoint, json=config)
response.raise_for_status()  # abort the cell on a 4xx/5xx reply
result = response.json()

# One column per model in the averages; the preview is just the first
# five rows of the combined per-query results.
print("\nEvaluation Results:", "Average Metrics:", sep="\n")
all_models_average = pd.DataFrame(result["average_metrics"])
print(all_models_average)
print("\nSample Results (First 5 rows):")
all_models_preview = pd.DataFrame(result["results"]).head()
print(all_models_preview)


Evaluation Results:
Average Metrics:
                TF-IDF      BERT    Hybrid
precision@10  0.460000  0.515000  0.485000
recall        0.625178  0.628769  0.604609
mrr           0.866899  0.836446  0.790556
ap            0.218502  0.268036  0.226296

Sample Results (First 5 rows):
   precision@10    recall       mrr        ap query_id   model
0           0.7  0.781250  0.500000  0.332218  1119420  TF-IDF
1           0.6  0.500000  1.000000  0.287465  4190287  TF-IDF
2           0.6  0.750000  1.000000  0.371613   558570  TF-IDF
3           0.3  0.793103  0.333333  0.216720  3301173  TF-IDF
4           0.3  0.743590  0.500000  0.202400  3074429  TF-IDF


In [24]:

# Evaluate the FAISS-backed variants of every model in one request.
# The complete config mapping is posted as the JSON body.
#
# NOTE(review): in the recorded output of this cell the per-query
# "precision@10" values are all multiples of 0.002 (= 1/500) and
# config["k"] is 500, which suggests the service divides by k rather than
# by 10 for this endpoint -- confirm on the server side before comparing
# these numbers with the non-FAISS results.
faiss_endpoint = "http://localhost:8000/evaluation/evaluate_all_models_with_faiss"

response = requests.post(faiss_endpoint, json=config)
response.raise_for_status()  # abort the cell on a 4xx/5xx reply
result = response.json()

# One column per model in the averages; the preview is just the first
# five rows of the combined per-query results.
print("\nEvaluation Results:", "Average Metrics:", sep="\n")
faiss_average = pd.DataFrame(result["average_metrics"])
print(faiss_average)
print("\nSample Results (First 5 rows):")
faiss_preview = pd.DataFrame(result["results"]).head()
print(faiss_preview)


Evaluation Results:
Average Metrics:
              faiss_tfidf  faiss_bert  faiss_hybrid
precision@10     0.008523    0.032045      0.036375
recall           0.130085    0.505104      0.568048
mrr              0.205551    0.800214      0.766854
ap               0.022056    0.196733      0.205155

Sample Results (First 5 rows):
   precision@10    recall       mrr        ap query_id        model
0         0.000  0.000000  0.000000  0.000000  2180086  faiss_tfidf
1         0.012  0.230769  0.500000  0.049673  1077370  faiss_tfidf
2         0.006  0.066667  0.043478  0.001497   788976  faiss_tfidf
3         0.028  0.411765  0.142857  0.028633  3507491  faiss_tfidf
4         0.014  0.189189  0.071429  0.006143  1582877  faiss_tfidf
