In [None]:
from qdrant_client import QdrantClient

from src.collection.query_collection import (
    filter_search,
    get_semantically_similar_results,
)
from src.utils.utils import load_qdrant_client, load_config
from src.utils.utils import load_model
from src.collection.evaluate_collection import (
    calculate_precision,
    calculate_recall,
    calculate_f1_score,
    calculate_f2_score,
)

from dotenv import load_dotenv
import os
import pickle

# Load variables from a .env file (if any) before reading the environment.
load_dotenv()

# Connection settings for Qdrant. Environment lookups yield a str, or None
# when the variable is unset, so the port is NOT converted to an int here.
# NOTE(review): confirm load_qdrant_client accepts a string port.
QDRANT_HOST = os.environ.get("QDRANT_HOST")
QDRANT_PORT = os.environ.get("QDRANT_PORT")

# Collection queried below and the model name handed to load_model.
COLLECTION_NAME = os.environ.get("COLLECTION_NAME")
HF_MODEL_NAME = os.environ.get("HF_MODEL_NAME")

In [None]:
# Read the project configuration and pull out the value stored under
# "similarity_threshold_1" as a float.
# Assumes config is dict-like; if the key is absent, .get() returns None and
# float(None) raises TypeError — TODO confirm the key is always present.
# NOTE(review): similarity_threshold is not referenced by any later cell in
# view; the search call further down passes a literal score_threshold instead.
config = load_config("../.config/config.json")
similarity_threshold = float(config.get("similarity_threshold_1"))

# Load the pickled lookup that later cells index by search term
# ("application" / "applications").
# SECURITY: pickle.load can execute arbitrary code while unpickling; only
# load this file if it comes from a trusted source.
with open("../data/regex_ids.pkl", "rb") as f:
    regex_ids = pickle.load(f)

In [None]:
# Build the Qdrant client from the host/port read from the environment.
# NOTE(review): QDRANT_PORT is still a string (or None) at this point —
# confirm load_qdrant_client handles that.
qdrant = load_qdrant_client(QDRANT_HOST, port=QDRANT_PORT)
# Load the embedding model named by HF_MODEL_NAME; it is used below to encode
# the query text.
model = load_model(HF_MODEL_NAME)

In [None]:
# Embed the query term "applications"; the resulting vector is what the
# semantic search below is run against.
query_embedding = model.encode("applications")

In [None]:
# Run the semantic search for the query embedding against the configured
# collection. score_threshold presumably discards hits scoring below 0.5 —
# confirm against get_semantically_similar_results.
# NOTE(review): 0.5 is hard-coded here while similarity_threshold was loaded
# from the config earlier and is never used in the visible cells; confirm
# which value is intended.
results = get_semantically_similar_results(
    client=qdrant,
    collection_name=COLLECTION_NAME,
    query_embedding=query_embedding,
    score_threshold=0.5,
)

In [None]:
# Display the raw search results for inspection.
results

In [None]:
# Collect the id of every search hit as a string; these ids are later
# intersected with the ids loaded from regex_ids.
# NOTE(review): assumes the regex ids are strings too — confirm.
result_ids = list(map(lambda hit: str(hit.id), results))
result_ids

In [None]:
# Look up the ground-truth ids in this cell: originally app_ids was only
# assigned in a later cell, so on a fresh kernel (Restart & Run All) this
# print raised NameError.
app_ids = regex_ids["application"]

# Prints three counts: ground-truth ids, ids present in both the search
# results and the ground truth, and ids returned by the search.
print(len(app_ids), len(set(result_ids) & set(app_ids)), len(result_ids))

In [None]:
# Ids recorded in regex_ids for the singular and the plural search term.
app_ids = regex_ids["application"]
apps_ids = regex_ids["applications"]

# Print the ids shared by both lists, the plural list in sorted order, and
# the number of ids recorded for the singular term.
print(f"intersection: {sorted(set(app_ids).intersection(set(apps_ids)))}")
print(f"apps_ids: {sorted(apps_ids)}")
print(f"count of app_ids: {len(app_ids)}")

In [None]:
# Score the search hits (result_ids) against the ids stored for
# "application" (app_ids). Precision and recall are computed first because
# both F-scores are derived from them.
precision, recall = (
    calculate_precision(result_ids, app_ids),
    calculate_recall(result_ids, app_ids),
)
f1_score, f2_score = (
    calculate_f1_score(precision, recall),
    calculate_f2_score(precision, recall),
)

In [None]:
# We want high recall and do not particularly mind low precision: low
# precision only means we recommend more records than necessary, while high
# recall means every relevant record is included in the recommendations.

print(
    f"precision: {precision}"
)  # low precision = many false positives (expected with a low ANN similarity threshold)
print(
    f"recall: {recall}"
)  # high recall = few false negatives (expected with a low ANN similarity threshold)
# NOTE(review): the two remarks below assume the standard F-beta definitions
# (F1 weights precision and recall equally, F2 weights recall more heavily) —
# confirm against calculate_f1_score / calculate_f2_score.
print(f"f1_score: {f1_score}")  # a low F1 here is mostly driven by low precision
print(f"f2_score: {f2_score}")  # a low F2 would point to low recall