In [None]:
import ast
import json
import numpy as np
import pandas as pd
import pickle
import torch
from multiprocessing import Pool
from transformers import AutoTokenizer, AutoModel
from qdrant_client import QdrantClient
from qdrant_client.http.models import PointStruct, Filter, FieldCondition, Range, CollectionDescription
from qdrant_client.http import models
import sys
sys.path.append('../utils')

import data_utils as dut 

In [None]:
# Restore the previously processed records from their pickle file.
# NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
with open('../data/processed/processed_records.pkl', mode='rb') as records_file:
    loaded_records = pickle.load(records_file)

# Search


## Search Params

In [None]:
# Free-text description of the book to look for.
search_string = "Find me a horror book that has an element of fantasy"

# How many matches the search should return.
n_return_matches = 5

# Optional filters -- set a field to None to skip filtering on it.
desired_category = 'Fiction'    # Category the book must belong to
minimum_rating = None           # Lowest acceptable user-review rating

In [None]:
# Name of the Qdrant collection that is queried below.
collection_name = 'books'

# Client for the Qdrant instance at localhost:6333.
qdrant_client = QdrantClient(
    host='localhost',
    port=6333,
)

In [None]:
# Translate the optional search parameters into Qdrant payload conditions.
# A parameter left as None contributes no condition.
filter_conditions = []

# Lower bound (inclusive) on the 'review/score' payload field.
if minimum_rating is not None:
    rating_range = models.Range(gte=minimum_rating)
    filter_conditions += [
        models.FieldCondition(key='review/score', range=rating_range)
    ]

# Value match on the 'categories' payload field.
if desired_category is not None:
    category_match = models.MatchValue(value=desired_category)
    filter_conditions += [
        models.FieldCondition(key='categories', match=category_match)
    ]

# Every collected condition must hold for a point to qualify.
query_filter = models.Filter(must=filter_conditions)

In [None]:
# Embed the search string with the project helper and keep the single resulting vector.
# NOTE(review): `tokenizer` and `model` are not created in any visible cell --
# confirm they are initialised before this cell runs on a fresh kernel.
search_vector = dut.vectorize_texts([search_string], tokenizer, model)[0]

# Perform the vector search. `query_filter` (built from `minimum_rating` and
# `desired_category`) is passed so that the filters configured in the search
# parameters actually restrict the results; previously the argument was
# commented out and the filters were silently ignored.
# NOTE(review): if this returns no matches, verify that the stored payload
# fields 'review/score' and 'categories' have the shape the filter expects.
search_results = qdrant_client.search(
    collection_name=collection_name,
    query_vector=search_vector,
    query_filter=query_filter,
    search_params=models.SearchParams(hnsw_ef=128, exact=False),
    limit=n_return_matches,
)

In [None]:
# Print the title of every match by looking its id up in the original records.
# NOTE(review): `json_list` is not defined in any visible cell -- presumably a
# sequence of JSON strings indexed by point id; confirm where it comes from.
for hit in search_results:
    # The point id doubles as the index of the record's raw JSON string.
    record = json.loads(json_list[hit.id])
    title = record.get('Title', 'No Title Available')
    print(f"Point ID: {hit.id}, Title: {title}")