In [1]:
import pandas as pd

# Load the dataset
df = pd.read_csv('/home/braj/Desktop/chaabi/bigBasketProducts.csv').apply(lambda x: x.astype(str).str.lower())

In [3]:
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
encoder = SentenceTransformer("all-MiniLM-L6-v2")

In [5]:
qdrant = QdrantClient(":memory:")

In [6]:
qdrant.recreate_collection(
    collection_name="chaabi",
    vectors_config=models.VectorParams(
        size=encoder.get_sentence_embedding_dimension(),  # Vector size is defined by used model
        distance=models.Distance.COSINE,
    ),
)

True

In [7]:
qdrant.upload_records(
    collection_name="chaabi",
    records=[
        models.Record(
            id=idx, vector=encoder.encode(doc["description"]).tolist(), payload=doc
        )
        for idx, doc in enumerate(df.to_dict(orient='records'))
    ],
)

In [15]:
from transformers import AutoTokenizer, BertForQuestionAnswering
import torch

# Load the language model
tokenizer = AutoTokenizer.from_pretrained("deepset/bert-base-cased-squad2")
model = BertForQuestionAnswering.from_pretrained("deepset/bert-base-cased-squad2")
# Move the model to GPU
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Assuming 'index' is your Qdrant Index
def get_contextual_answers(user_query):
    # Query Vector Database for Relevant Products
    hits = qdrant.search(
    collection_name="chaabi",
    query_vector=encoder.encode(user_query).tolist(),
    limit=3,
)
    results=[]
    for response in hits:
        results.append(response.payload)

    # Contextual Answer Generation
    contextual_answers = []
    for result in results:
        question, text = user_query, result['description']
        inputs = tokenizer(question, text, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = model(**inputs)
        answer_start_index = outputs.start_logits.argmax()
        answer_end_index = outputs.end_logits.argmax()
        predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
        answer = tokenizer.decode(predict_answer_tokens, skip_special_tokens=True)
        
        contextual_answers.append({"product": result['product'], 'description':text, "response": answer})

    # Return Contextual Answers
    return contextual_answers


Some weights of the model checkpoint at deepset/bert-base-cased-squad2 were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [17]:
# Example usage
user_query = "Tell me about hair care products"
answers = get_contextual_answers(user_query)
for ans in answers:
    print('product:', ans['product'])
    print('database search response:', ans['description'])
    print('LLMresponse:', ans['response'])
    print('='*70)

product: advansed men hair cream - anti hairfall
database search response: styleã‚â strong and nourished hairã‚â every day, with parachute advancedã‚â after showerã‚â anti hairfall cream, enriched withã‚â almond oilã‚â that helps reduceã‚â hair-fallã‚â by 95%.ã‚â it contains coconut milk proteins to give you healthy and lively hair, while providing the perfect balance of nourishment and style with its unique, naturalã‚â cocolipidtmã‚â formula that gives hair concentratedã‚â coconut nourishmentã‚â right from the roots.ã‚â for best results, use daily. the products range from the classic after-shower hair cream to anti-dandruff with lemon & neem and anti-hairfall with almond.  for beauty tips, tricks & more visitâ https://bigbasket.blog/
LLMresponse: strong and nourished hairã ‚ â every day, with parachute advancedã ‚ â after showerã ‚ â anti hairfall cream, enriched withã ‚ â almond oilã ‚ â that helps reduceã ‚ â hair - fallã ‚ â by 95 %. ã ‚ â it contains coconut milk proteins to give 

In [19]:
# Example usage
user_query = "Tell me about cooking products"
answers = get_contextual_answers(user_query)
for ans in answers:
    print('product:', ans['product'])
    print('database search response:', ans['description'])
    print('LLMresponse:', ans['response'])
    print('='*70)

product: sausage - chicken
database search response: processed pre cooked
LLMresponse: Tell me about cooking products processed pre cooked
product: extra virgin olive oil
database search response: suitable to cook indian meals due to its neutral taste and maintains the good quality even at high temperatures while cooking.
LLMresponse: 
product: chicken - sausage (skinless)
database search response: chicken meat 60%, ice & water 24%, refined palm oil 4.3%, cereal binder 3.8%, isolated soya protein 2.5%, iodized salt, modified starch (e1442), mixed spices, milk powder, phosphates (e450, e451, e452), flavor enhancer (e621), anti-oxidant (e300), curing salt (e250), permitted colour (e616g), nutrition
LLMresponse: Tell me about cooking products chicken meat
