In [48]:
from qdrant_client import models,QdrantClient
from qdrant_client.http.models import PointStruct
from tqdm import tqdm
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import pandas as pd

In [2]:
encoder = SentenceTransformer('all-MiniLM-L6-v2')

Downloading .gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading 1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

Downloading train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

Downloading vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [11]:
df = pd.read_csv("bigBasketProducts.csv")
print(f"Products: {len(df)}")
df.head()

Products: 27555


Unnamed: 0,index,product,category,sub_category,brand,sale_price,market_price,type,rating,description
0,1,Garlic Oil - Vegetarian Capsule 500 mg,Beauty & Hygiene,Hair Care,Sri Sri Ayurveda,220.0,220.0,Hair Oil & Serum,4.1,This Product contains Garlic Oil that is known...
1,2,Water Bottle - Orange,"Kitchen, Garden & Pets",Storage & Accessories,Mastercook,180.0,180.0,Water & Fridge Bottles,2.3,"Each product is microwave safe (without lid), ..."
2,3,"Brass Angle Deep - Plain, No.2",Cleaning & Household,Pooja Needs,Trm,119.0,250.0,Lamp & Lamp Oil,3.4,"A perfect gift for all occasions, be it your m..."
3,4,Cereal Flip Lid Container/Storage Jar - Assort...,Cleaning & Household,Bins & Bathroom Ware,Nakoda,149.0,176.0,"Laundry, Storage Baskets",3.7,Multipurpose container with an attractive desi...
4,5,Creme Soft Soap - For Hands & Body,Beauty & Hygiene,Bath & Hand Wash,Nivea,162.0,162.0,Bathing Bars & Soaps,4.4,Nivea Creme Soft Soap gives your skin the best...


In [21]:
documents = list(df.astype("str").to_dict(orient="records"))

In [14]:
documents[0]

{'index': 1,
 'product': 'Garlic Oil - Vegetarian Capsule 500 mg',
 'category': 'Beauty & Hygiene',
 'sub_category': 'Hair Care',
 'brand': 'Sri Sri Ayurveda ',
 'sale_price': 220.0,
 'market_price': 220.0,
 'type': 'Hair Oil & Serum',
 'rating': 4.1,
 'description': 'This Product contains Garlic Oil that is known to help proper digestion, maintain proper cholesterol levels, support cardiovascular and also build immunity.  For Beauty tips, tricks & more visit https://bigbasket.blog/'}

In [26]:
client = QdrantClient("localhost", port=6333)

In [27]:
client.recreate_collection(
    collection_name = "products_data",
    vectors_config =  models.VectorParams(
            size = encoder.get_sentence_embedding_dimension(),
            distance = models.Distance.COSINE,
            on_disk=True
        )
)

True

In [28]:
for index in tqdm(range(len(documents))):
    document = documents[index]
    client.upsert(
        collection_name="products_data",
        points=[
            PointStruct(
                id=index,
                vector=encoder.encode(
                    'description : ' + document['description'] +
                    ' product : ' +  document['product'] +
                    ' sub_category : '+ document['sub_category']+
                    ' brand : ' + document['brand'] +
                    ' typ : '+ document['type']
                 ).tolist(),
                payload= document,
            )
        ]
    )

100%|██████████| 27555/27555 [35:53<00:00, 12.79it/s]  


In [None]:
model_name = "deepset/roberta-base-squad2"

nlp = pipeline("question-answering",model = model_name,tokenizer=model_name)
QA_input = {
    'question': 'Why is model conversion important?',
    'context': 'The option to convert models between FARM and transformers gives freedom to the user and let people easily switch between frameworks.'
}
res = nlp(QA_input)

In [50]:
res

{'score': 0.21171477437019348,
 'start': 59,
 'end': 84,
 'answer': 'gives freedom to the user'}

In [74]:
def get_results(query):
    results = client.search(
        collection_name='products_data',
        query_vector=encoder.encode(query).tolist(),
        limit=6)
    # print(encoder.encode(query).tolist())
    # val = sorted(results, key=lambda x: x["score"], reverse=True)
    # val = results[0]
    context = [f"brand is {dict(val)['payload']['brand']}"+
               f" category is {dict(val)['payload']['category']}"+ 
                f" description is {dict(val)['payload']['description']}"+
                f" market_price is {dict(val)['payload']['market_price']}"+
                f" product is {dict(val)['payload']['product']}"+
                f" rating is {dict(val)['payload']['rating']}"
                for val in results]
    
    # sorted_result = sorted(results, key=lambda x: x["score"], reverse=True)
    context = ". ".join(context)
    context = str(results)
    print(context)
    input = {
        'question':query,
        'context':context
    }
    answer = nlp(input)
    return answer

In [75]:
get_results("which product is produced by Nutrashil brand")

[ScoredPoint(id=27470, version=27470, score=0.6158729, payload={'brand': 'Ferrero', 'category': 'Gourmet & World Food', 'description': "It's so delicious that a little goes a long way, A unique and unbeatable taste, Contains no artificial preservatives, Spread on your Bread for Breakfast to start Positively your day, Contains no artificial colours. Nutella® has an authentic taste of hazelnuts and cocoa and its unique creaminess intensifies the flavour. It is so delicious that even a small amount is highly satisfying: 15g/1 tbsp of Nutella spread on bread is enough to guarantee an unbeatable taste and a pleasurable experience.", 'index': '27471', 'market_price': '650.0', 'product': 'Nutella', 'rating': '3.0', 'sale_price': '650.0', 'sub_category': 'Sauces, Spreads & Dips', 'type': 'Chocolate, Peanut Spread'}, vector=None), ScoredPoint(id=9420, version=9420, score=0.6158729, payload={'brand': 'Ferrero', 'category': 'Gourmet & World Food', 'description': "It's so delicious that a little g

{'score': 0.2834286093711853, 'start': 351, 'end': 359, 'answer': 'Nutella®'}

In [41]:
dict(val[0])["payload"]

{'brand': 'Ferrero',
 'category': 'Gourmet & World Food',
 'description': "It's so delicious that a little goes a long way, A unique and unbeatable taste, Contains no artificial preservatives, Spread on your Bread for Breakfast to start Positively your day, Contains no artificial colours. Nutella® has an authentic taste of hazelnuts and cocoa and its unique creaminess intensifies the flavour. It is so delicious that even a small amount is highly satisfying: 15g/1 tbsp of Nutella spread on bread is enough to guarantee an unbeatable taste and a pleasurable experience.",
 'index': '27471',
 'market_price': '650.0',
 'product': 'Nutella',
 'rating': '3.0',
 'sale_price': '650.0',
 'sub_category': 'Sauces, Spreads & Dips',
 'type': 'Chocolate, Peanut Spread'}