In [2]:
from elasticsearch import Elasticsearch

In [118]:
es = Elasticsearch(
    "https://192.168.18.136:9200",
    basic_auth=("elastic","FIRpdx+ua5jlE0K6Q3fs"),
    ca_certs="http_ca.crt"
)
es.ping()

True

## Prepare the data

In [119]:
import pandas as pd

df = pd.read_csv("myntra_products_catalog.csv").loc[:499]
df.head()

Unnamed: 0,ProductID,ProductName,ProductBrand,Gender,Price (INR),NumImages,Description,PrimaryColor
0,10017413,DKNY Unisex Black & Grey Printed Medium Trolle...,DKNY,Unisex,11745,7,"Black and grey printed medium trolley bag, sec...",Black
1,10016283,EthnoVogue Women Beige & Grey Made to Measure ...,EthnoVogue,Women,5810,7,Beige & Grey made to measure kurta with churid...,Beige
2,10009781,SPYKAR Women Pink Alexa Super Skinny Fit High-...,SPYKAR,Women,899,7,Pink coloured wash 5-pocket high-rise cropped ...,Pink
3,10015921,Raymond Men Blue Self-Design Single-Breasted B...,Raymond,Men,5599,5,Blue self-design bandhgala suitBlue self-desig...,Blue
4,10017833,Parx Men Brown & Off-White Slim Fit Printed Ca...,Parx,Men,759,5,"Brown and off-white printed casual shirt, has ...",White


In [122]:
df.isna().value_counts()

ProductID  ProductName  ProductBrand  Gender  Price (INR)  NumImages  Description  PrimaryColor
False      False        False         False   False        False      False        False           500
Name: count, dtype: int64

In [123]:
df.fillna("None", inplace=True) #None is liay k elastic by default ye value laita hai

## Convert the relevant field to Vector using BERT model

In [124]:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('all-mpnet-base-v2')

In [125]:
df["DescriptionVector"] = df["Description"].apply(lambda x: model.encode(x))

In [126]:
df.head()

Unnamed: 0,ProductID,ProductName,ProductBrand,Gender,Price (INR),NumImages,Description,PrimaryColor,DescriptionVector
0,10017413,DKNY Unisex Black & Grey Printed Medium Trolle...,DKNY,Unisex,11745,7,"Black and grey printed medium trolley bag, sec...",Black,"[0.027645754, -0.002634158, -0.0035884678, 0.0..."
1,10016283,EthnoVogue Women Beige & Grey Made to Measure ...,EthnoVogue,Women,5810,7,Beige & Grey made to measure kurta with churid...,Beige,"[-0.024660703, -0.028755417, -0.020332463, 0.0..."
2,10009781,SPYKAR Women Pink Alexa Super Skinny Fit High-...,SPYKAR,Women,899,7,Pink coloured wash 5-pocket high-rise cropped ...,Pink,"[-0.04694326, 0.08182789, 0.048335202, -0.0001..."
3,10015921,Raymond Men Blue Self-Design Single-Breasted B...,Raymond,Men,5599,5,Blue self-design bandhgala suitBlue self-desig...,Blue,"[-0.015098771, -0.010285473, 0.009487319, -0.0..."
4,10017833,Parx Men Brown & Off-White Slim Fit Printed Ca...,Parx,Men,759,5,"Brown and off-white printed casual shirt, has ...",White,"[-0.017746478, 0.0062095774, 0.021813959, 0.02..."


In [127]:
es.ping()

True

In [128]:
# es.indices.delete(index='all_products') #delete previous index if present

## Create new index in ElasticSearch!

In [129]:
from indexMapping import indexMapping

es.indices.create(index="all_products", mappings=indexMapping)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'all_products'})

## Ingest the data into index

In [130]:
record_list = df.to_dict("records") #for one record

In [131]:
record_list[0]

{'ProductID': 10017413,
 'ProductName': 'DKNY Unisex Black & Grey Printed Medium Trolley Bag',
 'ProductBrand': 'DKNY',
 'Gender': 'Unisex',
 'Price (INR)': 11745,
 'NumImages': 7,
 'Description': 'Black and grey printed medium trolley bag, secured with a TSA lockOne handle on the top and one on the side, has a trolley with a retractable handle on the top and four corner mounted inline skate wheelsOne main zip compartment, zip lining, two compression straps with click clasps, one zip compartment on the flap with three zip pocketsWarranty: 5 yearsWarranty provided by Brand Owner / Manufacturer',
 'PrimaryColor': ' Black',
 'DescriptionVector': array([ 2.76457537e-02, -2.63415789e-03, -3.58846784e-03,  5.13587818e-02,
         3.09660640e-02,  1.40507268e-02,  7.27054710e-03,  3.13871354e-02,
        -6.23787940e-02, -3.82880215e-03,  3.15213799e-02,  7.55472854e-02,
         2.12645670e-03,  4.64893952e-02,  5.07448539e-02, -1.71941835e-02,
         1.22892149e-02, -1.95682291e-02, -9.6

In [132]:
#here we will insert all the records into es index
for record in record_list:
    try:
        es.index(index="all_products", document=record, id=record["ProductID"])
    except Exception as e:
        print(e)

In [133]:
es.count(index="all_products")

ObjectApiResponse({'count': 500, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}})

## Search the data

In [142]:
input_keyword = "Blue Shoes" #user will insert input search 
vector_of_input_keyword = model.encode(input_keyword) #convert the input of user into vector
#now we will pass this vector to elasticsearch
query = {
    "field" : "DescriptionVector",  #we want to apply search on this field
    "query_vector" : vector_of_input_keyword, #this is input vector that will apply on DescriptionVectore field
    "k" : 2,  #it will return two results, we can increase this according to the requirement
    "num_candidates" : 500, #it will search on 500 documents
}

res = es.knn_search(index="all_products", knn=query , source=["ProductName","Description"])
es.k
res["hits"]["hits"]

AttributeError: 'Elasticsearch' object has no attribute 'k'

In [101]:

# vector_field = "DescriptionVector"

# query = {
#     "query": {
#         "script_score": {
#             "query": {
#                 "match_all": {}
#             },
#             "script": {
#                 "source": "cosineSimilarity(params.query_vector, '" + vector_field + "') + 1.0",
#                 "params": {
#                     "query_vector": vector_of_input_keyword
#                 }
#             }
#         }
#     }
# }

# response = es.search(index="all_products", body=query, _source=["ProductName", "Description"] )
# response["hits"]["hits"]

In [96]:


# # User input and model encoding
# input_keyword = "Blue Shoes"  # User will insert input search
# vector_of_input_keyword = model.encode(input_keyword)  # Convert the input of user into vector
# vector_field = "DescriptionVector"

# # Define the search query
# query = {
#     "_source": ["ProductName", "Description"],  # Specify the fields to return
#     "query": {
#         "script_score": {
#             "query": {
#                 "match_all": {}
#             },
#             "script": {
#                 "source": "cosineSimilarity(params.query_vector, '" + vector_field + "') + 1.0",
#                 "params": {
#                     "query_vector": vector_of_input_keyword
#                 }
#             }
#         }
#     }
# }

# # Perform the search
# response = es.search(index="all_products", body=query)

# # Print the hits
# response["hits"]["hits"]


  response = es.search(index="all_products", body=query)


[{'_index': 'all_products',
  '_type': '_doc',
  '_id': '10018013',
  '_score': 1.6860578,
  '_source': {'Description': 'A pair of round-toe blue sneakers, has regular styling, lace-up detailTextile upperCushioned footbedTextured and patterned outsoleWarranty: 3 monthsWarranty provided by brand/manufacturer',
   'ProductName': 'Puma Men Blue Sneakers'}},
 {'_index': 'all_products',
  '_type': '_doc',
  '_id': '10018075',
  '_score': 1.6860578,
  '_source': {'Description': 'A pair of round-toe blue sneakers, has regular styling, lace-up detailTextile upperCushioned footbedTextured and patterned outsoleWarranty: 3 monthsWarranty provided by brand/manufacturer',
   'ProductName': 'Puma Men Blue Sneakers'}},
 {'_index': 'all_products',
  '_type': '_doc',
  '_id': '10005997',
  '_score': 1.6434796,
  '_source': {'Description': 'A pair of round-toe navy blue sneakers, has mid-top styling, lace-up detailLeather upperCushioned footbedTextured and patterned outsoleWarranty: 3 monthsWarranty pro

In [99]:


# # User input and model encoding
# input_keyword = "Blue Shoes"  # User will insert input search
# vector_of_input_keyword = model.encode(input_keyword)  # Convert the input of user into vector
# vector_field = "DescriptionVector"

# # Define the search query
# query = {
#     "size":2,
#     "_source": ["ProductName", "Description"],  # Specify the fields to return
#     "query": {
#         "script_score": {
#             "query": {
#                 "match_all": {}
#             },
#             "script": {
#                 "source": "cosineSimilarity(params.query_vector, '" + vector_field + "') + 1.0",
#                 "params": {
#                     "query_vector": vector_of_input_keyword
#                 }
#             }
#         }
#     }
# }

# # Perform the search
# response = es.search(index="all_products", body=query)

# # Print the hits
# response["hits"]["hits"]


  response = es.search(index="all_products", body=query)


[{'_index': 'all_products',
  '_type': '_doc',
  '_id': '10018013',
  '_score': 1.6860578,
  '_source': {'Description': 'A pair of round-toe blue sneakers, has regular styling, lace-up detailTextile upperCushioned footbedTextured and patterned outsoleWarranty: 3 monthsWarranty provided by brand/manufacturer',
   'ProductName': 'Puma Men Blue Sneakers'}},
 {'_index': 'all_products',
  '_type': '_doc',
  '_id': '10018075',
  '_score': 1.6860578,
  '_source': {'Description': 'A pair of round-toe blue sneakers, has regular styling, lace-up detailTextile upperCushioned footbedTextured and patterned outsoleWarranty: 3 monthsWarranty provided by brand/manufacturer',
   'ProductName': 'Puma Men Blue Sneakers'}}]