In [3]:
from elasticsearch import Elasticsearch

In [5]:
es = Elasticsearch(
    "https://localhost:9200",
    basic_auth=("elastic","TioU8yPDHfDL*g*x=Rjw"),
    ca_certs="D:/appic-task/elasticsearch-8.17.0/config/certs/http_ca.crt"
)
es.ping()

True

## Prepare the data

In [9]:
import pandas as pd

df = pd.read_csv("myntra_products_catalog.csv").loc[:1999]
df.head()

Unnamed: 0,ProductID,ProductName,ProductBrand,Gender,Price (INR),NumImages,Description,PrimaryColor
0,10017413,DKNY Unisex Black & Grey Printed Medium Trolle...,DKNY,Unisex,11745,7,"Black and grey printed medium trolley bag, sec...",Black
1,10016283,EthnoVogue Women Beige & Grey Made to Measure ...,EthnoVogue,Women,5810,7,Beige & Grey made to measure kurta with churid...,Beige
2,10009781,SPYKAR Women Pink Alexa Super Skinny Fit High-...,SPYKAR,Women,899,7,Pink coloured wash 5-pocket high-rise cropped ...,Pink
3,10015921,Raymond Men Blue Self-Design Single-Breasted B...,Raymond,Men,5599,5,Blue self-design bandhgala suitBlue self-desig...,Blue
4,10017833,Parx Men Brown & Off-White Slim Fit Printed Ca...,Parx,Men,759,5,"Brown and off-white printed casual shirt, has ...",White


In [16]:
df.isna().value_counts()

ProductID  ProductName  ProductBrand  Gender  Price (INR)  NumImages  Description  PrimaryColor  DescriptionVector
False      False        False         False   False        False      False        False         False                2000
Name: count, dtype: int64

In [11]:
df.fillna("None", inplace=True)

## Convert the relevant field to Vector using BERT model

In [13]:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('all-mpnet-base-v2')

In [14]:
df["DescriptionVector"] = df["Description"].apply(lambda x: model.encode(x))

In [15]:
df.head()

Unnamed: 0,ProductID,ProductName,ProductBrand,Gender,Price (INR),NumImages,Description,PrimaryColor,DescriptionVector
0,10017413,DKNY Unisex Black & Grey Printed Medium Trolle...,DKNY,Unisex,11745,7,"Black and grey printed medium trolley bag, sec...",Black,"[0.027645713, -0.0026341556, -0.003588426, 0.0..."
1,10016283,EthnoVogue Women Beige & Grey Made to Measure ...,EthnoVogue,Women,5810,7,Beige & Grey made to measure kurta with churid...,Beige,"[-0.024660744, -0.028755462, -0.02033244, 0.03..."
2,10009781,SPYKAR Women Pink Alexa Super Skinny Fit High-...,SPYKAR,Women,899,7,Pink coloured wash 5-pocket high-rise cropped ...,Pink,"[-0.046943355, 0.08182783, 0.048335187, -0.000..."
3,10015921,Raymond Men Blue Self-Design Single-Breasted B...,Raymond,Men,5599,5,Blue self-design bandhgala suitBlue self-desig...,Blue,"[-0.015098742, -0.010285478, 0.009487344, -0.0..."
4,10017833,Parx Men Brown & Off-White Slim Fit Printed Ca...,Parx,Men,759,5,"Brown and off-white printed casual shirt, has ...",White,"[-0.017746514, 0.0062095993, 0.021813946, 0.02..."


In [17]:
es.ping()

True

## Create new index in ElasticSearch!

In [18]:
from indexMapping import indexMapping

es.indices.create(index="all_products", mappings=indexMapping)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'all_products'})

## Ingest the data into index

In [19]:
record_list = df.to_dict("records")

In [20]:
for record in record_list:
    try:
        es.index(index="all_products", document=record, id=record["ProductID"])
    except Exception as e:
        print(e)

In [21]:
es.count(index="all_products")

ObjectApiResponse({'count': 2000, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}})

## Search the data

In [22]:
input_keyword = "Blue Shoes"
vector_of_input_keyword = model.encode(input_keyword)

query = {
    "field" : "DescriptionVector",
    "query_vector" : vector_of_input_keyword,
    "k" : 2,
    "num_candidates" : 500, 
}

res = es.knn_search(index="all_products", knn=query , source=["ProductName","Description"])
res["hits"]["hits"]

  res = es.knn_search(index="all_products", knn=query , source=["ProductName","Description"])
  res = es.knn_search(index="all_products", knn=query , source=["ProductName","Description"])


[{'_index': 'all_products',
  '_id': '10017133',
  '_score': 0.6227792,
  '_source': {'ProductName': 'Carrera Men Blue Sneakers',
   'Description': 'A pair of round-toe blue sneakers, has regular styling, lace-up detailSynthetic upperCushioned footbedTextured and patterned outsoleWarranty: 1 monthWarranty provided by brand/manufacturer'}},
 {'_index': 'all_products',
  '_id': '10029687',
  '_score': 0.61968666,
  '_source': {'ProductName': 'Force 10 Men Blue Sneakers',
   'Description': 'A pair of round-toe blue sneakers, has regular styling, lace-up detailSynthetic Leather upperCushioned footbedTextured and patterned outsole'}}]

In [23]:
input_keyword = "Collection of ornaments"
vector_of_input_keyword = model.encode(input_keyword)

query = {
    "field" : "DescriptionVector",
    "query_vector" : vector_of_input_keyword,
    "k" : 5,
    "num_candidates" : 2000, 
}

res = es.knn_search(index="all_products", knn=query , source=["ProductName","Description"])
res["hits"]["hits"]

  res = es.knn_search(index="all_products", knn=query , source=["ProductName","Description"])
  res = es.knn_search(index="all_products", knn=query , source=["ProductName","Description"])


[{'_index': 'all_products',
  '_id': '1000716',
  '_score': 0.57399195,
  '_source': {'ProductName': 'ahilya Gold-Plated Sterling Silver & Green Hoop Earrings',
   'Description': 'A pair of gold-plated sterling silver and green hoop earrings, has floral detail with multiple multifaceted stone embellishments, and a jhumki dangler with dangling\xa0synthetic\xa0pearls belowSecured with a stylised\xa0insert closure'}},
 {'_index': 'all_products',
  '_id': '10031117',
  '_score': 0.567624,
  '_source': {'ProductName': 'Moedbuille Women Multicoloured Silver-Plated Handcrafted Jewellery Set',
   'Description': 'This jewellery set consists of a necklace and a pair of earringsMulticoloured silver-plated handcrafted mult-stranded necklace, has textured and stone-studded detail, a centre piece with multiple danglers, secured with an adjustable lobster claspA pair of matching stud earrings, secured with post and back closure'}},
 {'_index': 'all_products',
  '_id': '1000717',
  '_score': 0.5651760