In [1]:
from elasticsearch import Elasticsearch

In [2]:
es = Elasticsearch(
    "https://localhost:9200",
    basic_auth=("elastic","iamaryan"),
    ca_certs="C:/elasticsearch-8.15.2/config/certs/http_ca.crt"
)
es.ping()

True

## Prepare the data

In [3]:
import pandas as pd

df = pd.read_csv("Fashion Dataset.csv").loc[:500]
df.head()

Unnamed: 0.1,Unnamed: 0,p_id,name,price,colour,brand,img,ratingCount,avg_rating,description,p_attributes
0,0,17048614.0,Khushal K Women Black Ethnic Motifs Printed Ku...,5099.0,Black,Khushal K,http://assets.myntassets.com/assets/images/170...,4522.0,4.418399,Black printed Kurta with Palazzos with dupatta...,"{'Add-Ons': 'NA', 'Body Shape ID': '443,333,32..."
1,1,16524740.0,InWeave Women Orange Solid Kurta with Palazzos...,5899.0,Orange,InWeave,http://assets.myntassets.com/assets/images/165...,1081.0,4.119334,Orange solid Kurta with Palazzos with dupatta<...,"{'Add-Ons': 'NA', 'Body Shape ID': '443,333,32..."
2,2,16331376.0,Anubhutee Women Navy Blue Ethnic Motifs Embroi...,4899.0,Navy Blue,Anubhutee,http://assets.myntassets.com/assets/images/163...,1752.0,4.16153,Navy blue embroidered Kurta with Trousers with...,"{'Add-Ons': 'NA', 'Body Shape ID': '333,424', ..."
3,3,14709966.0,Nayo Women Red Floral Printed Kurta With Trous...,3699.0,Red,Nayo,http://assets.myntassets.com/assets/images/147...,4113.0,4.088986,Red printed kurta with trouser and dupatta<br>...,"{'Add-Ons': 'NA', 'Body Shape ID': '333,424', ..."
4,4,11056154.0,AHIKA Women Black & Green Printed Straight Kurta,1350.0,Black,AHIKA,http://assets.myntassets.com/assets/images/110...,21274.0,3.978377,"Black and green printed straight kurta, has a ...","{'Body Shape ID': '424', 'Body or Garment Size..."


In [4]:
df.isna().value_counts()

Unnamed: 0  p_id   name   price  colour  brand  img    ratingCount  avg_rating  description  p_attributes
False       False  False  False  False   False  False  False        False       False        False           398
                                                       True         True        False        False           101
                                 True    False  False  True         True        False        False             1
            True   True   True   True    True   True   True         True        True         True              1
Name: count, dtype: int64

In [5]:
df.fillna("None", inplace=True)

  df.fillna("None", inplace=True)


## Convert the relevant field to Vector using BERT model

In [6]:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('all-mpnet-base-v2')

  from tqdm.autonotebook import tqdm, trange


In [8]:
df["DescriptionVector"] = df["description"].apply(lambda x: model.encode(x))

In [35]:
df.head()

Unnamed: 0,ProductID,ProductName,ProductBrand,Gender,Price (INR),NumImages,Description,PrimaryColor,DescriptionVector
0,10017413,DKNY Unisex Black & Grey Printed Medium Trolle...,DKNY,Unisex,11745,7,"Black and grey printed medium trolley bag, sec...",Black,"[0.027645713, -0.0026341556, -0.003588426, 0.0..."
1,10016283,EthnoVogue Women Beige & Grey Made to Measure ...,EthnoVogue,Women,5810,7,Beige & Grey made to measure kurta with churid...,Beige,"[-0.024660744, -0.028755462, -0.02033244, 0.03..."
2,10009781,SPYKAR Women Pink Alexa Super Skinny Fit High-...,SPYKAR,Women,899,7,Pink coloured wash 5-pocket high-rise cropped ...,Pink,"[-0.046943355, 0.08182783, 0.048335187, -0.000..."
3,10015921,Raymond Men Blue Self-Design Single-Breasted B...,Raymond,Men,5599,5,Blue self-design bandhgala suitBlue self-desig...,Blue,"[-0.015098742, -0.010285478, 0.009487344, -0.0..."
4,10017833,Parx Men Brown & Off-White Slim Fit Printed Ca...,Parx,Men,759,5,"Brown and off-white printed casual shirt, has ...",White,"[-0.017746514, 0.0062095993, 0.021813946, 0.02..."


In [9]:
es.ping()

True

## Create new index in ElasticSearch!

In [10]:
if es.indices.exists(index="all_products"):
    print("Index already exists")


Index already exists


In [11]:
from indexMapping import indexMapping

es.indices.create(index="all_products", mappings=indexMapping)

BadRequestError: BadRequestError(400, 'resource_already_exists_exception', 'index [all_products/KHjom6ZGR7myz7uxUxWbkQ] already exists')

## Ingest the data into index

In [12]:
record_list = df.to_dict("records")

In [13]:
record_list[0]

{'Unnamed: 0': 0,
 'p_id': 17048614.0,
 'name': 'Khushal K Women Black Ethnic Motifs Printed Kurta with Palazzos & With Dupatta',
 'price': 5099.0,
 'colour': 'Black',
 'brand': 'Khushal K',
 'img': 'http://assets.myntassets.com/assets/images/17048614/2022/2/4/b0eb9426-adf2-4802-a6b3-5dbacbc5f2511643971561167KhushalKWomenBlackEthnicMotifsAngrakhaBeadsandStonesKurtawit7.jpg',
 'ratingCount': 4522.0,
 'avg_rating': 4.4183989385227775,
 'description': "Black printed Kurta with Palazzos with dupatta <br> <br> <b> Kurta design:  </b> <ul> <li> Ethnic motifs printed </li> <li> Anarkali shape </li> <li> Regular style </li> <li> Mandarin collar,  three-quarter regular sleeves </li> <li> Calf length with flared hem </li> <li> Viscose rayon machine weave fabric </li> </ul> <br> <b> Palazzos design:  </b> <ul> <li> Printed Palazzos </li> <li> Elasticated waistband </li> <li> Slip-on closure </li> </ul>Dupatta Length 2.43 meters Width:&nbsp;88 cm<br>The model (height 5'8) is wearing a size S100% R

In [14]:
for record in record_list:
    try:
        es.index(index="all_products", document=record, id=record["ProductID"])
    except Exception as e:
        print(e)

'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'ProductID'
'Pro

In [15]:
es.count(index="all_products")

ObjectApiResponse({'count': 501, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}})

## Search the data

In [16]:
input_keyword = "jeans"
vector_of_input_keyword = model.encode(input_keyword)

query = {
    "field" : "DescriptionVector",
    "query_vector" : vector_of_input_keyword,
    "k" : 5,
    "num_candidates" : 500, 
}

res = es.knn_search(index="all_products", knn=query , source=["ProductName","Description"])
res["hits"]["hits"]

  res = es.knn_search(index="all_products", knn=query , source=["ProductName","Description"])


[{'_index': 'all_products',
  '_id': '10009685',
  '_score': 0.5503423,
  '_source': {'ProductName': 'SPYKAR Women Blue Alexa Super Skinny Fit High-Rise Clean Look Stretchable Cropped Jeans',
   'Description': 'Blue medium wash 5-pocket high-rise cropped jeans, clean look, light fade, has a button and zip closure, and waistband with belt loops'}},
 {'_index': 'all_products',
  '_id': '10009681',
  '_score': 0.5484132,
  '_source': {'ProductName': 'SPYKAR Women Blue Adora Skinny Fit Mid-Rise Clean Look Ankle-Length Stretchable Jeans',
   'Description': 'Blue dark wash 5-pocket mid-rise ankle-length jeans, clean look, light fade, has a button and zip closure, and waistband with belt loops'}},
 {'_index': 'all_products',
  '_id': '10009693',
  '_score': 0.5482862,
  '_source': {'ProductName': 'SPYKAR Women Blue Adora Skinny Fit Mid-Rise Low Distress Stretchable Ankle-Length Jeans',
   'Description': 'Blue light wash 5-pocket mid-rise ankle-length jeans, low distress, heavy fade, has a but

: 