In [1]:
from elasticsearch import Elasticsearch

In [2]:
es = Elasticsearch(
    "https://localhost:9200",
    basic_auth=("elastic","divij23"),
    ca_certs="/Users/Suresh Babu/elasticsearch-8.13.2/config/certs/http_ca.crt"
)
es.ping()

True

## Prepare the data

In [3]:
import pandas as pd

df = pd.read_csv("collegereview2023.csv" ,lineterminator='\n').loc[:499]
df.head()

Unnamed: 0.1,Unnamed: 0,Name,college,review,rating
0,0,Aariz Amaan,IILM University,The faculty-student ratio is good none of the ...,7.0
1,1,Saurabh,Lovely Professional University - [LPU],There are many fests. In which the name of the...,10.0
2,2,Prasanna,Adithya Institute of Technology - [AIT],I am B tech (IT) The desire is not only intere...,6.3
3,3,Darshan,Chandigarh University - [CU],The campus life is very good. They conduct an ...,10.0
4,4,Sakshi Kishor Apsunde,Pune Vidyarthi Griha's College of Engineering ...,From the 3rd year and 1st semester of the 4th ...,9.0


In [4]:
df.isna().value_counts()

Unnamed: 0  Name   college  review  rating
False       False  False    False   False     500
Name: count, dtype: int64

In [5]:
df.fillna("None", inplace=True)

## Convert the relevant field to Vector using BERT model

In [6]:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('all-mpnet-base-v2')

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
df["DescriptionVectorV1"] = df["review"].apply(lambda x: model.encode(x))

In [8]:
df.head()

Unnamed: 0.1,Unnamed: 0,Name,college,review,rating,DescriptionVectorV1
0,0,Aariz Amaan,IILM University,The faculty-student ratio is good none of the ...,7.0,"[-0.0052866545, -0.002298334, -0.010407666, 0...."
1,1,Saurabh,Lovely Professional University - [LPU],There are many fests. In which the name of the...,10.0,"[0.013145822, -0.030787295, -0.009670104, 0.01..."
2,2,Prasanna,Adithya Institute of Technology - [AIT],I am B tech (IT) The desire is not only intere...,6.3,"[0.006332918, -0.04092432, 0.012361291, 0.0261..."
3,3,Darshan,Chandigarh University - [CU],The campus life is very good. They conduct an ...,10.0,"[0.010473956, 0.024847543, -0.0135174785, 0.00..."
4,4,Sakshi Kishor Apsunde,Pune Vidyarthi Griha's College of Engineering ...,From the 3rd year and 1st semester of the 4th ...,9.0,"[0.019880667, -0.0910866, -0.020267576, -0.013..."


In [9]:
es.ping()

True

## Create new index in ElasticSearch!

In [10]:
from indexM2 import indexM2

es.indices.create(index="all_productsps", mappings=indexM2)

BadRequestError: BadRequestError(400, 'resource_already_exists_exception', 'index [all_productsps/X2wMv7XzScmN93vUuGiH2g] already exists')

## Ingest the data into index

In [11]:
record_list = df.to_dict("records")

In [12]:
for record in record_list:
    try:
        es.index(index="all_productsps", document=record, id=record["Unnamed: 0"])
    except Exception as e:
        print(e)

In [13]:
es.count(index="all_productsps")

ObjectApiResponse({'count': 500, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}})

## Search the data

In [14]:
input_keyword = "laxmi"
vector_of_input_keyword = model.encode(input_keyword)

query = {
    "field" : "DescriptionVectorV1",
    "query_vector" : vector_of_input_keyword,
    "k" : 2,
    "num_candidates" : 500, 
}

res = es.knn_search(index="all_productsps", knn=query , source=["college","review"])
res["hits"]["hits"]

  res = es.knn_search(index="all_productsps", knn=query , source=["college","review"])


[{'_index': 'all_productsps',
  '_id': '11',
  '_score': 0.41137612,
  '_source': {'college': 'SRM Institute of Science and Technology - [SRMIST]',
   'review': 'mlan Tarana Aaruush shuru is the name of the fest and some of the others.\r\nThetes is a library and availability of books in many sectors.\r\nTheir facilities for sports are tennis ball badminton volleyball yoga and various other sports.\r\nthere are no such websites run by students and social group.mostly I choose this for low fees and another one would like to go for software developer.\r\n1:20,\r\nTeaching was good qualification was ph.d with least 3 years of experience.\r\nWe must get 50% and 4 months once exams have been conducted.'}},
 {'_index': 'all_productsps',
  '_id': '210',
  '_score': 0.4073125,
  '_source': {'college': 'Shri Ramswaroop Memorial College of Engineering and Management -  [SRMCM]',
   'review': 'Every year the college celebrates its annual cultural extravaganza Abhivyakti in February. Widely flaunte