# Elasticsearch Vector Search Pipeline

In [None]:
# Install dependencies if needed
!pip install -q sentence-transformers elasticsearch

In [None]:
# Import libraries
from sentence_transformers import SentenceTransformer
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
import numpy as np

import logging
import sys

# Configure logging for the Elasticsearch client: send its records to stdout.
logger = logging.getLogger("elasticsearch")
logger.setLevel(logging.DEBUG)  # or logging.INFO for less verbose output

# Re-running this cell must not stack another handler on the same logger
# (every extra handler prints each record one more time), so remove the handler
# attached by a previous run before adding a fresh one.
_HANDLER_NAME = "notebook-stdout"
for _stale in [h for h in logger.handlers if h.get_name() == _HANDLER_NAME]:
    logger.removeHandler(_stale)

handler = logging.StreamHandler(sys.stdout)
handler.set_name(_HANDLER_NAME)
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
handler.setFormatter(formatter)
logger.addHandler(handler)

In [None]:
# Open a client against the local Elasticsearch node (expected on port 9200)
# and stop right away with a clear message if it does not answer a ping.
es = Elasticsearch("http://localhost:9200")
if not es.ping():
    raise AssertionError("Elasticsearch is not running. Please start it first.")

In [None]:
# Sentence-embedding model; the same instance encodes both the documents and the query.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Sample corpus: short product names in Indonesian. Each entry is embedded and
# indexed as one document (stored in the "nama" field), in this order.
documents = [
    "Jaket tahan air untuk musim hujan",
    "Sepatu lari ringan dan nyaman",
    "Kopi arabika asli dari Sumatera",
    "Kamera mirrorless untuk pemula",
    "Laptop ringan untuk kerja remote",
    "Smartphone dengan kamera 108MP",
    "Tas punggung anti air untuk traveling",
    "Headset Bluetooth dengan noise cancelling",
    "Kursi gaming ergonomis",
    "Meja kerja minimalis dari kayu jati",
    "Buku belajar Python untuk pemula",
    "Skincare wajah glowing alami",
    "Jam tangan tahan air sporty",
    "Mouse wireless dengan sensor presisi tinggi",
    "Keyboard mekanik RGB",
    "Powerbank 20000mAh fast charging",
    "Router WiFi 6 kecepatan tinggi",
    "Alat masak multifungsi 7-in-1",
    "Kipas angin portable dengan USB",
    "Panci stainless steel anti lengket",
    "Blender smoothie portable",
    "Dispenser galon bawah hemat listrik",
    "Cermin LED untuk makeup",
    "Gitar akustik untuk pemula",
    "Pakaian olahraga quick dry",
    "Helm motor SNI full face",
    "Tripod HP untuk konten kreator",
    "Kacamata anti radiasi untuk layar komputer",
    "Lampu tidur sensor gerak",
    "Rak dinding minimalis gantung",
    "Parfum pria aroma maskulin",
    "Hijab instan bahan adem",
    "Dompet kulit asli premium",
    "Charger mobil dual USB",
    "Speaker Bluetooth waterproof",
    "Set alat tulis lucu untuk sekolah",
    "Bantal tidur memory foam",
    "Matras yoga anti slip",
    "Botol minum stainless 1 liter",
    "Notebook A5 hardcover",
    "Jas hujan transparan unisex",
    "Celana jeans stretch pria",
    "Kaos polos oversize wanita",
    "Mainan edukatif balita",
    "Kamera CCTV wireless",
    "Masker wajah sheet mask",
    "Lemari plastik 5 susun",
    "Pisau dapur tajam set isi 5",
    "Sarung tangan motor anti air",
    "Selimut bulu super lembut"
]

# Encode every document into a normalized vector and convert the result to
# plain Python lists (one list of floats per document).
embeddings = model.encode(
    documents,
    normalize_embeddings=True,
).tolist()

In [None]:
# Create the index with a dense-vector mapping; an index that already exists is
# left untouched.
index_name = "produk"

# Take the vector dimension from the embeddings computed above instead of
# hard-coding it, so the mapping keeps matching the vectors if the embedding
# model is swapped for one with a different output size.
# NOTE(review): an already existing index keeps its old mapping; delete it
# manually if the dimension changes.
vector_dims = len(embeddings[0])

if not es.indices.exists(index=index_name):
    es.indices.create(index=index_name, body={
        "mappings": {
            "properties": {
                # Product name, stored as full text.
                "nama": {"type": "text"},
                # Embedding of the product name, indexed for kNN search with
                # cosine similarity.
                "vector": {
                    "type": "dense_vector",
                    "dims": vector_dims,
                    "index": True,
                    "similarity": "cosine",
                },
            }
        }
    })

In [None]:
# Index the documents together with their vectors.
# Each document gets a deterministic _id (its position in `documents`), so
# re-running this cell overwrites the same documents instead of adding
# duplicates to the index.
actions = (
    {
        "_index": index_name,
        "_id": str(position),
        "_source": {"nama": name, "vector": vector},
    }
    for position, (name, vector) in enumerate(zip(documents, embeddings))
)

# refresh=True is forwarded to the bulk API so the documents are searchable as
# soon as this call returns (the search cell runs right after this one).
bulk(es, actions, refresh=True)

In [None]:
# Search the index with a new query.
query = "hujan"

# Embed the query the same way as the documents (normalized) and convert it to
# a plain list of floats, matching how the document vectors were prepared, so
# the request body contains only plain Python values.
query_vector = model.encode([query], normalize_embeddings=True)[0].tolist()

res = es.search(index=index_name, body={
    "knn": {
        "field": "vector",
        "query_vector": query_vector,
        "k": 1,               # number of nearest neighbours to return
        "num_candidates": 5   # candidates considered per shard before picking k
    },
    # Only the product name is needed for display; skip returning the vector.
    "_source": ["nama"]
})

print("Hasil pencarian:")
for hit in res['hits']['hits']:
    print(f"- {hit['_source']['nama']} (score: {hit['_score']:.4f})")