In [11]:
# Load libraries
import json
from typing import List, Optional

import faiss
from sentence_transformers import SentenceTransformer

In [19]:
# Imported as a module here only to inspect the installed library version.
import sentence_transformers

# Bare last expression so the notebook displays the version string as cell output.
sentence_transformers.__version__

'2.2.2'

In [12]:
# Pretrained sentence-embedding model used by search() to encode query text.
# NOTE(review): presumably this must be the same model that produced the
# vectors stored in the FAISS index — confirm before changing it.
EMBEDDING_MODEL_NAME = "msmarco-MiniLM-L-6-v3"
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

In [13]:
# Load the mapping corpus: a JSON sequence of product records that
# fetch_product() indexes by integer position.
# NOTE(review): presumably record order matches the FAISS index ids — confirm.
# The context manager closes the file handle deterministically, and the explicit
# UTF-8 encoding keeps the load independent of the platform's default codec.
with open('corpus/self-declare.json', 'r', encoding='utf-8') as corpus_file:
    data_maps = json.load(corpus_file)

In [14]:
# Read the prebuilt FAISS index from disk; search() queries it for the
# nearest neighbour of an encoded query.
INDEX_PATH = "corpus/self-declare.index"
index = faiss.read_index(INDEX_PATH)

In [15]:
# Helper functions: map a FAISS hit back to its corpus record and run a nearest-product search
def fetch_product(idx: int, query: str, df: Optional[List] = None):
    """Build the prediction record for one search hit.

    Args:
        idx: Position of the matched record inside ``df``. Negative values are
            treated as "no match": FAISS pads missing neighbours with the label
            ``-1``, which plain list indexing would otherwise resolve to the
            *last* corpus record and report as a success.
        query: The original query text; echoed back under ``"query"``.
        df: Sequence of corpus records (dicts with the keys
            ``'Nama Produk / Rincian'``, ``'Jenis Produk'`` and ``'Kode KBLI'``).
            When omitted, the notebook-level ``data_maps`` is looked up at call
            time, so re-running the corpus-loading cell takes effect without
            redefining this function.

    Returns:
        A dict with the keys ``query``, ``nama_produk_terdekat`` (closest
        product name), ``prediksi_jenis`` (predicted product type),
        ``prediksi_kbli`` (predicted KBLI code) and ``message``. On a miss the
        three prediction fields are empty strings.

    Raises:
        KeyError: If the matched record lacks one of the expected keys.
    """
    if df is None:
        df = data_maps

    result = {}
    result["query"] = query
    try:
        if idx < 0:
            # Route the FAISS "no result" sentinel into the failure branch
            # instead of letting Python wrap around to the end of the list.
            raise IndexError(f"negative corpus index: {idx}")
        info = df[idx]
        result["nama_produk_terdekat"] = info['Nama Produk / Rincian']
        result["prediksi_jenis"] = info["Jenis Produk"]
        result["prediksi_kbli"] = info["Kode KBLI"]
        result["message"] = "Prediction Success"
    except IndexError:
        result["nama_produk_terdekat"] = ""
        result["prediksi_jenis"] = ""
        result["prediksi_kbli"] = ""
        # Trailing space kept: callers may compare against the exact message.
        result["message"] = "Prediction Failed "
    return result

def search(query: str, index_vector: object = None, model_embedding: object = None, top_k: int = 1):
    """Find the corpus record(s) closest to ``query``.

    Args:
        query: Free-text product name to look up.
        index_vector: Object exposing ``search(vectors, k=...)`` that returns a
            ``(distances, labels)`` pair. Defaults to the notebook-level
            ``index``, resolved at call time.
        model_embedding: Object exposing ``encode(list_of_str)``. Defaults to
            the notebook-level ``model``, resolved at call time.
        top_k: Number of nearest neighbours to retrieve. The default of 1
            reproduces the original single-result behaviour.

    Returns:
        A list with one ``fetch_product`` record per retrieved neighbour, in
        the order returned by the index.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        raise ValueError("top_k must be a positive integer")
    # Look the defaults up lazily so re-running the cells that load the index
    # or the model is picked up without redefining this function.
    if index_vector is None:
        index_vector = index
    if model_embedding is None:
        model_embedding = model

    # A one-item batch: the index expects a 2-D array of query vectors.
    query_vector = model_embedding.encode([query])
    _distances, labels = index_vector.search(query_vector, k=top_k)
    # labels has one row per query; only one query was submitted.
    # Labels (including any -1 padding from the index) go to fetch_product as-is.
    neighbour_ids = labels.tolist()[0]
    return [fetch_product(neighbour_id, query) for neighbour_id in neighbour_ids]

In [18]:
%%time
# Smoke test: look up the closest corpus record for one sample product name.
nama_produk_test = "Kacang 2 Kelinci"
# search() returns a list holding the result dict built by fetch_product().
result_prediction = search(nama_produk_test)

print(result_prediction)

[{'query': 'Kacang 2 Kelinci', 'nama_produk_terdekat': 'KACANG KELINCI', 'prediksi_jenis': 'Makanan ringan siap santap', 'prediksi_kbli': 10793, 'message': 'Prediction Success'}]
CPU times: total: 78.1 ms
Wall time: 30 ms
