In [1]:
import os
import pickle

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import pipeline


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Embedding model name and the dimensionality used to build the FAISS index.
MODEL_NAME = "intfloat/multilingual-e5-base"
VECTOR_DIM = 768

# Both output artefacts (FAISS index and pickled chunk texts) live in one folder.
FAISS_DIR = r"D:\mldlai\hsechatbots\faiss"
INDEX_PATH = FAISS_DIR + r"\hse_faiss.index"
META_PATH = FAISS_DIR + r"\hse_chunks.pkl"

In [3]:
# Load the encoder and the precomputed chunk embeddings (cast to float32, the
# dtype FAISS works with).
model = SentenceTransformer(MODEL_NAME)
embeddings = np.load("chunk_embeddings.npy").astype("float32")

# Chunk texts are stored in a single file separated by "---"; empty pieces are dropped.
with open("materials/postembedding.txt", "r", encoding="utf-8") as f:
    chunks = [x.strip() for x in f.read().split("---") if x.strip()]

# Fail fast on misaligned inputs: search results are later used as positions
# into `chunks`, so a shape or count mismatch would silently return wrong text.
if embeddings.ndim != 2 or embeddings.shape[1] != VECTOR_DIM:
    raise ValueError(
        f"expected embeddings of shape (n, {VECTOR_DIM}), got {embeddings.shape}"
    )
if len(chunks) != embeddings.shape[0]:
    raise ValueError(
        f"{len(chunks)} chunks but {embeddings.shape[0]} embedding rows"
    )

print(f"load {len(chunks)} chunks with shape {embeddings.shape}")

load 413 chunks with shape (413, 768)


In [4]:
# Build a flat L2 index over the chunk embeddings and persist it together with
# the chunk texts.
index = faiss.IndexFlatL2(VECTOR_DIM)
index.add(embeddings)

# Search results are positions into `chunks`, so both must have the same length.
if index.ntotal != len(chunks):
    raise ValueError(
        f"index holds {index.ntotal} vectors but there are {len(chunks)} chunks"
    )

# Create the output folder(s) first; writing into a missing directory fails.
for _path in (INDEX_PATH, META_PATH):
    _out_dir = os.path.dirname(_path)
    if _out_dir:  # empty when the path has no directory component
        os.makedirs(_out_dir, exist_ok=True)

faiss.write_index(index, INDEX_PATH)

with open(META_PATH, "wb") as f:
    pickle.dump(chunks, f)

print(f"index saved to {INDEX_PATH}")
print(f"Metadata saved to {META_PATH}")

index saved to D:\mldlai\hsechatbots\faiss\hse_faiss.index
Metadata saved to D:\mldlai\hsechatbots\faiss\hse_chunks.pkl


## Querying

In [2]:
import faiss
import numpy as np
import pickle
from sentence_transformers import SentenceTransformer
import ollama

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
MODEL_NAME = "intfloat/multilingual-e5-base"
INDEX_PATH = r"D:\mldlai\hsechatbots\faiss\hse_faiss.index"
META_PATH = r"D:\mldlai\hsechatbots\faiss\hse_chunks.pkl"

# Encoder used for questions, and the FAISS index read back from disk.
model = SentenceTransformer(MODEL_NAME)
index = faiss.read_index(INDEX_PATH)

# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only load a metadata file you produced yourself.
with open(META_PATH, "rb") as f:
    chunks = pickle.load(f)

# Search results are used as positions into `chunks`; a stale or mismatched
# metadata file would make retrieval return the wrong text.
if index.ntotal != len(chunks):
    raise ValueError(
        f"index holds {index.ntotal} vectors but metadata has {len(chunks)} chunks"
    )

In [12]:
def rag_query(question, top_k=10, llm_model="llama3.1:8b"):
    """Answer `question` with an Ollama chat model, using retrieved chunks as context.

    The question is embedded with the global `model`, the `top_k` nearest
    vectors are looked up in the global FAISS `index`, and the matching texts
    from the global `chunks` list are inserted into the prompt.

    Args:
        question: The user's question.
        top_k: Number of nearest chunks to retrieve; must be at least 1.
        llm_model: Name of the Ollama model to chat with.

    Returns:
        The text content of the model's reply.

    Raises:
        ValueError: If `top_k` is smaller than 1.
    """
    if top_k < 1:
        raise ValueError(f"top_k must be >= 1, got {top_k}")

    # NOTE(review): the E5 model card recommends a "query: " prefix for
    # questions (and "passage: " for documents). It is not added here because
    # how the stored embeddings were produced is not visible — confirm.
    query_emb = model.encode([question]).astype("float32")
    _, neighbor_ids = index.search(query_emb, top_k)

    # FAISS pads the result with -1 when fewer than top_k vectors are available.
    # chunks[-1] would silently add the LAST chunk as context, so keep only ids
    # that are valid positions in `chunks`.
    hits = [int(i) for i in neighbor_ids[0] if 0 <= i < len(chunks)]
    context = "\n".join(chunks[i] for i in hits)

    prompt = f"""
Anda adalah asisten pelatihan HSE kapal. Gunakan informasi di konteks berikut.
jawab pertanyaan dengan bahasa Indonesia degan singkat namun jelas dan informatif.

KONTEKS:
{context}

PERTANYAAN:
{question}

JAWABAN:
"""
    response = ollama.chat(
        model=llm_model,
        messages=[{"role": "user", "content": prompt}],
    )
    return response["message"]["content"]



In [14]:
# Ask a sample question and show the generated answer.
user_q = "apa yang disebut sebagai manajemen kemelut"
answer = rag_query(user_q, top_k=10)

# A single print with a newline separator emits the header line, then the answer.
print("\n jawaban:", answer, sep="\n")



 jawaban:
Manajemen kemelut atau Krisis Manajemen adalah keadaan-keadaan akibat dari adanya musibah-musibah yang serius, konsekwensinya dapat membahayakan keselamatan dan keamanan jiwa manusia dan harta benda.
