In [1]:
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

print("Environment variables loaded successfully!")
print(f"JINA_API_KEY loaded: {'✅' if os.getenv('JINA_API_KEY') else '❌'}")
print(f"OPENROUTER_API_KEY loaded: {'✅' if os.getenv('OPENROUTER_API_KEY') else '❌'}")

Environment variables loaded successfully!
JINA_API_KEY loaded: ✅
OPENROUTER_API_KEY loaded: ❌


In [1]:
from langchain_openai import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
import os

# Initialize LLM using environment variable
llm = ChatOpenAI(
    openai_api_key=os.getenv('OPENROUTER_API_KEY'),
    openai_api_base="https://openrouter.ai/api/v1",
    model_name="deepseek/deepseek-r1-0528:free"
)

print("LLM initialized successfully!")

OpenAIError: The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable

In [None]:
# Contoh Pencarian dengan FAISS dan Embedding Jina AI




import faiss
import numpy as np
import json
import requests
import os

# Load FAISS index
faiss_index = faiss.read_index("faiss_index")

# (Opsional) Load metadata jika ingin tahu mapping ke dokumen/chunk
with open("faiss_metadata.json", "r", encoding="utf-8") as f:
    metadata = json.load(f)

# Fungsi untuk mendapatkan embedding dari Jina AI
def get_jina_embedding(text, api_key, model="jina-embeddings-v3"):
    url = "https://api.jina.ai/v1/embeddings"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    data = {
        "model": model,
        "task": "text-matching",
        "late_chunking": False,
        "truncate": False,
        "input": [text]
    }
    response = requests.post(url, headers=headers, json=data)
    if response.status_code == 200:
        result = response.json().get("data", [])
        if result and "embedding" in result[0]:
            return result[0]["embedding"]
    print("Jina API error:", response.status_code, response.text)
    return None

# Contoh query
query = "bagaimana jika aku tidak membayar pajak?"

# Dapatkan embedding query dari Jina AI menggunakan environment variable
api_key = os.getenv('JINA_API_KEY')
if not api_key:
    raise Exception("JINA_API_KEY tidak ditemukan dalam environment variables")

query_emb = get_jina_embedding(query, api_key)
if query_emb is None:
    raise Exception("maaf, saya tidak mengetahuinya")

# Ubah ke numpy array dan reshape
query_emb_np = np.array(query_emb, dtype="float32").reshape(1, -1)

# Cari top-5 dokumen terdekat di FAISS
D, I = faiss_index.search(query_emb_np, k=10)
print("Top 10 index:", I)
print("Top 10 distance:", D)

# Ambil metadata dan tampilkan hasil
for idx in I[0]:
    print(metadata[idx])

# (Opsional) Menyusun konteks untuk LLM dari hasil FAISS
context = "\n\n".join([str(metadata[idx]) for idx in I[0]])

messages = [
    SystemMessage(content="Jawablah hanya berdasarkan informasi berikut. Jika tidak ada info relevan, jawab: 'Maaf, saya tidak tahu.'"),
    SystemMessage(content=f"Konteks dokumen:\n{context}"),
    HumanMessage(content=query)
]

response = llm(messages)
print("Jawaban LLM:", response.content)


Top 10 index: [[735 592 613 736 706 738 591 708 700 737]]
Top 10 distance: [[1.1741498 1.2079507 1.2236501 1.2271019 1.2380455 1.2385365 1.2389603
  1.2390895 1.2424563 1.2430928]]
{'filename': 'Perwal Cimahi 6 2024.pdf', 'doc_part': 1, 'chunk_index': 18}
{'filename': 'Perwal Cimahi 6 2024.pdf', 'doc_part': 0, 'chunk_index': 31}
{'filename': 'Perwal Cimahi 6 2024.pdf', 'doc_part': 0, 'chunk_index': 52}
{'filename': 'Perwal Cimahi 6 2024.pdf', 'doc_part': 1, 'chunk_index': 19}
{'filename': 'Perwal Cimahi 6 2024.pdf', 'doc_part': 0, 'chunk_index': 145}
{'filename': 'Perwal Cimahi 6 2024.pdf', 'doc_part': 1, 'chunk_index': 21}
{'filename': 'Perwal Cimahi 6 2024.pdf', 'doc_part': 0, 'chunk_index': 30}
{'filename': 'Perwal Cimahi 6 2024.pdf', 'doc_part': 0, 'chunk_index': 147}
{'filename': 'Perwal Cimahi 6 2024.pdf', 'doc_part': 0, 'chunk_index': 139}
{'filename': 'Perwal Cimahi 6 2024.pdf', 'doc_part': 1, 'chunk_index': 20}


AuthenticationError: Error code: 401 - {'error': {'message': 'No auth credentials found', 'code': 401}}

In [None]:
# Contoh Pencarian dengan FAISS dan Embedding Jina AI (Batch Embedding)

import faiss
import numpy as np
import json
import requests
import os

# Load FAISS index
faiss_index = faiss.read_index("faiss_index")

# (Opsional) Load metadata jika ingin tahu mapping ke dokumen/chunk
with open("faiss_metadata.json", "r", encoding="utf-8") as f:
    metadata = json.load(f)

# Fungsi untuk mendapatkan batch embedding dari Jina AI
def get_jina_batch_embedding(texts, api_key, model="jina-embeddings-v3"):
    url = "https://api.jina.ai/v1/embeddings"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    data = {
        "model": model,
        "task": "retrieval.passage",
        "late_chunking": False,
        "truncate": False,
        "input": texts
    }
    response = requests.post(url, headers=headers, json=data)
    if response.status_code == 200:
        result = response.json().get("data", [])
        return [item["embedding"] for item in result if "embedding" in item]
    print("Jina API error:", response.status_code, response.text)
    return None

# Contoh batch query
queries = [
    "bagaimana cara membuat KTP ?",
    "prosedur pembayaran pajak kendaraan",
    "syarat mengurus akta kelahiran"
]

# Dapatkan batch embedding query dari Jina AI menggunakan environment variable
api_key = os.getenv('JINA_API_KEY')
if not api_key:
    raise Exception("JINA_API_KEY tidak ditemukan dalam environment variables")

query_embs = get_jina_batch_embedding(queries, api_key)
if not query_embs:
    raise Exception("maaf, saya tidak mengetahuinya")

# Ubah ke numpy array dan reshape
query_embs_np = np.array(query_embs, dtype="float32")

# Cari top-10 dokumen terdekat di FAISS untuk setiap query
for i, query_emb_np in enumerate(query_embs_np):
    D, I = faiss_index.search(query_emb_np.reshape(1, -1), k=10)
    print(f"\nQuery: {queries[i]}")
    print("Top 10 index:", I)
    print("Top 10 distance:", D)
    # Ambil metadata dan tampilkan hasil
    for idx in I[0]:
        print(metadata[idx])
    # (Opsional) Menyusun konteks untuk LLM dari hasil FAISS
    context = "\n\n".join([str(metadata[idx]) for idx in I[0]])

    from langchain_openai import ChatOpenAI
    from langchain.schema import SystemMessage, HumanMessage

    llm = ChatOpenAI(
        openai_api_key=os.getenv("OPENAI_API_KEY"),
        openai_api_base="https://openrouter.ai/api/v1",
        model_name="deepseek/deepseek-r1-0528:free"
    )

    messages = [
        SystemMessage(content="Jawablah hanya berdasarkan informasi berikut. Jika tidak ada info relevan, jawab: 'Maaf, saya tidak tahu.'"),
        SystemMessage(content=f"Konteks dokumen:\n{context}"),
        HumanMessage(content=queries[i])
    ]

    response = llm(messages)
    print("Jawaban LLM:", response.content)



Query: bagaimana cara membuat KTP ?
Top 10 index: [[ 95  93  92  94  91  90  88  89 106 301]]
Top 10 distance: [[1.3126535 1.3311918 1.3343139 1.3721619 1.3989358 1.3989913 1.4018297
  1.4195609 1.4255455 1.4320018]]
{'filename': 'Perda Kota Cimahi No. 8 Tahun 2014.pdf', 'doc_part': 0, 'chunk_index': 95}
{'filename': 'Perda Kota Cimahi No. 8 Tahun 2014.pdf', 'doc_part': 0, 'chunk_index': 93}
{'filename': 'Perda Kota Cimahi No. 8 Tahun 2014.pdf', 'doc_part': 0, 'chunk_index': 92}
{'filename': 'Perda Kota Cimahi No. 8 Tahun 2014.pdf', 'doc_part': 0, 'chunk_index': 94}
{'filename': 'Perda Kota Cimahi No. 8 Tahun 2014.pdf', 'doc_part': 0, 'chunk_index': 91}
{'filename': 'Perda Kota Cimahi No. 8 Tahun 2014.pdf', 'doc_part': 0, 'chunk_index': 90}
{'filename': 'Perda Kota Cimahi No. 8 Tahun 2014.pdf', 'doc_part': 0, 'chunk_index': 88}
{'filename': 'Perda Kota Cimahi No. 8 Tahun 2014.pdf', 'doc_part': 0, 'chunk_index': 89}
{'filename': 'Perda Kota Cimahi No. 8 Tahun 2014.pdf', 'doc_part': 0, 

OpenAIError: The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable