In [10]:
import os
import sys
import time
import json
import random
from typing import List

from lightrag.kg.shared_storage import initialize_pipeline_status

import asyncio
import nest_asyncio

from lightrag import LightRAG, QueryParam
from lightrag.llm.hf import hf_embed
from lightrag.utils import EmbeddingFunc
from transformers import AutoModel, AutoTokenizer
from lightrag.llm.openai import openai_complete_if_cache
from lightrag.utils import detect_language
# Path of the JSON file with the OpenRouter API keys. The
# OPENROUTER_API_KEYS_FILE environment variable overrides the default so the
# notebook can run on another machine without editing this cell.
# NOTE(review): the file is presumably a JSON array of key strings (it is
# passed to APIManager as a list) - confirm against the actual file.
API_KEYS_FILE = os.getenv(
    "OPENROUTER_API_KEYS_FILE",
    "C:\\Users\\mhieu\\Desktop\\TN\\LIGHTRAG\\api_keys.json",
)
with open(API_KEYS_FILE, 'r', encoding='utf-8') as f:
    OPENROUTER_API_KEYS = json.load(f)

class APIManager:
    """Rotate through a pool of API keys, benching keys that have failed.

    A key that fails is recorded in ``failed_keys`` and skipped until more
    than ``cooldown_seconds`` have passed since it was benched.

    Args:
        api_keys: The API key strings to rotate through.
        cooldown_seconds: How long a failed key stays benched before it may
            be picked again. Defaults to 600 (10 minutes).
    """

    def __init__(self, api_keys: List[str], cooldown_seconds: float = 600):
        self.api_keys = api_keys
        self.cooldown_seconds = cooldown_seconds
        self.current_key_index = 0
        self.failed_keys = set()    # indices of keys currently benched
        self.last_switch_time = {}  # key index -> time.time() when it was benched

    def get_current_api_key(self):
        """Return the key string at ``current_key_index``."""
        return self.api_keys[self.current_key_index]

    def switch_to_next_key(self):
        """Bench the current key and switch to a randomly chosen usable key.

        Returns:
            The newly selected key string.

        Raises:
            RuntimeError: If every key is benched and none has cooled down.
        """
        now = time.time()
        self.failed_keys.add(self.current_key_index)
        self.last_switch_time[self.current_key_index] = now

        # Release benched keys whose cool-down period has elapsed.
        for idx in list(self.failed_keys):
            benched_at = self.last_switch_time.get(idx)
            if benched_at is not None and now - benched_at > self.cooldown_seconds:
                self.failed_keys.discard(idx)

        available_keys = [
            idx for idx in range(len(self.api_keys)) if idx not in self.failed_keys
        ]
        if not available_keys:
            raise RuntimeError("Tất cả API keys đều đã thất bại. Vui lòng thử lại sau.")

        self.current_key_index = random.choice(available_keys)
        print(f"Đã chuyển sang API key: {self.api_keys[self.current_key_index][:5]}...")
        return self.get_current_api_key()

    def reset_key(self, key_index):
        """Mark the key at ``key_index`` as usable again (no-op if not benched)."""
        self.failed_keys.discard(key_index)

# Initialise the API manager with the loaded key pool
api_manager = APIManager(OPENROUTER_API_KEYS)

WORKING_DIR = "./test_duo"

# exist_ok=True avoids the check-then-create race and also creates any
# missing parent directories.
os.makedirs(WORKING_DIR, exist_ok=True)

# Key used for the next LLM request; rebound whenever the key is rotated.
current_api_key = api_manager.get_current_api_key()
    
async def llm_model_func(
    prompt, system_prompt=None, history_messages=None, keyword_extraction=False, **kwargs
) -> str:
    """Complete ``prompt`` through OpenRouter, rotating API keys on failure.

    Makes at most ``len(OPENROUTER_API_KEYS)`` attempts. After a failed
    attempt the key is rotated via ``api_manager`` and the new key is also
    exported as ``LLM_BINDING_API_KEY``.

    Args:
        prompt: The user prompt.
        system_prompt: Optional system prompt.
        history_messages: Optional prior messages; ``None`` means no history.
        keyword_extraction: Accepted for interface compatibility; it is not
            forwarded to the completion call.
        **kwargs: Forwarded unchanged to ``openai_complete_if_cache``.

    Returns:
        The response returned by ``openai_complete_if_cache``.

    Raises:
        Exception: The error from the last attempt once every attempt failed.
        RuntimeError: If no attempt could be made (empty key list), or from
            ``api_manager.switch_to_next_key`` when no key is usable.
    """
    global current_api_key
    if history_messages is None:
        # Avoid a shared mutable default argument.
        history_messages = []
    max_retries = len(OPENROUTER_API_KEYS)
    retry_count = 0

    while retry_count < max_retries:
        # Remember which key this attempt uses: the global may be rotated by
        # another in-flight call while this one is awaiting.
        attempt_key = current_api_key
        try:
            response = await openai_complete_if_cache(
                "google/gemini-2.0-flash-exp:free",
                prompt,
                system_prompt=system_prompt,
                history_messages=history_messages,
                api_key=attempt_key,
                base_url=os.getenv("LLM_BINDING_HOST", "https://openrouter.ai/api/v1"),
                **kwargs
            )

            # The key that just succeeded is usable again.
            api_manager.reset_key(OPENROUTER_API_KEYS.index(attempt_key))
            return response

        except Exception as e:
            print(f"Lỗi với API key: {str(e)}")
            retry_count += 1

            if retry_count >= max_retries:
                print("Tất cả API keys đều đã thất bại.")
                raise  # bare raise keeps the original traceback

            print("Đang chuyển sang API key tiếp theo...")
            # Rotate only if nobody else already did; otherwise the manager
            # would bench a key that has not actually been tried.
            if current_api_key == attempt_key:
                current_api_key = api_manager.switch_to_next_key()
                os.environ["LLM_BINDING_API_KEY"] = current_api_key

    raise RuntimeError("Tất cả API keys đều đã thất bại")

# Announce the model in use (two lines on stdout), then export the active
# key through the LLM_BINDING_API_KEY environment variable.
print("Loading model...", "google/gemini-2.0-flash-exp:free", sep="\n")

os.environ.update({"LLM_BINDING_API_KEY": current_api_key})
async def initialize_rag():
    """Build a LightRAG instance over ``WORKING_DIR`` and initialise it.

    The embedding function uses the ``BAAI/bge-m3`` tokenizer and model. They
    are loaded on the first embedding call and then reused, instead of being
    reloaded from disk on every call.

    Returns:
        The LightRAG instance after ``initialize_storages()`` and
        ``initialize_pipeline_status()`` have been awaited.
    """
    embedder = {}  # filled on first use: {"tokenizer": ..., "embed_model": ...}

    def embed_texts(texts):
        if not embedder:
            # Load both before caching so a failed load never leaves a
            # half-populated cache behind.
            tokenizer = AutoTokenizer.from_pretrained("BAAI/bge-m3")
            embed_model = AutoModel.from_pretrained("BAAI/bge-m3")
            embedder.update(tokenizer=tokenizer, embed_model=embed_model)
        # Return hf_embed's result as-is, exactly like the former lambda did.
        return hf_embed(texts, **embedder)

    rag = LightRAG(
        working_dir=WORKING_DIR,
        llm_model_func=llm_model_func,
        embedding_func=EmbeddingFunc(
            embedding_dim=1024,
            max_token_size=5000,
            func=embed_texts,
        ),
        addon_params={
            # "insert_batch_size": 20,
            "language": "Vietnamese"
        }
    )
    await rag.initialize_storages()
    await initialize_pipeline_status()

    return rag


async def main():
    """Initialise the RAG instance and print one stored entity record."""
    rag = await initialize_rag()
    lookup_ids = ["ent-3ec1c563b6bb321152a391a90957a38b"]
    # Query the vector store's underlying client directly by id.
    print(rag.entities_vdb._client.get(lookup_ids))


Loading model...
google/gemini-2.0-flash-exp:free


In [11]:
if __name__ == "__main__":
    # The notebook kernel already runs an event loop; nest_asyncio patches
    # asyncio so that asyncio.run() can be called from inside it.
    nest_asyncio.apply()
    asyncio.run(main())

INFO:lightrag:Logger initialized for working directory: ./test_duo
INFO:lightrag:Load KV llm_response_cache with 1 data
INFO:lightrag:Load KV full_docs with 7 data
INFO:lightrag:Load KV text_chunks with 7 data
INFO:lightrag:Loaded graph from ./test_duo\graph_chunk_entity_relation.graphml with 50 nodes, 48 edges
INFO:nano-vectordb:Load (50, 1024) data
INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': './test_duo\\vdb_entities.json'} 50 data
INFO:nano-vectordb:Load (48, 1024) data
INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': './test_duo\\vdb_relationships.json'} 48 data
INFO:nano-vectordb:Load (7, 1024) data
INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': './test_duo\\vdb_chunks.json'} 7 data
INFO:lightrag:Loaded document status storage with 8 records
ERROR: Error: try to getnanmespace before it is initialized, pid=11516


ValueError: Shared dictionaries not initialized

In [13]:
import os
import sys
import time
import json
import random
from typing import List


from lightrag import LightRAG, QueryParam
from lightrag.llm.hf import hf_embed
from lightrag.utils import EmbeddingFunc
from transformers import AutoModel, AutoTokenizer
from lightrag.llm.openai import openai_complete_if_cache
from lightrag.utils import detect_language
# Path of the JSON file with the OpenRouter API keys. The
# OPENROUTER_API_KEYS_FILE environment variable overrides the default so the
# notebook can run on another machine without editing this cell.
# NOTE(review): the file is presumably a JSON array of key strings (it is
# passed to APIManager as a list) - confirm against the actual file.
API_KEYS_FILE = os.getenv(
    "OPENROUTER_API_KEYS_FILE",
    "C:\\Users\\mhieu\\Desktop\\TN\\LIGHTRAG\\api_keys.json",
)
with open(API_KEYS_FILE, 'r', encoding='utf-8') as f:
    OPENROUTER_API_KEYS = json.load(f)

class APIManager:
    """Rotate through a pool of API keys, benching keys that have failed.

    A key that fails is recorded in ``failed_keys`` and skipped until more
    than ``cooldown_seconds`` have passed since it was benched.

    Args:
        api_keys: The API key strings to rotate through.
        cooldown_seconds: How long a failed key stays benched before it may
            be picked again. Defaults to 600 (10 minutes).
    """

    def __init__(self, api_keys: List[str], cooldown_seconds: float = 600):
        self.api_keys = api_keys
        self.cooldown_seconds = cooldown_seconds
        self.current_key_index = 0
        self.failed_keys = set()    # indices of keys currently benched
        self.last_switch_time = {}  # key index -> time.time() when it was benched

    def get_current_api_key(self):
        """Return the key string at ``current_key_index``."""
        return self.api_keys[self.current_key_index]

    def switch_to_next_key(self):
        """Bench the current key and switch to a randomly chosen usable key.

        Returns:
            The newly selected key string.

        Raises:
            RuntimeError: If every key is benched and none has cooled down.
        """
        now = time.time()
        self.failed_keys.add(self.current_key_index)
        self.last_switch_time[self.current_key_index] = now

        # Release benched keys whose cool-down period has elapsed.
        for idx in list(self.failed_keys):
            benched_at = self.last_switch_time.get(idx)
            if benched_at is not None and now - benched_at > self.cooldown_seconds:
                self.failed_keys.discard(idx)

        available_keys = [
            idx for idx in range(len(self.api_keys)) if idx not in self.failed_keys
        ]
        if not available_keys:
            raise RuntimeError("Tất cả API keys đều đã thất bại. Vui lòng thử lại sau.")

        self.current_key_index = random.choice(available_keys)
        print(f"Đã chuyển sang API key: {self.api_keys[self.current_key_index][:5]}...")
        return self.get_current_api_key()

    def reset_key(self, key_index):
        """Mark the key at ``key_index`` as usable again (no-op if not benched)."""
        self.failed_keys.discard(key_index)

# Initialise the API manager with the loaded key pool
api_manager = APIManager(OPENROUTER_API_KEYS)

WORKING_DIR = "./test_duo"

# exist_ok=True avoids the check-then-create race and also creates any
# missing parent directories.
os.makedirs(WORKING_DIR, exist_ok=True)

# Key used for the next LLM request; rebound whenever the key is rotated.
current_api_key = api_manager.get_current_api_key()
    
async def llm_model_func(
    prompt, system_prompt=None, history_messages=None, keyword_extraction=False, **kwargs
) -> str:
    """Complete ``prompt`` through OpenRouter, rotating API keys on failure.

    Makes at most ``len(OPENROUTER_API_KEYS)`` attempts. After a failed
    attempt the key is rotated via ``api_manager`` and the new key is also
    exported as ``LLM_BINDING_API_KEY``.

    Args:
        prompt: The user prompt.
        system_prompt: Optional system prompt.
        history_messages: Optional prior messages; ``None`` means no history.
        keyword_extraction: Accepted for interface compatibility; it is not
            forwarded to the completion call.
        **kwargs: Forwarded unchanged to ``openai_complete_if_cache``.

    Returns:
        The response returned by ``openai_complete_if_cache``.

    Raises:
        Exception: The error from the last attempt once every attempt failed.
        RuntimeError: If no attempt could be made (empty key list), or from
            ``api_manager.switch_to_next_key`` when no key is usable.
    """
    global current_api_key
    if history_messages is None:
        # Avoid a shared mutable default argument.
        history_messages = []
    max_retries = len(OPENROUTER_API_KEYS)
    retry_count = 0

    while retry_count < max_retries:
        # Remember which key this attempt uses: the global may be rotated by
        # another in-flight call while this one is awaiting.
        attempt_key = current_api_key
        try:
            response = await openai_complete_if_cache(
                "google/gemini-2.0-flash-exp:free",
                prompt,
                system_prompt=system_prompt,
                history_messages=history_messages,
                api_key=attempt_key,
                base_url=os.getenv("LLM_BINDING_HOST", "https://openrouter.ai/api/v1"),
                **kwargs
            )

            # The key that just succeeded is usable again.
            api_manager.reset_key(OPENROUTER_API_KEYS.index(attempt_key))
            return response

        except Exception as e:
            print(f"Lỗi với API key: {str(e)}")
            retry_count += 1

            if retry_count >= max_retries:
                print("Tất cả API keys đều đã thất bại.")
                raise  # bare raise keeps the original traceback

            print("Đang chuyển sang API key tiếp theo...")
            # Rotate only if nobody else already did; otherwise the manager
            # would bench a key that has not actually been tried.
            if current_api_key == attempt_key:
                current_api_key = api_manager.switch_to_next_key()
                os.environ["LLM_BINDING_API_KEY"] = current_api_key

    raise RuntimeError("Tất cả API keys đều đã thất bại")

print("Loading model...")
print("google/gemini-2.0-flash-exp:free")

os.environ["LLM_BINDING_API_KEY"] = current_api_key

# BAAI/bge-m3 tokenizer and model, loaded on the first embedding call and
# reused afterwards instead of being reloaded from disk on every call.
_embedder_cache = {}


def _embed_texts(texts):
    """Embed ``texts`` with BAAI/bge-m3 via ``hf_embed``, loading the model once."""
    if not _embedder_cache:
        # Load both before caching so a failed load never leaves a
        # half-populated cache behind.
        tokenizer = AutoTokenizer.from_pretrained("BAAI/bge-m3")
        embed_model = AutoModel.from_pretrained("BAAI/bge-m3")
        _embedder_cache.update(tokenizer=tokenizer, embed_model=embed_model)
    # Return hf_embed's result as-is, exactly like the former lambda did.
    return hf_embed(texts, **_embedder_cache)


rag = LightRAG(
    working_dir=WORKING_DIR,
    llm_model_func=llm_model_func,
    embedding_func=EmbeddingFunc(
        embedding_dim=1024,
        max_token_size=5000,
        func=_embed_texts,
    ),
    addon_params={
        # "insert_batch_size": 20,
        "language": "Vietnamese"
    }
)

# def insert_with_retry(data_original, data_translated, target_language="English"):
#     global current_api_key
#     max_retries = len(OPENROUTER_API_KEYS)
#     retry_count = 0
    
#     while retry_count < max_retries:
#         try:
  
#             os.environ["LLM_BINDING_API_KEY"] = current_api_key
#             rag.insert_duo(data_original, data_translated, target_language="English")
#             print("Chèn dữ liệu thành công!")
#             return
#         except Exception as e:
#             error_str = str(e).lower()
#             if "api" in error_str or "key" in error_str or "rate" in error_str or "limit" in error_str:
#                 print(f"Lỗi API khi chèn dữ liệu: {str(e)}")
#                 retry_count += 1
                
#                 if retry_count < max_retries:
#                     print("Đang chuyển sang API key tiếp theo...")
#                     current_api_key = api_manager.switch_to_next_key()
#                     os.environ["LLM_BINDING_API_KEY"] = current_api_key
#                 else:
#                     print("Tất cả API keys đều đã thất bại khi chèn dữ liệu.")
#                     raise e
#             else:
#                 raise e


def insert_with_retry(data, language):
    """Insert ``data`` into the RAG store, rotating API keys on API errors.

    Makes at most ``len(OPENROUTER_API_KEYS)`` attempts. An error is treated
    as API-related when its lower-cased message contains "api", "key",
    "rate" or "limit"; any other error is re-raised immediately.

    Args:
        data: The content passed to ``rag.insert``.
        language: Forwarded to ``rag.insert`` as ``language``.

    Raises:
        Exception: A non-API error, or the last API error once every attempt
            has failed.
        RuntimeError: If no attempt could be made (empty key list), so the
            call never returns silently without inserting.
    """
    global current_api_key
    api_error_markers = ("api", "key", "rate", "limit")
    max_retries = len(OPENROUTER_API_KEYS)
    retry_count = 0

    while retry_count < max_retries:
        try:
            os.environ["LLM_BINDING_API_KEY"] = current_api_key
            rag.insert(data, language=language, matching_method="embedding")
            print("Chèn dữ liệu thành công!")
            return
        except Exception as e:
            error_str = str(e).lower()
            if not any(marker in error_str for marker in api_error_markers):
                raise  # not an API/key problem: rotating keys would not help

            print(f"Lỗi API khi chèn dữ liệu: {str(e)}")
            retry_count += 1

            if retry_count >= max_retries:
                print("Tất cả API keys đều đã thất bại khi chèn dữ liệu.")
                raise  # bare raise keeps the original traceback

            print("Đang chuyển sang API key tiếp theo...")
            current_api_key = api_manager.switch_to_next_key()
            os.environ["LLM_BINDING_API_KEY"] = current_api_key

    raise RuntimeError("Tất cả API keys đều đã thất bại")

def main():
    """Print the stored record and, if present, the vector of one entity.

    Reads the entity vector store of the module-level ``rag`` instance. The
    record is fetched through the client's ``get``; the vector is taken from
    the client's private storage at the row whose ``__id__`` matches. When
    the id is not in the storage, only the ``get`` result is printed.

    Any exception is caught and reported on stdout rather than propagated.
    """
    try:
        entities_vdb = rag.entities_vdb
        entity_id = "ent-3ec1c563b6bb321152a391a90957a38b"
        result = entities_vdb._client.get([entity_id])

        # Private (name-mangled) storage of the vector DB client.
        storage = entities_vdb._client._NanoVectorDB__storage
        # Row position of the entity, or None when it is absent. Previously
        # the variable stayed unbound in that case and raised an error.
        index = next(
            (i for i, item in enumerate(storage["data"]) if item["__id__"] == entity_id),
            None,
        )
        if index is not None:
            # Fetch the vector from the matrix by row index
            vector = storage["matrix"][index]
            print(f"Vector for entity {entity_id}: {vector}")
        print(result)

    except Exception as e:
        print(f"Lỗi khi xử lý dữ liệu: {str(e)}")

if __name__ == "__main__":
    # Run the entity lookup when this cell/script is executed directly.
    main()


INFO:lightrag:Logger initialized for working directory: ./test_duo


INFO:lightrag:Load KV llm_response_cache with 1 data
INFO:lightrag:Load KV full_docs with 13 data
INFO:lightrag:Load KV text_chunks with 13 data
INFO:lightrag:Loaded graph from ./test_duo\graph_chunk_entity_relation.graphml with 75 nodes, 74 edges
INFO:nano-vectordb:Load (88, 1024) data
INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': './test_duo\\vdb_entities.json'} 88 data
INFO:nano-vectordb:Load (75, 1024) data


Loading model...
google/gemini-2.0-flash-exp:free


INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': './test_duo\\vdb_relationships.json'} 75 data
INFO:nano-vectordb:Load (14, 1024) data
INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': './test_duo\\vdb_chunks.json'} 14 data
INFO:lightrag:Loaded document status storage with 14 records


Vector for entity ent-3ec1c563b6bb321152a391a90957a38b: [-0.00950563 -0.04885593 -0.01246002 ...  0.00220176 -0.02015256
  0.0001128 ]
[{'__id__': 'ent-3ec1c563b6bb321152a391a90957a38b', '__created_at__': 1743247001.27852, 'entity_name': '"THÔNG TƯ"'}]
