# dsRAG
<https://github.com/D-Star-AI/dsRAG>

in `dsrag` `llm.py`

```python
class MoonshotAPI(LLM):
    def __init__(self, model: str = "moonshot-1", temperature: float = 0.2, max_tokens: int = 1000):
        self.model = model
        self.temperature = temperature
        self.max_tokens = max_tokens

    def make_llm_call(self, chat_messages: list[dict]) -> str:
        from openai import OpenAI
        base_url = os.environ.get("DSRAG_MOONSHOT_BASE_URL", "https://api.moonshot.cn/v1")
        if base_url is not None:
            client = OpenAI(api_key=os.environ["MOONSHOT_API_KEY"], base_url=base_url)
        else:
            client = OpenAI(api_key=os.environ["MOONSHOT_API_KEY"])
        response = client.chat.completions.create(
            model=self.model,
            messages=chat_messages,
            max_tokens=self.max_tokens,
            temperature=self.temperature,
        )
        llm_output = response.choices[0].message.content.strip()
        return llm_output

    def to_dict(self):
        base_dict = super().to_dict()
        base_dict.update({
            'model': self.model,
            'temperature': self.temperature,
            'max_tokens': self.max_tokens,
        })
        return base_dict
```

In [3]:
from openai import OpenAI
import os
import pandas as pd
import numpy as np

os.environ["HTTP_PROXY"] = "http://127.0.0.1:7897/"
os.environ["HTTPS_PROXY"] = "http://127.0.0.1:7897/"
os.environ["http_proxy"] = "http://127.0.0.1:7897/"
os.environ["https_proxy"] = "http://127.0.0.1:7897/"
os.environ["all_proxy"] = "http://127.0.0.1:7897/"
os.environ["ALL_PROXY"] = "http://127.0.0.1:7897/"

# os.environ["HTTP_PROXY"] = ""
# os.environ["HTTPS_PROXY"] = ""
# os.environ["http_proxy"] = ""
# os.environ["https_proxy"] = ""
# os.environ["all_proxy"] = ""
# os.environ["ALL_PROXY"] = ""

with open("./api_key/moonshot_api_key", "r") as f:
    moonshot_api_key = f.read().rstrip()
with open("./api_key/cohere_api_key", "r") as f:
    cohere_api_key = f.read().rstrip()
with open("./api_key/openai_api_key", "r") as f:
    openai_api_key = f.read().rstrip()

In [7]:
from dsrag.create_kb import create_kb_from_file
from dsrag.knowledge_base import KnowledgeBase
from dsrag.embedding import CohereEmbedding
from dsrag.llm import OpenAIChatAPI
from dsrag.reranker import NoReranker
import os

llm = OpenAIChatAPI(model='gpt-4o-mini')
embedding_model = CohereEmbedding()

# os.environ["MOONSHOT_API_KEY"] = moonshot_api_key
# os.environ["CO_API_KEY"] = cohere_api_key
os.environ["DSRAG_OPENAI_BASE_URL"] = "https://api.deerapi.com"
os.environ["OPENAI_API_KEY"] = openai_api_key
# os.environ["CO_API_URL"] = "https://api.cohere.ai"

# Ensure the environment variables are available to subprocesses
# not global environment, but only for this process
# os.putenv("CO_API_KEY", cohere_api_key)
os.putenv("OPENAI_API_KEY", openai_api_key)
reranker = NoReranker()

kb = KnowledgeBase(
    kb_id="dst",
    reranker=reranker,
    auto_context_model=llm,
    embedding_model=embedding_model,
)

# 自定义chunking配置，防止进一步分块
chunking_config = {
    "chunk_size": 1000000,
    "min_length_for_chunking": 1000000,
}

# 添加文档时使用自定义的chunking配置
for i in range(100):
    file_path = f"./data/test/{i}.txt"
    kb.add_document(
        doc_id=f"{i}.txt", file_path=file_path, chunking_config=chunking_config
    )

  if mode == Mode.FUNCTIONS:


AttributeError: 'str' object has no attribute 'choices'

In [5]:
import os

os.environ["CO_API_KEY"] = cohere_api_key
print(os.environ["CO_API_KEY"])


nuZ6SfatSUqGq4HXuvpIG17aKkrnodMcXN2NV2uT


In [None]:
# 定义查询字符串
search_queries = ["What are the levels of AGI?", "What is the highest level of AGI?"]

# 执行查询并打印结果
results = kb.query(search_queries)
for segment in results:
    print(segment)

# preliminaries

In [1]:
import numpy as np
import pandas as pd

wiki_content_en_pre1 = pd.read_csv('data/wiki_content_en_pre1.csv')
for i in range(100):
    content = wiki_content_en_pre1['content'][i]
    # export to txt
    with open(f'data/test/{i}.txt', 'w') as f:
        f.write(content)
