In [5]:
from typing import Any
from llama_index.core.llms import (
    CustomLLM,
    CompletionResponse,
    CompletionResponseGen,
    LLMMetadata,
)
from llama_index.core.llms.callbacks import llm_completion_callback
from zhipuai import ZhipuAI
from dotenv import dotenv_values
from typing import ClassVar

from loguru import logger
# File sink for INFO-and-above records: the file name embeds the date, a new
# file is started at 00:00, files are kept for 10 days and zip-compressed.
logger.add(
    "logs/app_{time:YYYY-MM-DD}.log",
    level="INFO",
    rotation="00:00",
    retention="10 days",
    compression="zip",
)

class GLM(CustomLLM):
    """LlamaIndex custom LLM backed by the ZhipuAI chat-completions API.

    Every prompt is sent as a single ``user`` message. ``context_window`` and
    ``num_output`` are only reported through :attr:`metadata`; this class does
    not forward them to the API.
    """

    # Key/value pairs parsed from the local ".env" file when the class is defined.
    config: dict = dotenv_values(".env")
    # API key shared by all instances. A missing "glm_api_key" entry raises
    # KeyError at class-definition time.
    glm_api_key: ClassVar[str] = config["glm_api_key"]
    # Sampling temperature forwarded to the API on every request.
    temperature: float = 0.1
    # Values advertised to LlamaIndex via ``metadata``.
    context_window: int = 3900
    num_output: int = 1024
    # Model identifier forwarded to the API on every request.
    model_name: str = "glm-4-plus"

    @property
    def metadata(self) -> LLMMetadata:
        """Get LLM metadata."""
        return LLMMetadata(
            context_window=self.context_window,
            num_output=self.num_output,
            model_name=self.model_name,
        )

    def _create_chat_completion(self, prompt: str, **options: Any) -> Any:
        """Send ``prompt`` as one user message and return the raw API response.

        Args:
            prompt: Text placed in the single ``user`` message.
            **options: Extra keyword arguments passed straight through to
                ``client.chat.completions.create`` (e.g. ``stream=True``).
        """
        client = ZhipuAI(api_key=self.glm_api_key)
        return client.chat.completions.create(
            model=self.model_name,
            temperature=self.temperature,
            messages=[{"role": "user", "content": prompt}],
            **options,
        )

    @llm_completion_callback()
    def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
        """Run a blocking completion for ``prompt``.

        This is deliberately best-effort: any exception raised while calling
        the API or reading the response is logged and an empty completion is
        returned instead of propagating the error.

        Args:
            prompt: Text to send to the model.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            The text of the first choice, or an empty-text response on failure.
        """
        try:
            response = self._create_chat_completion(prompt)
            content = response.choices[0].message.content
            return CompletionResponse(text=content)
        except Exception as e:
            logger.error(f"Processing: {prompt} An error occurred during completion: {e}")
            return CompletionResponse(text="")

    @llm_completion_callback()
    def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
        """Stream a completion for ``prompt``.

        Unlike :meth:`complete`, errors are not caught here and propagate to
        the caller.

        Args:
            prompt: Text to send to the model.
            **kwargs: Accepted for interface compatibility; not used.

        Yields:
            One ``CompletionResponse`` per streamed chunk, where ``text`` is
            the text accumulated so far and ``delta`` is the newly received
            increment.
        """
        response = self._create_chat_completion(prompt, stream=True)

        accumulated = ""
        for chunk in response:
            # Skip chunks that carry no choices at all.
            if not chunk.choices:
                continue
            # The incremental text may be None for some chunks; treat it as
            # an empty increment so concatenation stays safe.
            piece = chunk.choices[0].delta.content or ""
            accumulated += piece
            # Yield a CompletionResponse rather than the raw SDK delta object,
            # as the CompletionResponseGen return type requires.
            yield CompletionResponse(text=accumulated, delta=piece)

In [6]:
# Smoke test: build a GLM with its default settings and run one non-streaming
# completion (this performs a real call through the ZhipuAI client).
GLM().complete("你好")

CompletionResponse(text='你好👋！我是人工智能助手智谱清言（ChatGLM），很高兴见到你，欢迎问我任何问题。', additional_kwargs={}, raw=None, logprobs=None, delta=None)