In [None]:
from typing import Any
from llama_index.core.llms import (
    CustomLLM,
    CompletionResponse,
    CompletionResponseGen,
    LLMMetadata,
)
from llama_index.core.llms.callbacks import llm_completion_callback
from zhipuai import ZhipuAI
from dotenv import dotenv_values


class GLM(CustomLLM):
    """LlamaIndex ``CustomLLM`` that forwards prompts to ZhipuAI chat completions.

    Each prompt is sent as a single ``user`` message; extra ``**kwargs``
    passed to ``complete`` / ``stream_complete`` are accepted but not
    forwarded to the ZhipuAI client.
    """

    # NOTE: the ``.env`` file is read once, when the class body is executed.
    # A missing ``glm_api_key`` entry raises ``KeyError`` at that point.
    # Fields are annotated because ``CustomLLM`` is a Pydantic model, and
    # Pydantic v2 rejects non-annotated class attributes.
    config: dict = dotenv_values(".env")
    glm_api_key: str = config["glm_api_key"]
    # Sampling temperature passed to every chat-completion request.
    temperature: float = 0.1
    # Values reported through ``metadata``; they are not sent to the API.
    context_window: int = 3900
    num_output: int = 1024
    # Model identifier passed to the API and reported through ``metadata``.
    model_name: str = "glm-4"

    @property
    def metadata(self) -> LLMMetadata:
        """Get LLM metadata."""
        return LLMMetadata(
            context_window=self.context_window,
            num_output=self.num_output,
            model_name=self.model_name,
        )

    @llm_completion_callback()
    def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
        """Return the model's full reply to ``prompt``.

        Args:
            prompt: Text sent as the content of a single ``user`` message.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            A ``CompletionResponse`` whose ``text`` is the content of the
            first returned choice (empty string if the content is ``None``).
        """
        client = ZhipuAI(api_key=self.glm_api_key)
        response = client.chat.completions.create(
            model=self.model_name,
            temperature=self.temperature,
            messages=[
                {"role": "user", "content": prompt},
            ],
        )

        # ``text`` must be a string; guard against a ``None`` message content.
        content = response.choices[0].message.content or ""
        return CompletionResponse(text=content)

    @llm_completion_callback()
    def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
        """Stream the model's reply to ``prompt`` incrementally.

        Args:
            prompt: Text sent as the content of a single ``user`` message.
            **kwargs: Accepted for interface compatibility; not used.

        Yields:
            One ``CompletionResponse`` per streamed chunk, where ``delta`` is
            the newly received text and ``text`` is everything received so far.
        """
        client = ZhipuAI(api_key=self.glm_api_key)
        response = client.chat.completions.create(
            model=self.model_name,
            temperature=self.temperature,
            messages=[
                {"role": "user", "content": prompt},
            ],
            stream=True,
        )

        # Process the streaming response. The declared return type is a
        # generator of ``CompletionResponse``, so wrap each incremental piece
        # instead of yielding the SDK's raw delta object.
        accumulated = ""
        for chunk in response:
            # A chunk without choices or without content contributes no text.
            piece = (chunk.choices[0].delta.content or "") if chunk.choices else ""
            accumulated += piece
            yield CompletionResponse(text=accumulated, delta=piece)