In [1]:
import os

from dotenv import load_dotenv
from langchain_community.llms.cloudflare_workersai import CloudflareWorkersAI
from langchain_community.llms.tongyi import Tongyi
from langchain_openai import ChatOpenAI

load_dotenv(override=True)

account_id = os.getenv('CF_ACCOUNT_ID')
api_token = os.getenv('CF_API_TOKEN')
print(account_id)
print(api_token)

# CloudflareWorkersAI
model = '@cf/meta/llama-3-8b-instruct'
cf_llm = CloudflareWorkersAI(
    account_id=account_id,
    api_token=api_token,
    model=model
)

DASHSCOPE_API_KEY = os.getenv('DASHSCOPE_API_KEY')
print(DASHSCOPE_API_KEY)

# qwen
qw_llm = Tongyi(
    model='qwen2-1.5b-instruct'
)

# qwen 兼容 openai的接口
qw_llm_openai = ChatOpenAI(
    openai_api_base='https://dashscope.aliyuncs.com/compatible-mode/v1',
    openai_api_key=DASHSCOPE_API_KEY,
    model_name="qwen2-1.5b-instruct",
    temperature=0.7,
    streaming=True,
)

api_key = os.getenv('OPENAI_API_KEY')
base_url = os.getenv('OPENAI_API_BASE')
print(api_key)
print(base_url)

# openai/moonshot
ms_llm = ChatOpenAI(
    openai_api_base=base_url,
    openai_api_key=api_key,
    model_name="moonshot-v1-8k",
    temperature=0.7,
)


8483c3ec7a0cbc54a8d660b5b9002b04
Gcllof8ze6dgtcqFI5FQZ2SD_5tfCD4Db7NuS6jn
sk-01c5003340c3453b934052d737d45e01
sk-UGVpjuTwo2Q8pewoqUDfckw1A0pbSDli9ElFMeS9WareKknG
https://api.moonshot.cn/v1/


In [2]:
# 最新的Embedding方式
# cloudflare_workersai
from langchain_community.embeddings.cloudflare_workersai import (
    CloudflareWorkersAIEmbeddings,
)
import os
from dotenv import load_dotenv

load_dotenv()
account_id = os.getenv('CF_ACCOUNT_ID')
api_token = os.getenv('CF_API_TOKEN')

# @cf/baai/bge-large-en-v1.5
# 维度是：1024

# @cf/baai/bge-small-en-v1.5
# 维度是：384
embeddings = CloudflareWorkersAIEmbeddings(
    account_id=account_id,
    api_token=api_token,
    model_name="@cf/baai/bge-small-en-v1.5",
)


In [3]:
from langchain_community.vectorstores import SupabaseVectorStore
import os
from dotenv import load_dotenv
from supabase.client import Client, create_client

load_dotenv(override=True)

supabase_url = os.environ.get("SUPABASE_URL")
supabase_key = os.environ.get("SUPABASE_SERVICE_KEY")
supabase_token = os.environ.get("SUPABASE_TOKEN")

print(f"supabase_url: " + supabase_url)
print(f"supabase_token: " + supabase_token)
print(f"supabase_key: " + supabase_key)

supabase: Client = create_client(supabase_url, supabase_key)

vectorstore_small = SupabaseVectorStore(
    embedding=embeddings,
    client=supabase,
    table_name="bge_small_vector",
    query_name="bge_small_match_documents",
)


supabase_url: https://infrxrfaftyrxvkwvncf.supabase.co
supabase_token: sbp_92789ce9c4eba823025bb099698f61b8929a59e2
supabase_key: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImluZnJ4cmZhZnR5cnh2a3d2bmNmIiwicm9sZSI6ImFub24iLCJpYXQiOjE3MTc5MTMwOTMsImV4cCI6MjAzMzQ4OTA5M30.4XcckGc0Uk-jj5j1aNbN2HnuXEi6Z4bjUrsAEhApXeM


In [39]:
from operator import itemgetter
from typing import List, Optional, Tuple
from langchain_core.messages import BaseMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import format_document
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from prompts import CONDENSE_QUESTION_PROMPT, DOCUMENT_PROMPT, LLM_CONTEXT_PROMPT

# from ..robus_llm import qw_llm_openai
# from ..robus_vectorstore import vectorstore_small

retriever = vectorstore_small.as_retriever(search_type="similarity", search_kwargs={"k": 3})

print(DOCUMENT_PROMPT)


def _combine_documents(
        docs, document_prompt=DOCUMENT_PROMPT, document_separator="\n\n"
):
    doc_strings = [format_document(doc, document_prompt) for doc in docs]
    return document_separator.join(doc_strings)


def _format_chat_history(chat_history: List[Tuple]) -> str:
    buffer = ""
    for dialogue_turn in chat_history:
        human = "Human: " + dialogue_turn[0]
        ai = "Assistant: " + dialogue_turn[1]
        buffer += "\n" + "\n".join([human, ai])
    return buffer


class ChainInput(BaseModel):
    chat_history: Optional[List[BaseMessage]] = Field(
        description="Previous chat messages."
    )
    question: str = Field(..., description="The question to answer.")


# 给定以下对话和后续问题，将后续问题重新表述为一个独立的问题，使用其原始语言。如果没有聊天记录，就将问题重新表述为一个独立的问题。
_inputs = RunnableParallel(
    standalone_question=RunnablePassthrough.assign(
        chat_history=lambda x: _format_chat_history(x["chat_history"])
    )
                        | CONDENSE_QUESTION_PROMPT
                        | qw_llm_openai
                        | StrOutputParser(),
)

_context = {
    "context": itemgetter("standalone_question") | retriever | _combine_documents,
    "question": lambda x: x["standalone_question"],
}

chain = _inputs | _context | LLM_CONTEXT_PROMPT | qw_llm_openai | StrOutputParser()

input_variables=['page_content'] template='\n---\nPASSAGE:\n{page_content}\n---\n'


In [7]:
chain = chain.with_types(input_type=ChainInput)

In [8]:
chain.invoke({"question": "游侠客是什么", "chat_history": ""})

2024-07-02 14:00:39,965:INFO - HTTP Request: POST https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions "HTTP/1.1 200 OK"
2024-07-02 14:00:44,928:INFO - HTTP Request: POST https://infrxrfaftyrxvkwvncf.supabase.co/rest/v1/rpc/bge_small_match_documents?limit=3 "HTTP/1.1 200 OK"


ChatPromptValue(messages=[HumanMessage(content='\nUse the following passages to answer the user\'s question.\nEach passage has a SOURCE which is the title of the document. When answering, cite source name of the passages you are answering from below the answer in a unique bullet point list.\n\nIf you don\'t know the answer, just say that you don\'t know, don\'t try to make up an answer.\n\n----\n\n---\nPASSAGE:\n交通：各地往返海拉尔的交通费用（请在报名缴费成功后再预订大交通）\n保险：建议您购买旅游意外险，游侠客提供不同的保险套餐，可在下单过程中自行选购\n餐食：行程包含的餐食之外的用餐费用，自由用餐，约伴一起AA\n单房差：行程中所列住宿标间含2个床位。单人报名的，游侠客担保拼房，根据报名顺序安排同性客人或领队、导游拼房；若不同意游侠客安排拼房，请补1份单房差以享用整个房间（标注多人间的除外）\n其他：费用包含中不含的其他费用其他个人消费\n\n活动亮点\n\n醉美秋色景观：大兴安岭阿尔山+根河+呼伦贝尔草原，草原、湖泊、森林、湿地、河流等题材，拍摄中国醉美秋色\n深入阿尔山区：深入阿尔山景区内住宿，全天拍摄：乌苏浪子湖晨雾，驼峰岭天池晨曦，大峡谷慢门等\n安排人文场景：特别安排地道蒙古族拍摄题材：套马、马术表演；在私家摄影点安排“牧马归途”场景\n专业摄影领队：游侠客专属摄影领队+专业摄影行程设计，多个日出日落拍摄，把握合适拍摄时机，保证出片效率和质量\n\n具体行程\n\n行程概要\n\n天数：8天7晚\n出发：全国出发\n到达：海拉尔\n解散：海拉尔\n\n第1天 各地海拉尔市 ★集合日\n---\n\n\n\n---\nPASSAGE:\n全天\n  全天为活动全天集合日，各地游侠自行前往呼伦贝尔市的首府海拉尔集合。早到的游侠，可以去逛逛呼伦贝尔民

In [21]:
chain = _inputs

In [22]:
chain.invoke({"question": "游侠客是什么", "chat_history": ""})

2024-07-02 11:42:18,847:INFO - HTTP Request: POST https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions "HTTP/1.1 200 OK"


{'standalone_question': 'What does "游侠客" mean?'}

In [23]:
chain = {'standalone_question': 'What does "游侠客" mean?'} | _context

In [27]:
# 是一个字典
chain

{'standalone_question': 'What does "游侠客" mean?',
 'context': RunnableLambda(itemgetter('standalone_question'))
 | VectorStoreRetriever(tags=['SupabaseVectorStore', 'CloudflareWorkersAIEmbeddings'], vectorstore=<langchain_community.vectorstores.supabase.SupabaseVectorStore object at 0x10822a050>, search_kwargs={'k': 3})
 | RunnableLambda(_combine_documents),
 'question': <function __main__.<lambda>(x)>}

In [5]:
chain = _inputs | _context | LLM_CONTEXT_PROMPT

In [6]:
chain.invoke({"question": "游侠客是什么", "chat_history": ""})

2024-07-02 14:00:12,146:INFO - HTTP Request: POST https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions "HTTP/1.1 200 OK"
2024-07-02 14:00:16,107:INFO - HTTP Request: POST https://infrxrfaftyrxvkwvncf.supabase.co/rest/v1/rpc/bge_small_match_documents?limit=3 "HTTP/1.1 200 OK"


ChatPromptValue(messages=[HumanMessage(content='\nUse the following passages to answer the user\'s question.\nEach passage has a SOURCE which is the title of the document. When answering, cite source name of the passages you are answering from below the answer in a unique bullet point list.\n\nIf you don\'t know the answer, just say that you don\'t know, don\'t try to make up an answer.\n\n----\n\n---\nPASSAGE:\n交通：各地往返海拉尔的交通费用（请在报名缴费成功后再预订大交通）\n保险：建议您购买旅游意外险，游侠客提供不同的保险套餐，可在下单过程中自行选购\n餐食：行程包含的餐食之外的用餐费用，自由用餐，约伴一起AA\n单房差：行程中所列住宿标间含2个床位。单人报名的，游侠客担保拼房，根据报名顺序安排同性客人或领队、导游拼房；若不同意游侠客安排拼房，请补1份单房差以享用整个房间（标注多人间的除外）\n其他：费用包含中不含的其他费用其他个人消费\n\n活动亮点\n\n醉美秋色景观：大兴安岭阿尔山+根河+呼伦贝尔草原，草原、湖泊、森林、湿地、河流等题材，拍摄中国醉美秋色\n深入阿尔山区：深入阿尔山景区内住宿，全天拍摄：乌苏浪子湖晨雾，驼峰岭天池晨曦，大峡谷慢门等\n安排人文场景：特别安排地道蒙古族拍摄题材：套马、马术表演；在私家摄影点安排“牧马归途”场景\n专业摄影领队：游侠客专属摄影领队+专业摄影行程设计，多个日出日落拍摄，把握合适拍摄时机，保证出片效率和质量\n\n具体行程\n\n行程概要\n\n天数：8天7晚\n出发：全国出发\n到达：海拉尔\n解散：海拉尔\n\n第1天 各地海拉尔市 ★集合日\n---\n\n\n\n---\nPASSAGE:\n全天\n  全天为活动全天集合日，各地游侠自行前往呼伦贝尔市的首府海拉尔集合。早到的游侠，可以去逛逛呼伦贝尔民

In [9]:
chain.invoke({"question": "我刚才问了什么"})

KeyError: 'chat_history'

In [10]:
from langchain.schema.runnable import RunnableConfig
from langchain.load.serializable import Serializable
from typing import Optional, Dict
from langchain_core.runnables.utils import Input
from langchain.schema.runnable import Runnable


class StdOutputRunnable(Serializable, Runnable[Input, Input]):
    @property
    def lc_serializable(self) -> bool:
        return True

    def invoke(self, input: Dict, config: Optional[RunnableConfig] = None) -> Input:
        print(f"Hey, I received the name {input['name']}")
        return self._call_with_config(lambda x: x, input, config)

In [38]:
chain = (_inputs
         | _context
         | LLM_CONTEXT_PROMPT
         | qw_llm_openai
         | StrOutputParser())

2024-07-02 14:15:05,981:INFO - HTTP Request: POST https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions "HTTP/1.1 200 OK"


TypeError: 'AIMessage' object is not subscriptable

In [40]:
chain.invoke({"question": "在游侠客，您能推荐一些旅游线路吗", "chat_history": ""})

2024-07-02 14:16:08,934:INFO - HTTP Request: POST https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions "HTTP/1.1 200 OK"
2024-07-02 14:16:13,287:INFO - HTTP Request: POST https://infrxrfaftyrxvkwvncf.supabase.co/rest/v1/rpc/bge_small_match_documents?limit=3 "HTTP/1.1 200 OK"
2024-07-02 14:16:13,658:INFO - HTTP Request: POST https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions "HTTP/1.1 200 OK"


'None of the given passages mention any popular travel routes available on GuoYiKe.'

In [41]:
chain.invoke({"question": "儿童多少钱", "chat_history": ""})

2024-07-02 14:16:53,647:INFO - HTTP Request: POST https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions "HTTP/1.1 200 OK"
2024-07-02 14:16:55,664:INFO - HTTP Request: POST https://infrxrfaftyrxvkwvncf.supabase.co/rest/v1/rpc/bge_small_match_documents?limit=3 "HTTP/1.1 200 OK"
2024-07-02 14:16:55,981:INFO - HTTP Request: POST https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions "HTTP/1.1 200 OK"


'The cost for children is not mentioned in the given passages.'