In [ ]:
# CloudflareWorkersAI
from dotenv import load_dotenv
import os
from langchain_community.llms.cloudflare_workersai import CloudflareWorkersAI

# Load the .env file from the current directory.
# override=True makes load_dotenv re-read .env so that its values replace
# variables already present in the environment; a plain load_dotenv() call
# would leave already-set variables untouched.
load_dotenv(override=True)

# Values from .env can now be read like ordinary environment variables.
account_id = os.getenv('CF_ACCOUNT_ID')
api_token = os.getenv('CF_API_TOKEN')

print(account_id)
# Never echo the raw API token: notebook output is saved with the file and
# would leak the secret. Only report whether a token was found.
print("CF_API_TOKEN is set" if api_token else "CF_API_TOKEN is missing")

import getpass

# Cloudflare Workers AI LLM handle backed by the Llama 3 8B instruct model.
model = '@cf/meta/llama-3-8b-instruct'
cf_llm = CloudflareWorkersAI(
    model=model,
    api_token=api_token,
    account_id=account_id,
)

# The latest way to create embeddings:
# cloudflare_workersai
from langchain_community.embeddings.cloudflare_workersai import (
    CloudflareWorkersAIEmbeddings,
)

# Embedding dimension of this model: 384.
embeddings = CloudflareWorkersAIEmbeddings(
    model_name="@cf/baai/bge-small-en-v1.5",
    api_token=api_token,
    account_id=account_id,
)

In [ ]:
from langchain.retrievers.web_research import WebResearchRetriever


In [ ]:
import os
from langchain.vectorstores import Chroma
from langchain.chat_models.openai import ChatOpenAI
from langchain.utilities import GoogleSearchAPIWrapper

# Vectorstore: Chroma collection persisted under ./chroma_db_oai, using the
# `embeddings` object as its embedding function.
vectorstore = Chroma(embedding_function=embeddings, persist_directory="./chroma_db_oai")

# Google Programmable Search credentials.
# setdefault keeps any real value that is already in the environment (for
# example one loaded from .env) and only falls back to the placeholder, so
# running this cell no longer clobbers working credentials.
# Request the CSE id from https://programmablesearchengine.google.com/controlpanel/all
os.environ.setdefault("GOOGLE_CSE_ID", "yor cse id")
# Request the API key from https://developers.google.com/custom-search/v1/introduction
os.environ.setdefault("GOOGLE_API_KEY", "your google api key")
search = GoogleSearchAPIWrapper()

In [ ]:
# Run a sample query about vitamins through the Google search wrapper.
vitamin_question = "What is vitamin?"
search.run(vitamin_question)

In [ ]:
# Build the web-research retriever from the Cloudflare LLM, the Google search
# wrapper and the Chroma vector store.
web_research_retriever = WebResearchRetriever.from_llm(
    llm=cf_llm, search=search, vectorstore=vectorstore
)

In [1]:
# !pip install -q -U html2text



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [ ]:
from langchain.chains import RetrievalQAWithSourcesChain

# Answer a question with the sources-aware QA chain, retrieving context
# through the web-research retriever.
user_input = "Who is the winner of FIFA world cup 2002?"
qa_chain = RetrievalQAWithSourcesChain.from_chain_type(cf_llm, retriever=web_research_retriever)
# Calling a chain directly (qa_chain({...})) is deprecated in LangChain;
# invoke() is the supported entry point and returns the same output dict.
result = qa_chain.invoke({"question": user_input})
result


In [ ]:
import logging

logging.basicConfig()
# In LangChain releases that ship langchain_community (which this notebook
# imports), langchain.retrievers.web_research only re-exports the retriever,
# so its INFO messages are emitted under the langchain_community logger name.
# Raise both names so the messages appear with either package layout.
for _logger_name in (
    "langchain.retrievers.web_research",
    "langchain_community.retrievers.web_research",
):
    logging.getLogger(_logger_name).setLevel(logging.INFO)
user_input = "What is Task Decomposition in LLM Powered Autonomous Agents?"
# get_relevant_documents() is deprecated; invoke() is the supported call and
# returns the same list of documents.
docs = web_research_retriever.invoke(user_input)

In [ ]:
import os
import re
from typing import List
from langchain.chains import LLMChain
from pydantic import BaseModel, Field
from langchain.prompts import PromptTemplate
from langchain.output_parsers.pydantic import PydanticOutputParser

# Prompt that asks the LLM to rewrite one question as five numbered
# Google search queries.
search_template = """You are an assistant tasked with improving Google search
    results. Generate 5 Google search queries that are similar to
    this question. The output should be a numbered list of questions and each
    should have a question mark at the end: {question}"""
search_prompt = PromptTemplate(template=search_template, input_variables=["question"])


class LineList(BaseModel):
    """Container model holding the generated questions, one string per entry."""

    lines: List[str] = Field(
        description="Questions",
    )


class QuestionListOutputParser(PydanticOutputParser):
    """Output parser for a list of numbered questions.

    Extracts every item of the form ``<digits>.<text>`` from the LLM reply and
    wraps the matches in a ``LineList``.
    """

    def __init__(self) -> None:
        super().__init__(pydantic_object=LineList)

    def parse(self, text: str) -> LineList:
        """Return the numbered items found in *text*.

        Args:
            text: Raw LLM output expected to contain lines such as
                ``1. First question?``.

        Returns:
            A ``LineList`` whose ``lines`` are the matched items, each still
            carrying its number prefix and, when present, its trailing newline.
        """
        # Accept end-of-text as a terminator as well as "\n"; otherwise the
        # final item is silently dropped whenever the reply does not end with
        # a newline, which is the common case for LLM output.
        lines = re.findall(r"\d+\..*?(?:\n|$)", text)
        return LineList(lines=lines)


# Chain that sends the search prompt to the Cloudflare LLM and parses the
# reply into a list of numbered questions.
llm_chain = LLMChain(
    prompt=search_prompt,
    llm=cf_llm,
    output_parser=QuestionListOutputParser(),
)

In [ ]:
# Initialize the retriever with the custom query-generation chain instead of
# the default one built by WebResearchRetriever.from_llm.
web_research_retriever_llm_chain = WebResearchRetriever(vectorstore=vectorstore, llm_chain=llm_chain, search=search)

# Run. get_relevant_documents() is deprecated; invoke() is the supported call
# and returns the same list of documents.
docs = web_research_retriever_llm_chain.invoke("What is the recommended way to recycle plastics?")


In [ ]:
# Display the documents returned by the retriever in the previous cell.
docs