In [None]:
from langchain_openai import ChatOpenAI

# Chat model used as the generation step of the chains built in later cells.
llm = ChatOpenAI(model="gpt-4o-mini")

In [None]:
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings


# Load the blog post, parsing only the elements whose class is one of
# "post-content", "post-title" or "post-header".
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    },
)
docs = loader.load()

# Chunk the loaded documents (chunk_size=1000, chunk_overlap=200).
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)
splits = text_splitter.split_documents(docs)

# Build a Chroma vector store from the chunks using OpenAI embeddings.
vectorstore = Chroma.from_documents(embedding=OpenAIEmbeddings(), documents=splits)

In [None]:
from langchain import hub
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser


# Expose the vector store as a retriever with its default settings.
retriever = vectorstore.as_retriever()
# Pull the "rlm/rag-prompt" prompt from the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")


def format_docs(docs):
    """Merge documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values joined with one blank line between
        consecutive entries; an empty string when ``docs`` is empty.
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)


# RAG pipeline: the input feeds both the retriever (whose documents are
# flattened by format_docs into "context") and, unchanged, the "question"
# slot; the mapping is piped into the prompt, the LLM, and the output parser.
rag_chain = (
    dict(context=retriever | format_docs, question=RunnablePassthrough())
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Ask the chain (in Japanese) "What is Task Decomposition?" and print the answer.
response = rag_chain.invoke("Task Decompositionとはなんですか？")
print(response)

In [None]:
# Ask the chain (in Japanese) "What is an LLM agent?" and print the answer.
response = rag_chain.invoke("LLMエージェントとはなんですか？")
print(response)

In [None]:
# Cleanup: delete the collection backing the current vector store before the
# notebook moves on to indexing a different page.
vectorstore.delete_collection()

In [None]:
import bs4
from langchain_community.document_loaders import WebBaseLoader

# Limit HTML parsing to what the "article" strainer matches.
bs4_strainer = bs4.SoupStrainer("article")
loader = WebBaseLoader(
    bs_kwargs=dict(parse_only=bs4_strainer),
    web_paths=("https://sakana.ai/ai-scientist/",),
)
docs = loader.load()
# Cell output: length of the first loaded document's text.
len(docs[0].page_content)

In [None]:
# Preview the first 500 characters of the first loaded document.
print(docs[0].page_content[:500])

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Same chunking parameters as before (size 1000, overlap 200), this time with
# add_start_index=True enabled.
text_splitter = RecursiveCharacterTextSplitter(
    add_start_index=True,
    chunk_overlap=200,
    chunk_size=1000,
)
all_splits = text_splitter.split_documents(docs)

# Cell output: number of chunks produced.
len(all_splits)

In [None]:
# Inspect the metadata of the second chunk (index 1).
all_splits[1].metadata

In [None]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# Build a Chroma vector store from the chunks in all_splits using OpenAI embeddings.
vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())

In [None]:
# Similarity-search retriever configured with k=6.
retriever = vectorstore.as_retriever(
    search_kwargs=dict(k=6),
    search_type="similarity",
)

# Query (in Japanese): "What is Sakana AI's new announcement?"
retrieved_docs = retriever.invoke("Sakana AIの新しい発表は何ですか？")
# Cell output: number of documents retrieved.
len(retrieved_docs)

In [None]:
# Print the text of the first retrieved document.
print(retrieved_docs[0].page_content)

In [None]:
from langchain_openai import ChatOpenAI

# Chat model used as the generation step of the chains in the following cells.
llm = ChatOpenAI(model="gpt-4o-mini")

In [None]:
from langchain import hub

# Pull the "rlm/rag-prompt" prompt from the LangChain hub, then print the
# input variables and the template text of its first message, one per line.
prompt = hub.pull("rlm/rag-prompt")
print(
    prompt.messages[0].prompt.input_variables,
    prompt.messages[0].prompt.template,
    sep="\n",
)

In [None]:
# Render the prompt with placeholder values to see the text it produces.
example_messages = prompt.invoke(
    dict(context="filter context", question="filter question")
).to_messages()

print(example_messages[0].content)

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


# RAG pipeline: the input feeds both the retriever (whose documents are
# flattened by format_docs into "context") and, unchanged, the "question"
# slot; the mapping is piped into the prompt, the LLM, and the output parser.
rag_chain = (
    dict(context=retriever | format_docs, question=RunnablePassthrough())
    | prompt
    | llm
    | StrOutputParser()
)

# Stream the answer and print each chunk as it arrives, without line breaks.
# Question (in Japanese): "What is Sakana AI's new announcement? Please tell
# me in Japanese."
for chunk in rag_chain.stream("Sakana AIの新しい発表は何ですか？日本語で教えてください。"):
    print(chunk, end="", flush=True)

In [None]:
# Same pipeline cut off after the prompt step, so the fully rendered prompt
# (retrieved context included) can be inspected without calling the LLM.
chain = (
    dict(context=retriever | format_docs, question=RunnablePassthrough())
    | prompt
)

# Question (in Japanese): "What is Sakana AI's new announcement?"
response = chain.invoke("Sakana AIの新しい発表は何ですか？")
print(response.messages[0].content)

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System prompt (Japanese). English gloss: "You are an assistant for
# question-answering tasks. Use the provided context below to answer the
# question. If you don't know the answer, say so. Answer concisely in at most
# three sentences." The {context} placeholder is part of the template text.
system_prompt = """あなたは質問応答タスクのためのアシスタントです。
提供された以下の文脈情報を使用して質問に答えてください。
答えがわからない場合は、わからないと言ってください。
最大3文で回答し、簡潔に答えてください。

{context}
"""

# Two-message chat prompt: the system instructions, then the user's {input}.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

# Combine the documents chain with the retriever into a retrieval chain.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# Question (in Japanese): "What is Sakana AI's new announcement? Tell me in
# Japanese." The generated text is read from the "answer" key of the result.
response = rag_chain.invoke(dict(input="Sakana AIの新しい発表は何ですか？日本語で教えて"))
print(response["answer"])

In [None]:
# List the keys present in the retrieval chain's response.
response.keys()

In [None]:
# Print every document under the response's "context" key, each followed by
# one blank line.
for document in response["context"]:
    print(document, end="\n\n")

In [None]:
from langchain_core.prompts import PromptTemplate

# Prompt template (Japanese). English gloss: "Use the following context to
# answer the question at the end. If you don't know the answer, say so rather
# than making one up. Answer as concisely as possible in at most three
# sentences. Always end the answer with 'ご質問ありがとうございます！' (Thank
# you for your question!)." It is followed by the {context} and {question}
# placeholders and a "helpful answer:" cue.
template = """以下の文脈を使用して、最後の質問に答えてください。
答えが分からない場合は、答えを作ろうとせずに、分からないと言ってください。
最大3文で、できるだけ簡潔に答えてください。
回答の最後には必ず「ご質問ありがとうございます！」と付け加えてください。

{context}

質問: {question}

役立つ回答:"""

custom_rag_prompt = PromptTemplate.from_template(template)

# Same pipeline shape as the earlier chains, with the custom prompt swapped in.
rag_chain = (
    dict(context=retriever | format_docs, question=RunnablePassthrough())
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)

# Question (in Japanese): "What can be done using Sakana AI's new technology?"
print(rag_chain.invoke("Sakana AIの新しい技術を用いると何ができますか？"))