## 载入模型

In [2]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
import dotenv

# Load environment variables from a local .env file before any client is built
# (presumably this is where the OpenAI API key lives — confirm for your setup).
dotenv.load_dotenv()

# Chat model reused by the LangChain chains in the cells below.
llm = ChatOpenAI(model="gpt-4-turbo")
# Embedding model; used later to build the FAISS index for the RAG demo.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

## 单次对话 demo


In [3]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import HumanMessage

# Two-message template: a fixed system persona followed by the caller's text.
system_turn = ("system", "You are a world class technical documentation writer.")
human_turn = ("user", "{input}")
prompt = ChatPromptTemplate.from_messages([system_turn, human_turn])

# Pipeline: prompt -> chat model -> plain string.
output_parser = StrOutputParser()
chain = prompt | llm | output_parser

chain.invoke({"input": "Hi!"})

'Hello! How can I assist you today?'

## 多模态 demo

In [4]:
import base64

def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        The file's bytes, base64-encoded and decoded to a UTF-8 ``str``.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
  
# Encode the sample figure once; the next cell embeds this string in a data URL.
image_base64 = encode_image("../docs/figures/s-3.png")

In [5]:
from openai import OpenAI

client = OpenAI()

# Build the multimodal user turn from named parts: a text question plus the
# base64-encoded PNG wrapped in a data URL.
vision_question_part = {"type": "text", "text": "What is in this image?"}
vision_image_part = {
    "type": "image_url",
    "image_url": {
        "url": f"data:image/png;base64,{image_base64}",
        "detail": "auto",
    },
}
vision_user_message = {
    "role": "user",
    "content": [vision_question_part, vision_image_part],
}

response = client.chat.completions.create(
    model="gpt-4-turbo",
    messages=[vision_user_message],
    max_tokens=300,
)

# Only the first choice's text is shown.
print(response.choices[0].message.content)

The image appears to be a graphical representation of an advanced driver-assistance system (ADAS). Here's a breakdown of its components:


2. **Center Section**: This part of the image has a diagram illustrating the flow of information and actions in the system. It includes sensory inputs from what appears to be a virtual or augmented reality view, analyzed by an ADAS system represented by a robot icon named "HCopilot". The robot processes the data, deciding that the driver should watch out for a car from the left.

3. **Right Section**: This part of the image offers another external view of the driving environment seen from outside the car, showing a sunny, urban setting with roads and buildings.

Overall, the image seems to illustrate how modern technology like ADAS can be used to enhance safety in driving through visual sensors and AI-driven analysis, providing real-time feedback and alerts to drivers.


In [13]:
from langchain_core.prompts.image import ImagePromptTemplate

# Content parts of the human turn: a templated text question plus a fixed image.
# Alternatives for "url": a data URL built from the local figure
# (f"data:image/png;base64,{image_base64}"), or the placeholder "{img_url}",
# which adds an `img_url` variable that must then be supplied to invoke().
messages = [
    {"type": "text", "text": "{input}"},
    {
        "type": "image_url",
        "image_url": {
            "url": "https://lsky.metattri.com/i/2024/04/05/660ecef5ef6f2.png",
            "detail": "auto",
        },
    },
]

# The same parts in the raw OpenAI chat format (not used by the chain below).
prompt_openai = [
    {
        "role": "user",
        "content": messages,
    }
]

# The human turn is given as a ("user", parts) tuple, not a HumanMessage
# instance. Message instances are inserted into the prompt verbatim, so with
# HumanMessage(content=messages) the literal text "{input}" was sent and the
# question passed to invoke() never reached the model. The tuple form makes the
# parts a template, so "{input}" is filled in at invoke time.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a world class technical expert."),
    ("user", messages),
])

chain = prompt | llm | output_parser
chain.invoke({"input": "What is in this image?"})

'This image appears to showcase a group of individuals associated with an educational or instructional program, divided into categories such as Instructors, Co-Instructors, and Teaching Assistants. Each person is represented by a circular portrait, and their names are displayed below each image. If you have any specific questions about the layout or design of this image, feel free to ask!'

## RAG demo

In [16]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Fetch a single API-reference page; it is the only source document for the RAG demo.
loader = WebBaseLoader("https://api.python.langchain.com/en/latest/prompts/langchain_core.prompts.image.ImagePromptTemplate.html")
docs = loader.load()

# Split the page into chunks using the splitter's default settings.
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(docs)

# Build the FAISS vector store from the chunks with the embedding model created earlier.
vector = FAISS.from_documents(documents, embeddings)

In [17]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

# Prompt that tells the model to answer only from the retrieved context.
prompt = ChatPromptTemplate.from_template("""Answer the following question based only on the provided context:

<context>
{context}
</context>

Question: {input}""")

# Retriever over the FAISS store, plus a chain that places the retrieved
# documents into {context} and the question into {input}.
retriever = vector.as_retriever()
document_chain = create_stuff_documents_chain(llm, prompt)
retrieval_chain = create_retrieval_chain(retriever, document_chain)

# Named `question` rather than `input`: assigning to `input` would shadow the
# built-in input() for every later cell in this kernel session.
question = "Does LangChain accept image prompt?"
res = retrieval_chain.invoke({"input": question})

print(res["answer"])

Yes, LangChain does accept image prompts. This is indicated by the presence of the `ImagePromptTemplate` class within the `langchain_core.prompts.image` module, which is specifically designed for creating image prompt templates for a multimodal model.


## 多轮对话 demo

In [18]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder

# Prompt for the query-generation step: replay the conversation so far, append
# the newest user turn, then ask the model for a single search query.
history_slot = MessagesPlaceholder(variable_name="chat_history")
latest_user_turn = ("user", "{input}")
query_instruction = (
    "user",
    "Given the above conversation, generate a search query to look up to get information relevant to the conversation",
)
prompt = ChatPromptTemplate.from_messages(
    [history_slot, latest_user_turn, query_instruction]
)

retriever_chain = create_history_aware_retriever(llm, retriever, prompt)

from langchain_core.messages import HumanMessage, AIMessage

# A short prior exchange for the follow-up question to build on.
chat_history = [
    HumanMessage(content="Does LangChain accept image prompt?"),
    AIMessage(content="Yes!"),
]

follow_up_request = {"chat_history": chat_history, "input": "Tell me how"}
retriever_chain.invoke(follow_up_request)

In [20]:
# Answering prompt: the retrieved context goes in the system turn, followed by
# the prior conversation and the latest user message.
answer_messages = [
    ("system", "Answer the user's questions based on the below context:\n\n{context}"),
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(answer_messages)

document_chain = create_stuff_documents_chain(llm, prompt)

# Retrieval uses the history-aware retriever_chain, not the bare retriever.
retrieval_chain = create_retrieval_chain(retriever_chain, document_chain)

In [22]:
# Ask a follow-up that depends on chat_history; as the cell's last expression,
# the chain's full result mapping is displayed.
retrieval_chain.invoke({
    "chat_history": chat_history,
    "input": "Tell me how with example"
})

{'chat_history': [HumanMessage(content='Does LangChain accept image prompt?'),
  AIMessage(content='Yes!')],
 'input': 'Tell me how with example',
 'context': [Document(page_content='param output_parser: Optional[BaseOutputParser] = None¶\nHow to parse the output of calling an LLM on this formatted prompt.\n\n\n\nparam partial_variables: Mapping[str, Any] [Optional]¶\nA dictionary of the partial variables the prompt template carries.\nPartial variables populate the template so that you don’t need to\npass them in every time you call the prompt.\n\n\n\nparam tags: Optional[List[str]] = None¶\nTags to be used for tracing.\n\n\n\nparam template: dict [Optional]¶\nTemplate for the prompt.\n\n\n\nasync abatch(inputs: List[Input], config: Optional[Union[RunnableConfig, List[RunnableConfig]]] = None, *, return_exceptions: bool = False, **kwargs: Optional[Any]) → List[Output]¶\nDefault implementation runs ainvoke in parallel using asyncio.gather.\nThe default implementation of batch works well

## Agent

In [None]:
from langchain.tools.retriever import create_retriever_tool

# `retriever` searches the LangChain ImagePromptTemplate API page indexed in the
# RAG demo, not LangSmith content. The name and description are the text the
# model is given when deciding whether to call the tool, so they describe what
# the index actually holds.
retriever_tool = create_retriever_tool(
    retriever,
    "langchain_docs_search",
    "Search the LangChain API reference for ImagePromptTemplate. For any questions about LangChain image prompts, you must use this tool!",
)