# Server-Side Code: LangServe

> Server-side delivery of LangChain agents and chains.

In [None]:
#| default_exp backend

In [2]:
#| export
import asyncio

import os
from operator import itemgetter

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from langchain.chat_models import ChatOllama

from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings

from langchain.prompts import (ChatPromptTemplate, MessagesPlaceholder,
                               PromptTemplate)
from langchain.schema import Document
from langchain.schema.embeddings import Embeddings
from langchain.schema.language_model import BaseLanguageModel
from langchain.schema.messages import AIMessage, HumanMessage
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.retriever import BaseRetriever
from langchain.schema.runnable import (Runnable, RunnableBranch,
                                       RunnableLambda, RunnableMap)
from langchain.vectorstores import Chroma
from langserve import add_routes
from langchain.pydantic_v1 import BaseModel

from langchain.llms import Ollama
from langchain.memory import ConversationBufferMemory

from langserve import add_routes

from langchain.agents import AgentExecutor
from langchain.tools.render import render_text_description, render_text_description_and_args

from langchain.agents import load_tools
from langchain import hub
from langchain.agents.format_scratchpad import format_log_to_str
from langchain.agents.output_parsers import ReActJsonSingleInputOutputParser
from langchain.agents.output_parsers import ReActSingleInputOutputParser

from alhazen.utils.jats_text_extractor import NxmlDoc
from alhazen.utils.output_parsers import ReActJsonSingleInputOutputParser_llama2

from typing import Dict, List, Optional, Sequence, Union
from uuid import UUID

# Hack to fix Incorrect formatting for Llama Chat models
from alhazen.utils.langchain_utils import ChatPromptValue_to_string
from langchain.prompts.chat import ChatPromptValue

In [3]:
# Code from the Langchain Chat repo

# FastAPI application that the retrieval chat chain is mounted on further down.
app = FastAPI()
# Permissive CORS: every origin, method and header is allowed, with credentials.
# NOTE(review): wildcard origins together with allow_credentials=True is very
# open — presumably meant for local development only; confirm before deploying.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
    expose_headers=["*"],
)

# Request payload for the "/chat" route: the user's question plus optional
# earlier conversation turns.
class ChatRequest(BaseModel):
    # The user's current question.
    question: str
    # Earlier turns; serialize_history reads the "human" and "ai" keys of each dict.
    chat_history: Optional[List[Dict[str, str]]]

def get_embeddings_model() -> Embeddings:
    """Build the sentence-transformer embedding model "all-MiniLM-L6-v2"."""
    # Alternative kept from the original code: OpenAIEmbeddings(chunk_size=200)
    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    return embeddings

def get_retriever() -> BaseRetriever:
    """Return a retriever backed by the Chroma store opened as "langchain_store"."""
    return Chroma("langchain_store").as_retriever()

def create_retriever_chain(
    llm: BaseLanguageModel, retriever: BaseRetriever
) -> Runnable:
    """Build the runnable that fetches documents for an incoming request.

    If the input has a truthy ``chat_history``, the input is first passed
    through the REPHRASE_TEMPLATE prompt and ``llm``, and the resulting string
    is sent to ``retriever``. Otherwise the ``question`` value is sent to
    ``retriever`` directly.

    Args:
        llm: Model used to condense the question when history is present.
        retriever: Retriever queried with the (possibly condensed) question.

    Returns:
        The branching runnable, configured with the run name
        "RouteDependingOnChatHistory".
    """
    # REPHRASE_TEMPLATE is read from module scope; it must be defined before
    # this function is called or a NameError is raised here.
    rephrase_prompt = PromptTemplate.from_template(REPHRASE_TEMPLATE)
    condense = rephrase_prompt | llm | StrOutputParser()
    condense = condense.with_config(run_name="CondenseQuestion")
    retrieve_with_history = condense | retriever

    # Branch 1: taken when the request carries a non-empty chat history.
    has_history = RunnableLambda(lambda x: bool(x.get("chat_history")))
    has_history = has_history.with_config(run_name="HasChatHistoryCheck")
    history_branch = (
        has_history,
        retrieve_with_history.with_config(run_name="RetrievalChainWithHistory"),
    )

    # Default branch: look up the raw question as-is.
    pick_question = RunnableLambda(itemgetter("question"))
    pick_question = pick_question.with_config(run_name="Itemgetter:question")
    no_history_branch = (pick_question | retriever).with_config(
        run_name="RetrievalChainWithNoHistory"
    )

    router = RunnableBranch(history_branch, no_history_branch)
    return router.with_config(run_name="RouteDependingOnChatHistory")

def format_docs(docs: Sequence[Document]) -> str:
    """Render documents as newline-separated ``<doc id='N'>...</doc>`` tags.

    Args:
        docs: Documents whose ``page_content`` is embedded in each tag; the
            ``id`` is the document's position in ``docs``, starting at 0.

    Returns:
        The tags joined by newlines, or an empty string when ``docs`` is empty.
    """
    return "\n".join(
        f"<doc id='{position}'>{document.page_content}</doc>"
        for position, document in enumerate(docs)
    )

def serialize_history(request: ChatRequest):
    """Turn the request's ``chat_history`` dicts into chat message objects.

    ``request`` is subscripted like a mapping. For every history entry, a
    ``HumanMessage`` is emitted when its "human" value is not None, followed by
    an ``AIMessage`` when its "ai" value is not None.

    Returns:
        The messages in history order; an empty list when ``chat_history`` is
        None or empty.
    """
    messages = []
    for turn in request["chat_history"] or []:
        human_text = turn.get("human")
        if human_text is not None:
            messages.append(HumanMessage(content=human_text))
        ai_text = turn.get("ai")
        if ai_text is not None:
            messages.append(AIMessage(content=ai_text))
    return messages

def create_chain(
    llm: BaseLanguageModel,
    retriever: BaseRetriever,
) -> Runnable:
    """Assemble the full question-answering chain.

    The returned runnable takes the request, extracts ``question`` and the
    serialized ``chat_history``, retrieves and formats documents into
    ``context``, then fills the chat prompt (RESPONSE_TEMPLATE as the system
    message) and returns the model's answer as a string.

    Args:
        llm: Model used for question condensing and for the final answer.
        retriever: Retriever that supplies the context documents.

    Returns:
        The composed runnable: request fields -> "RetrieveDocs" -> "GenerateResponse".
    """
    find_docs = create_retriever_chain(llm, retriever).with_config(
        run_name="FindDocs"
    )

    # Adds the formatted documents under "context", passing the other keys through.
    retrieve_docs = RunnableMap(
        {
            "context": find_docs | format_docs,
            "question": itemgetter("question"),
            "chat_history": itemgetter("chat_history"),
        }
    ).with_config(run_name="RetrieveDocs")

    # RESPONSE_TEMPLATE is read from module scope and must be defined beforehand.
    answer_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", RESPONSE_TEMPLATE),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{question}"),
        ]
    )
    generate_response = answer_prompt | llm | StrOutputParser()
    generate_response = generate_response.with_config(run_name="GenerateResponse")

    # First step: pull the question and convert the history dicts into messages.
    question_step = RunnableLambda(itemgetter("question")).with_config(
        run_name="Itemgetter:question"
    )
    history_step = RunnableLambda(serialize_history).with_config(
        run_name="SerializeHistory"
    )
    request_fields = {"question": question_step, "chat_history": history_step}

    return request_fields | retrieve_docs | generate_response

# Build the retrieval chat chain at import time, using a ChatOllama model and
# the retriever from get_retriever().
llm = ChatOllama(model='llama2:70b')
retriever = get_retriever()
answer_chain = create_chain(
    llm,
    retriever,
)

# Expose the chain under the "/chat" path, with ChatRequest as its input schema.
add_routes(app, answer_chain, path="/chat", input_type=ChatRequest)

# When run as a script, serve the app with uvicorn on all interfaces, port 8080.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8080)

NameError: name 'REPHRASE_TEMPLATE' is not defined

In [3]:
#| export

# Provides access to an agent interface via langserve.
# Very useful for NextJS applications + demos

import nest_asyncio
from langchain.agents.agent_toolkits import PlayWrightBrowserToolkit
from langchain.tools.playwright.utils import (
    create_async_playwright_browser,  # A synchronous browser is available, though it isn't compatible with jupyter.
)
from langchain.agents.format_scratchpad import format_log_to_str
from langchain.agents.output_parsers import JSONAgentOutputParser
from alhazen.utils.output_parsers import JsonEnclosedByTextOutputParser

# Patch asyncio so an event loop can be re-entered (e.g. when this code is
# executed from inside a notebook).
nest_asyncio.apply()

# Browser-based toolkit, currently disabled in favour of the search tools below.
#async_browser = create_async_playwright_browser()
#browser_toolkit = PlayWrightBrowserToolkit.from_browser(async_browser=async_browser)
#tools = browser_toolkit.get_tools()
tools = load_tools(["ddg-search", "pubmed", "arxiv"])

# Pull the prompt from the LangChain hub and pre-fill the tool descriptions
# (with argument schemas) and the comma-separated tool names.
prompt = hub.pull("hwchase17/react-multi-input-json")
prompt = prompt.partial(
    tools=render_text_description_and_args(tools),
    tool_names=", ".join([t.name for t in tools]),
)

chat_model = ChatOllama(model='llama2:70b')
# Stop generation at "\nObservation" — presumably so the model does not invent
# tool results; the executor supplies the real observations.
chat_model_with_stop = chat_model.bind(stop=["\nObservation"])
# Hack to fix incorrect prompt formatting for Llama chat models: replace
# ChatPromptValue.to_string with the project's implementation.
ChatPromptValue.to_string = ChatPromptValue_to_string

#llm = Ollama(model='llama2:70b')
#llm_with_stop = llm.bind(stop=["\nObservation"])

# Agent pipeline: map the executor's state onto the prompt variables, call the
# model, then parse its JSON reply.
agent = (
    {
        "input": lambda x: x["input"],
        # Prior (action, observation) steps rendered as text for the scratchpad.
        "agent_scratchpad": lambda x: format_log_to_str(x["intermediate_steps"]),
    }
    | prompt
    | chat_model_with_stop
    | JSONAgentOutputParser()
)

agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

# We need to add these input/output schemas because the current AgentExecutor
# is lacking in schemas.
# NOTE(review): ChatRequest repeats the fields of the ChatRequest model declared
# earlier in this file and rebinds that name; the add_routes call for the agent
# passes Input/Output, not ChatRequest — confirm whether this copy is needed.
class ChatRequest(BaseModel):
    # The user's current question.
    question: str
    # Optional earlier conversation turns, each a dict of string keys to string values.
    chat_history: Optional[List[Dict[str, str]]]

# Input schema passed to add_routes for the agent executor: a single "input" string.
class Input(BaseModel):
    # Text handed to the agent (the agent pipeline reads x["input"]).
    input: str

# Output schema passed to add_routes for the agent executor: a single "output" string.
class Output(BaseModel):
    # The agent's final answer text.
    output: str


In [4]:
#| export

# FastAPI application that serves the agent executor; this rebinds `app`.
app = FastAPI(
    title="Alhazen Server",
    version="0.0.1",
    description="An api server using Langchain's Runnable interfaces for Alhazen",
)
# Permissive CORS: every origin, method and header is allowed, with credentials.
# NOTE(review): presumably intended for local development — confirm before deploying.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
    expose_headers=["*"],
)

# Adds routes to the app for using the chain under:
# /invoke
# /batch
# /stream
# Input/Output are supplied explicitly as the request and response schemas.
add_routes(app, agent_executor, input_type=Input, output_type=Output)

In [10]:
# Manual smoke test: POST a question to the /invoke route of a server that is
# assumed to be already running on localhost:8080.
import requests
response = requests.post(
    "http://localhost:8080/invoke/",
    # Outer 'input' is the request envelope; the inner dict matches the Input schema.
    json={'input': {'input': 'What is Tom Cruise\'s height?'}}
)
# Show the decoded JSON reply as the cell output.
response.json()

{'output': {'output': "Tom Cruise's height is 5 feet 7 inches (1.70 meters)"},
 'callback_events': [],
 'metadata': {'run_id': '562b570c-ade1-4be4-b6eb-5d44bea63c60'}}

In [4]:
# Call the agent executor directly (no HTTP server involved) and print its answer.
response = agent_executor.invoke(
    {"input": "Search for papers about cancer."}
)
print(response["output"])

# Disabled alternative: invoke through the typed wrapper with an explicit scratchpad.
#input = {
#    "input": "Search for papers about cancer.",
#    'agent_scratchpad': ""
#}
#
#agent_executor.with_types(input_type=Input, output_type=Output).invoke(input)



[1m> Entering new AgentExecutor chain...[0m


KeyboardInterrupt: 

In [2]:
#| export

# When run as a script, serve `app` with uvicorn on localhost, port 8080.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="localhost", port=8080)

KeyboardInterrupt: 