In [24]:
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.memory import ChatMessageHistory
import gradio as gr

llm = ChatOllama(model="llama3")

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer all questions to the best of your ability.",
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

chain = prompt | llm

chat_history = ChatMessageHistory()

def respond(message, history):
    chat_history.add_user_message(message)
    response = chain.invoke({"messages": chat_history.messages})
    chat_history.add_ai_message(response.content)
    return "Bot: " + response.content

chat_interface = gr.ChatInterface(fn=respond)
chat_interface.launch(share=True)




* Running on local URL:  http://127.0.0.1:7863

Could not create share link. Missing file: /Users/adudi/Documents/projects/ollama_agent/venv/lib/python3.10/site-packages/gradio/frpc_darwin_arm64_v0.3. 

Please check your internet connection. This can happen if your antivirus software blocks the download of this file. You can install manually by following these steps: 

1. Download this file: https://cdn-media.huggingface.co/frpc-gradio-0.3/frpc_darwin_arm64
2. Rename the downloaded file to: frpc_darwin_arm64_v0.3
3. Move the file to this location: /Users/adudi/Documents/projects/ollama_agent/venv/lib/python3.10/site-packages/gradio







In [3]:
from langchain_ollama import ChatOllama
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

llm = ChatOllama(model="llama3")

prompt = ChatPromptTemplate.from_template("Who own 2025 {sport}?")

chain = prompt | llm | StrOutputParser()

print(chain.invoke({"sport": "superbowl"}))


The ownership of Super Bowl LVIII (58) in 2025 has not been officially announced by the National Football League (NFL). However, the NFL typically announces the host city and stadium for each Super Bowl several years in advance.

As of now, there have been no official announcements regarding the location or ownership of Super Bowl LVIII. The NFL usually makes these announcements at their annual Fall Meeting, which takes place in September or October.


In [4]:
from langchain_core.messages import HumanMessage

llm.invoke(
    [
        HumanMessage(
            content="Translate this sentence from English to French: I love programming."
        )
    ]
)




AIMessage(content='Here is the translation:\n\nJ\'adore le programmation.\n\nNote: "programmation" is a noun, so it\'s capitalized in French.', additional_kwargs={}, response_metadata={'model': 'llama3', 'created_at': '2025-02-10T22:59:44.812291Z', 'done': True, 'done_reason': 'stop', 'total_duration': 4008311792, 'load_duration': 578571250, 'prompt_eval_count': 22, 'prompt_eval_duration': 1782000000, 'eval_count': 32, 'eval_duration': 1646000000, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-84fa2bad-635c-4cb0-aa10-36e8d86aabab-0', usage_metadata={'input_tokens': 22, 'output_tokens': 32, 'total_tokens': 54})

In [5]:
# the model on its own does not have any concept of state

llm.invoke([HumanMessage(content="What did you just say?")])

AIMessage(content="I apologize, but this conversation has just started. I haven't said anything yet! I'm here to help answer your questions or have a chat with you. What would you like to talk about?", additional_kwargs={}, response_metadata={'model': 'llama3', 'created_at': '2025-02-10T23:01:33.55896Z', 'done': True, 'done_reason': 'stop', 'total_duration': 2798170542, 'load_duration': 34075458, 'prompt_eval_count': 16, 'prompt_eval_duration': 620000000, 'eval_count': 41, 'eval_duration': 2143000000, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-b0ee28e7-aabf-4341-aa48-c519b9fa88f8-0', usage_metadata={'input_tokens': 16, 'output_tokens': 41, 'total_tokens': 57})

In [6]:
# to get around this, we need to pass in the entire conversation history into the model as context 

from langchain_core.messages import AIMessage


llm.invoke(
    [
        HumanMessage(
            content="Translate this sentence from English to French: I love programming."
        ),
        AIMessage(content="J'adore la programmation."),
        HumanMessage(content="What did you just say?"),
    ]
)

AIMessage(content='I translated the sentence "I love programming" from English to French, and the French equivalent is:\n\nJ\'adore la programmation.\n\nWhich means: I adore (or really enjoy) programming.', additional_kwargs={}, response_metadata={'model': 'llama3', 'created_at': '2025-02-10T23:03:19.197847Z', 'done': True, 'done_reason': 'stop', 'total_duration': 3371904833, 'load_duration': 36969833, 'prompt_eval_count': 46, 'prompt_eval_duration': 922000000, 'eval_count': 41, 'eval_duration': 2141000000, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-f8e01d64-df7d-4a1f-82b3-4e97e8f4db3f-0', usage_metadata={'input_tokens': 46, 'output_tokens': 41, 'total_tokens': 87})

In [8]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer all questions to the best of your ability.",
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

chain = prompt | llm

In [9]:
from langchain.memory import ChatMessageHistory

chat_history = ChatMessageHistory()

chat_history.add_user_message(
    "Translate this sentence from English to French: I love programming."
)

response = chain.invoke({"messages": chat_history.messages})

response

AIMessage(content='The translation of "I love programming" in French is:\n\nJ\'adore le programmation.\n\nHere\'s a breakdown of the translation:\n\n* "I" becomes "J\'" (je)\n* "love" becomes "adore"\n* "programming" becomes "le programmation"\n\nNote: "le" is used because "programmation" starts with a vowel sound, so it takes an indefinite article "le" instead of "la".', additional_kwargs={}, response_metadata={'model': 'llama3', 'created_at': '2025-02-10T23:07:25.374022Z', 'done': True, 'done_reason': 'stop', 'total_duration': 6397327875, 'load_duration': 81974792, 'prompt_eval_count': 43, 'prompt_eval_duration': 1346000000, 'eval_count': 92, 'eval_duration': 4968000000, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-4b337d74-c013-4cd5-bec9-721fe7887050-0', usage_metadata={'input_tokens': 43, 'output_tokens': 92, 'total_tokens': 135})

In [10]:
response.content

'The translation of "I love programming" in French is:\n\nJ\'adore le programmation.\n\nHere\'s a breakdown of the translation:\n\n* "I" becomes "J\'" (je)\n* "love" becomes "adore"\n* "programming" becomes "le programmation"\n\nNote: "le" is used because "programmation" starts with a vowel sound, so it takes an indefinite article "le" instead of "la".'

In [11]:
chat_history.add_ai_message(response.content)
chat_history.add_user_message("What did you say?")
chain.invoke({"messages": chat_history.messages})

AIMessage(content='I translated the sentence "I love programming" from English to French. The translation is:\n\nJ\'adore le programmation.\n\nLet me know if you need any further assistance!', additional_kwargs={}, response_metadata={'model': 'llama3', 'created_at': '2025-02-10T23:09:44.508661Z', 'done': True, 'done_reason': 'stop', 'total_duration': 2298417958, 'load_duration': 31807375, 'prompt_eval_count': 149, 'prompt_eval_duration': 363000000, 'eval_count': 37, 'eval_duration': 1900000000, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-38cc9a16-1f8a-4d80-8981-30406e85cbfd-0', usage_metadata={'input_tokens': 149, 'output_tokens': 37, 'total_tokens': 186})

In [13]:
import gradio as gr

def respond(message, history):
    chat_history.add_user_message(message)
    response = chain.invoke({"messages": chat_history.messages})
    chat_history.add_ai_message(response.content)
    return "Bot: " + response.content

chat_interface = gr.ChatInterface(fn=respond)
chat_interface.launch(share=True)



* Running on local URL:  http://127.0.0.1:7861

Could not create share link. Missing file: /Users/adudi/Documents/projects/ollama_agent/venv/lib/python3.10/site-packages/gradio/frpc_darwin_arm64_v0.3. 

Please check your internet connection. This can happen if your antivirus software blocks the download of this file. You can install manually by following these steps: 

1. Download this file: https://cdn-media.huggingface.co/frpc-gradio-0.3/frpc_darwin_arm64
2. Rename the downloaded file to: frpc_darwin_arm64_v0.3
3. Move the file to this location: /Users/adudi/Documents/projects/ollama_agent/venv/lib/python3.10/site-packages/gradio







In [16]:
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader("https://docs.smith.langchain.com/overview")
data = loader.load()




In [17]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
all_splits = text_splitter.split_documents(data)

In [20]:
from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings

vectorstore = Chroma.from_documents(documents=all_splits, embedding=OllamaEmbeddings(model="llama3"))

In [21]:
# k is the number of chunks to retrieve
retriever = vectorstore.as_retriever(k=4)

docs = retriever.invoke("how can langsmith help with testing?")

docs

[Document(id='8fb2ee91-7b8a-42b9-8516-91fd84728282', metadata={'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en', 'source': 'https://docs.smith.langchain.com/overview', 'title': 'Get started with LangSmith | 🦜️🛠️ LangSmith'}, page_content='Get started by creating your first prompt.\nIterate on models and prompts using the Playground.\nManage prompts programmatically in your application.\nWas this page helpful?You can leave detailed feedback on GitHub.NextQuick StartObservabilityEvalsPrompt EngineeringCommunityDiscordTwitterGitHubDocs CodeLangSmith SDKPythonJS/TSMoreHomepageBlogLangChain Python DocsLangChain JS/TS DocsCopyright © 2025 LangChain, Inc.'),
 Document(id='a45e9e58-a375-4bc5-9d9d-8f01881694f4', metadata={'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en', 'source': 'https://docs.smith.langchain.com/overview', 'title': 'Get started with LangSmith | 🦜️🛠️ LangSmith'},

In [26]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.memory import ChatMessageHistory


llm = ChatOllama(model="llama3")

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Answer the user's questions using this context: \n\n{context}"
        ),
        MessagesPlaceholder(variable_name="messages")
    ]
)

document_chain = create_stuff_documents_chain(llm, prompt)
demo_ephemeral_chat_history = ChatMessageHistory()


In [27]:
demo_ephemeral_chat_history.add_user_message("how can langsmith help with testing?")

document_chain.invoke(
    {
        "messages": demo_ephemeral_chat_history.messages,
        "context": docs,
    }
)

'According to the provided context, LangSmith helps with testing through its Evals feature. This allows you to build and run high-quality evaluations easily, which is crucial for testing and optimizing your AI applications. With LangSmith, you can create dashboards to view key metrics like RPS (requests per second), error rates, and costs, giving you a better understanding of how your application performs.'

In [28]:
from typing import Dict

from langchain_core.runnables import RunnablePassthrough


def parse_retriever_input(params: Dict):
    return params["messages"][-1].content


retrieval_chain = RunnablePassthrough.assign(
    context=parse_retriever_input | retriever,
).assign(
    answer=document_chain,
)

In [29]:
response = retrieval_chain.invoke(
    {
        "messages": demo_ephemeral_chat_history.messages,
    }
)

response

{'messages': [HumanMessage(content='how can langsmith help with testing?', additional_kwargs={}, response_metadata={})],
 'context': [Document(id='8fb2ee91-7b8a-42b9-8516-91fd84728282', metadata={'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en', 'source': 'https://docs.smith.langchain.com/overview', 'title': 'Get started with LangSmith | 🦜️🛠️ LangSmith'}, page_content='Get started by creating your first prompt.\nIterate on models and prompts using the Playground.\nManage prompts programmatically in your application.\nWas this page helpful?You can leave detailed feedback on GitHub.NextQuick StartObservabilityEvalsPrompt EngineeringCommunityDiscordTwitterGitHubDocs CodeLangSmith SDKPythonJS/TSMoreHomepageBlogLangChain Python DocsLangChain JS/TS DocsCopyright © 2025 LangChain, Inc.'),
  Document(id='a45e9e58-a375-4bc5-9d9d-8f01881694f4', metadata={'description': 'LangSmith is a platform for building production-grade LLM application

In [30]:
demo_ephemeral_chat_history.add_ai_message(response["answer"])

demo_ephemeral_chat_history.add_user_message("tell me more about that!")

retrieval_chain.invoke(
    {
        "messages": demo_ephemeral_chat_history.messages,
    },
)

{'messages': [HumanMessage(content='how can langsmith help with testing?', additional_kwargs={}, response_metadata={}),
  AIMessage(content="According to the context, LangSmith's Evals feature makes building and running high-quality evaluations easy, which is crucial for testing AI applications. This means that LangSmith provides a set of tools to help you test your AI applications efficiently and effectively.\n\nWith LangSmith's Evals, you can:\n\n* Create high-quality evaluation datasets\n* Test and optimize your AI applications quickly\n\nBy using LangSmith's Evals feature, you can ensure the quality and development speed of your AI applications are optimal.", additional_kwargs={}, response_metadata={}),
  HumanMessage(content='tell me more about that!', additional_kwargs={}, response_metadata={})],
 'context': [Document(id='8fb2ee91-7b8a-42b9-8516-91fd84728282', metadata={'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en', 'so

In [31]:
retrieval_chain_with_only_answer = (
    RunnablePassthrough.assign(
        context=parse_retriever_input | retriever,
    )
    | document_chain
)

retrieval_chain_with_only_answer.invoke(
    {
        "messages": demo_ephemeral_chat_history.messages,
    },
)

"According to the context, LangSmith's Evals feature helps with testing by providing a set of tools for building and running high-quality evaluations. This means that LangSmith has native support for evaluating AI models, making it easier to test and optimize your applications.\n\nHere are some potential benefits of using LangSmith's Evals:\n\n* **Efficient evaluation**: LangSmith's Evals allows you to create and run evaluations quickly, which can save you time and effort in testing your AI applications.\n* **High-quality datasets**: With LangSmith, you can easily create high-quality evaluation datasets that accurately test your AI models. This ensures that your tests are reliable and meaningful.\n* **Model optimization**: By providing insights into model performance, LangSmith's Evals helps you optimize your AI models for better results.\n\nOverall, LangSmith's Evals is designed to help you test and refine your AI applications more efficiently, which can lead to higher-quality results

In [33]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableBranch

# We need a prompt that we can pass into an LLM to generate a transformed search query

chat = ChatOllama(model="llama3")

query_transform_prompt = ChatPromptTemplate.from_messages(
    [
        MessagesPlaceholder(variable_name="messages"),
        (
            "user",
            "Given the above conversation, generate a search query to look up in order to get information relevant to the conversation. Only respond with the query, nothing else.",
        ),
    ]
)

query_transforming_retriever_chain = RunnableBranch(
    (
        lambda x: len(x.get("messages", [])) == 1,
        # If only one message, then we just pass that message's content to retriever
        (lambda x: x["messages"][-1].content) | retriever,
    ),
    # If messages, then we pass inputs to LLM chain to transform the query, then pass to retriever
    query_transform_prompt | chat | StrOutputParser() | retriever,
).with_config(run_name="chat_retriever_chain")

In [36]:
document_chain = create_stuff_documents_chain(chat, prompt)

conversational_retrieval_chain = RunnablePassthrough.assign(
    context=query_transforming_retriever_chain,
).assign(
    answer=document_chain,
)

demo_ephemeral_chat_history = ChatMessageHistory()

In [37]:
demo_ephemeral_chat_history.add_user_message("how can langsmith help with testing?")

response = conversational_retrieval_chain.invoke(
    {"messages": demo_ephemeral_chat_history.messages},
)

demo_ephemeral_chat_history.add_ai_message(response["answer"])

response

{'messages': [HumanMessage(content='how can langsmith help with testing?', additional_kwargs={}, response_metadata={}),
  AIMessage(content="LangSmith can help with testing through its Evals feature, which makes it easy to build and run high-quality evaluation datasets and metrics for AI applications. This allows you to test and optimize your applications more efficiently.\n\nWith LangSmith's Evals, you can:\n\n1. Create evaluation datasets: Define the inputs and expected outputs for your AI application, making it easier to test its performance.\n2. Run evaluations: Use LangSmith's SDK or UI to run your evaluation datasets against your AI application, getting accurate results on how well it performs.\n3. Track key metrics: View key metrics such as RPS (requests per second), error rates, and costs to monitor the performance of your AI application.\n\nBy leveraging LangSmith's Evals feature, you can:\n\n* Faster development cycles: Focus on improving your AI application's performance wit

In [39]:
demo_ephemeral_chat_history.add_user_message("tell me more about that!")

conversational_retrieval_chain.invoke(
    {"messages": demo_ephemeral_chat_history.messages}
)

{'messages': [HumanMessage(content='how can langsmith help with testing?', additional_kwargs={}, response_metadata={}),
  AIMessage(content="LangSmith can help with testing through its Evals feature, which makes it easy to build and run high-quality evaluation datasets and metrics for AI applications. This allows you to test and optimize your applications more efficiently.\n\nWith LangSmith's Evals, you can:\n\n1. Create evaluation datasets: Define the inputs and expected outputs for your AI application, making it easier to test its performance.\n2. Run evaluations: Use LangSmith's SDK or UI to run your evaluation datasets against your AI application, getting accurate results on how well it performs.\n3. Track key metrics: View key metrics such as RPS (requests per second), error rates, and costs to monitor the performance of your AI application.\n\nBy leveraging LangSmith's Evals feature, you can:\n\n* Faster development cycles: Focus on improving your AI application's performance wit

Reference:

https://python.langchain.com/v0.1/docs/use_cases/chatbots/quickstart/#quickstart-1


Questions:

1. why chaining? what is langchain doing?
2. Embedding models - which are the popular ones?
3. Chroma db, store the embeddings 
4. What are the alternatives to langchain? 
5. Why is Ollama required here?
6. So we are just inferring here? what would inteference platforms like groq do?
7. langsmith - to inspect internals?
8. langsmith eval / test framework?