# import

In [23]:
import pprint
import os
import sys

from dotenv import load_dotenv

from langchain_ollama import OllamaLLM, ChatOllama, OllamaEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain_community.vectorstores import FAISS

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain.chains import create_history_aware_retriever

from langchain_core.prompts import MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage

import psycopg
from langchain_postgres.vectorstores import PGVector
from SPARQLWrapper import SPARQLWrapper, JSON, POST
from urllib.parse import urljoin

# loading env variables

In [24]:
# Load environment variables (read further down with os.getenv) before any
# configuration cell runs.
load_dotenv()

True

# LangChain & Ollama

## check ollama status

In [5]:
# !curl --location 'http://127.0.0.1:11434/api/generate' \
# --header 'Content-Type: application/json' \
# --data '{ \
#     "model": "llama3.2:3b", \
#     "prompt": "hello llama!",  \
#     "options": { \
#         "temperature": 0 \
#     } \
# }' \
# | python -m json.tool

In [6]:
# !curl http://localhost:11434/api/tags

## Ollama model: configuration

In [6]:
# Text-completion client for the local Ollama server; temperature=0 is passed
# to keep the output as repeatable as possible.
llm = OllamaLLM(model="llama3.2:3b", temperature=0)
# Alternative model, kept for quick switching:
# llm = OllamaLLM(model="deepseek-r1:8b", temperature=0)
# Bare expression so the notebook displays the configured object.
llm

OllamaLLM(model='llama3.2:3b', temperature=0.0)

In [9]:
# Chat-message client for the same model; used below with (role, text) messages.
chat_ollama = ChatOllama(model="llama3.2:3b", temperature=0)
# Bare expression so the notebook displays the configured object.
chat_ollama

ChatOllama(model='llama3.2:3b', temperature=0.0)

## testing llm: invoke


In [7]:
# Smoke test: send a single plain-string prompt to the completion model.
# Other prompts that were tried:
# llm.invoke(input="tell me a joke")
response = llm.invoke("hello ollama!")

# response = llm.invoke("Create an agent that uses Ollama function calling in Langchain.")

print(response)

Hello! It's nice to meet you. Is there something I can help you with, or would you like to chat?


In [10]:
# Chat-style call: a list of (role, text) tuples, system instruction first.
messages = [
    ("system", "You are a helpful translator. Translate the user sentence to French."),
    ("human", "I love programming."),
]
# Last expression of the cell, so the notebook displays the returned message.
chat_ollama.invoke(messages)

AIMessage(content='Je aime programmer.', additional_kwargs={}, response_metadata={'model': 'llama3.2:3b', 'created_at': '2025-04-14T19:07:45.0711592Z', 'done': True, 'done_reason': 'stop', 'total_duration': 2673019600, 'load_duration': 2457519600, 'prompt_eval_count': 42, 'prompt_eval_duration': 153058000, 'eval_count': 5, 'eval_duration': 57947000, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-87d0096c-961f-4def-8049-07069bb59937-0', usage_metadata={'input_tokens': 42, 'output_tokens': 5, 'total_tokens': 47})

## testing llm: chat prompt template

In [36]:
# Two-turn prompt: a fixed system persona followed by the caller's question,
# which is substituted for the {input} placeholder at invoke time.
system_turn = ("system", "You are a world class technical documentation writer.")
user_turn = ("user", "{input}")
chat_prompt_template = ChatPromptTemplate.from_messages([system_turn, user_turn])

# Pipe the rendered prompt straight into the model.
chain = chat_prompt_template | llm

response = chain.invoke(
    {"input": "how can langsmith help with testing?"},
)
print(response)

<think>
Okay, so I'm trying to figure out how LangSmith can help with testing. I remember that LangSmith is some kind of AI tool related to language processing, maybe for writing or something like that. But I'm not exactly sure about its specific features beyond generating text.

The user mentioned testing in their question, so I guess they're asking if LangSmith has any features that assist in testing processes. Testing can be a broad term—like software testing, quality assurance, user acceptance testing, etc.—so I need to think about how an AI tool like LangSmith might fit into these contexts.

First, maybe LangSmith can help with automated testing. If it's capable of generating text based on inputs, perhaps it can create test cases or scenarios automatically. That would save time compared to manual testing. But I'm not sure if LangSmith has that feature or not.

Another angle is using LangSmith for functional testing. If you're testing a system's functionality, maybe LangSmith can s

## testing llm: chat prompt template & StrOutputParser

In [37]:
# Same prompt as the previous experiment, with a StrOutputParser appended as
# the final stage of the chain.
output_parser = StrOutputParser()

system_turn = ("system", "You are a world class technical documentation writer.")
user_turn = ("user", "{input}")
chat_prompt_template = ChatPromptTemplate.from_messages([system_turn, user_turn])

# prompt -> model -> parser
chain = chat_prompt_template | llm | output_parser

response = chain.invoke(
    {"input": "how can langsmith help with testing?"},
)
print(response)

<think>
Okay, so I'm trying to figure out how LangSmith can help with testing. I remember that LangSmith is some kind of AI tool related to language processing, maybe for writing or something like that. But I'm not exactly sure about its features beyond generating text.

The user mentioned testing in their question, so I guess they're asking if LangSmith can be used for testing purposes. Hmm, how does that work? Well, testing usually involves checking if a system works as expected, right? So maybe LangSmith can help test other AI systems or applications?

Wait, but LangSmith is more about generating text. Maybe it's used to create test cases or scenarios for testing something else. Or perhaps it can simulate user interactions to see how well another system responds. That could be useful for testing chatbots or other language-based applications.

Another thought: maybe LangSmith can help in automating tests. Like, if you have a lot of test cases, LangSmith could generate the necessary i

## create vector store & a retriever

In [29]:
# Build a FAISS index over a single web page and expose it as a retriever:
# 1. select a specific data source -- in this case one web page.
# 2. load the content extracted from the page as documents (docs).
# 3. split the docs into chunks and index the chunks in a FAISS vector store.
# 4. convert the vector store into a retriever.

# NOTE(review): the retrieval output recorded further down in this notebook
# shows this URL serving a "Page Not Found" page, so the index probably holds
# no real LangSmith content -- confirm the URL is still valid.
web_base_loader = WebBaseLoader("https://docs.smith.langchain.com/user_guide")

docs = web_base_loader.load()

# Debug helpers for inspecting the loaded page:
# print(f"type(docs) : {type(docs)} \n")
# print(f"len(docs) : {len(docs)}\n")
# print(f"docs: {docs} \n")
# type(f"docs[0] : {docs[0]} \n")
# print(f"docs[0].page_content : {docs[0].page_content} \n")

# Split with the splitter's default settings (no chunk size is passed here).
recursive_character_text_splitter = RecursiveCharacterTextSplitter()
documents = recursive_character_text_splitter.split_documents(documents=docs)


# Debug helpers for inspecting the chunks:
# print(type(documents))
# print(len(documents))
# print(documents)
# print(documents[0])
# print(documents[2])

# NOTE(review): this embeds with the chat model "llama3.2:3b"; a later cell
# rebinds ollama_embedding to "bge-m3:567m" -- confirm which model is intended
# for this index.
ollama_embedding = OllamaEmbeddings(model="llama3.2:3b")
vector_store = FAISS.from_documents(documents=documents, embedding=ollama_embedding)


# Debug helpers for inspecting the index:
# print(f"vector_store.index.ntotal: {vector_store.index.ntotal}")
# print(f"vector_store._get_retriever_tags() : {vector_store._get_retriever_tags()}")
# print(f"vector_store.index_to_docstore_id : {vector_store.index_to_docstore_id}")
# print(f"type(vector_store.index_to_docstore_id) : {type(vector_store.index_to_docstore_id)}")

# Retriever with default search settings (the printed repr shows search_kwargs={}).
vector_store_retriever = vector_store.as_retriever()
print(f"vector_store_retriever: {vector_store_retriever}")

vector_store_retriever: tags=['FAISS', 'OllamaEmbeddings'] vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000233810D3DD0> search_kwargs={}


## document chain

In [30]:
# 5. Prompt that restricts the answer to the supplied {context}.
# 6. "Stuff documents" chain: takes the model and the prompt; it can be run
#    directly by handing it the documents as the "context" input.

qa_template = """Answer the following question based only on the provided context:

<context>
{context}
</context>

Question: {input}"""
chat_prompt_template = ChatPromptTemplate.from_template(qa_template)

documents_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=chat_prompt_template,
)

# Run the chain directly, passing every split chunk as context (no retrieval).
chain_inputs = {
    "input": "how can langsmith help with testing?",
    "context": documents,
}
response = documents_chain.invoke(chain_inputs)
print(response)

There is no information provided in the context about LangSmith's capabilities or features related to testing. The text only mentions that LangSmith is a project or organization (indicated by the "LangSmith SDK" and "LangChain Python Docs" links), but it does not provide any details on how it can be used for testing.


## retrieval chain

In [31]:
# 7. Retrieval chain: the retriever supplies the documents, and the stuff
#    documents chain answers from them.

retrieval_chain = create_retrieval_chain(
    retriever=vector_store_retriever,
    combine_docs_chain=documents_chain,
)

question = {"input": "how can langsmith help with testing?"}
response = retrieval_chain.invoke(question)

# print(type(response))
pprint.pprint(response, indent=4)

{   'answer': 'There is no information provided in the context about '
              "LangSmith's capabilities or features related to testing. The "
              'text only mentions that LangSmith is a project or organization '
              '(indicated by the "LangSmith SDK" and "LangChain Python Docs" '
              'links), but it does not provide any details on how it can be '
              'used for testing.',
    'context': [   Document(metadata={'source': 'https://docs.smith.langchain.com/user_guide', 'title': '🦜️🛠️ LangSmith', 'language': 'en'}, page_content='🦜️🛠️ LangSmith\n\n\n\n\n\n\nSkip to main contentJoin us at  Interrupt: The Agent AI Conference by LangChain on May 13 & 14 in San Francisco!API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppPage Not FoundWe could not find what you were looking for.Head back to our main docs page or use the search bar to find the page you need.CommunityDiscordTwitterGitHubDocs CodeLangSmith SDKPythonJS/TSMoreHomepageBlogLangChain Pytho

## conversation retrieval chain

In [32]:
# Prompt that asks the model to turn the conversation so far plus the latest
# user message into a search query.
query_generation_messages = [
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}"),
    ("user", "Given the above conversation, generate a search query to look up to get information relevant to the conversation"),
]
chat_prompt_template = ChatPromptTemplate.from_messages(query_generation_messages)

# Retriever built from the model, the vector store retriever and that prompt.
history_aware_retriever_chain = create_history_aware_retriever(
    llm=llm,
    retriever=vector_store_retriever,
    prompt=chat_prompt_template,
)

In [33]:
# Answering prompt: retrieved context in the system turn, then the prior
# conversation, then the new user message.
answer_messages = [
    ("system", "Answer the user's questions based on the below context:\n\n{context}"),
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}"),
]
chat_prompt_template = ChatPromptTemplate.from_messages(answer_messages)

document_chain = create_stuff_documents_chain(llm=llm, prompt=chat_prompt_template)
retrieval_chain = create_retrieval_chain(
    retriever=history_aware_retriever_chain,
    combine_docs_chain=document_chain,
)

# A short prior exchange, so the follow-up "tell me how" only makes sense with history.
chat_history = [
    HumanMessage(content="Can LangSmith help test my LLM applications?"),
    AIMessage(content="Yes!"),
]

conversation_inputs = {
    "chat_history": chat_history,
    "input": "tell me how",
}
response = retrieval_chain.invoke(conversation_inputs)

pprint.pprint(response)

{'answer': "We'd be happy to help you test your Large Language Model (LLM) "
           'applications. Here are some ways we can assist:\n'
           '\n'
           '1. **Conversational Testing**: We can engage in conversations with '
           'your LLM, providing it with a variety of prompts and scenarios to '
           'test its understanding, accuracy, and response quality.\n'
           '2. **Error Identification**: Our team can help identify errors or '
           "biases in your LLM's responses, such as incorrect information, "
           'inconsistencies, or inappropriate content.\n'
           '3. **Performance Evaluation**: We can evaluate the performance of '
           'your LLM on specific tasks, such as answering questions, '
           'generating text, or completing tasks.\n'
           '4. **Data Quality Assessment**: We can assess the quality and '
           "relevance of the data used to train your LLM, ensuring it's "
           'accurate, diverse, and up-to-da

# embeddings

### initialize embedding model

In [12]:
# Embedding model used by the PGVector cells below. This rebinds
# ollama_embedding, which the FAISS section set to a different model.
# Alternatives that were tried:
# ollama_embedding = OllamaEmbeddings(model="mxbai-embed-large:335m")
# ollama_embedding = OllamaEmbeddings(model="nomic-embed-text:latest")
ollama_embedding = OllamaEmbeddings(model="bge-m3:567m")

### connect to pgvector

In [13]:
# Database connection details, read from the environment.
# URL format: postgresql+psycopg://user:password@host:port/dbname
from urllib.parse import quote


def build_connection_string(user, password, host, port, dbname):
    """Return the ``postgresql+psycopg`` URL for the given connection details.

    The user name and password are percent-encoded so that reserved characters
    such as ``@``, ``:`` or ``/`` inside a credential cannot be mistaken for
    URL delimiters. Host, port and database name are inserted unchanged.

    Args:
        user: Database user name.
        password: Database password.
        host: Database host name or address.
        port: Database port (string or int).
        dbname: Database name.

    Returns:
        The connection URL as a string.
    """
    # safe="" makes quote() encode "/" as well, which it leaves alone by default.
    safe_user = quote(str(user), safe="")
    safe_password = quote(str(password), safe="")
    return f"postgresql+psycopg://{safe_user}:{safe_password}@{host}:{port}/{dbname}"


DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT")
DB_NAME = os.getenv("DB_NAME")
DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")

# Report unset variables by name; otherwise they end up as the literal text
# "None" inside the URL and only show up later as a confusing connection error.
missing_db_settings = [
    name
    for name, value in (
        ("DB_HOST", DB_HOST),
        ("DB_PORT", DB_PORT),
        ("DB_NAME", DB_NAME),
        ("DB_USER", DB_USER),
        ("DB_PASSWORD", DB_PASSWORD),
    )
    if not value
]
if missing_db_settings:
    print(f"WARNING: missing environment variables: {', '.join(missing_db_settings)}")

CONNECTION_STRING = build_connection_string(DB_USER, DB_PASSWORD, DB_HOST, DB_PORT, DB_NAME)
COLLECTION_NAME = "dbpedia_docs"

In [None]:
# Open (or create) the PGVector collection that will hold the embeddings.
print(f"\nConnecting to PGVector '{COLLECTION_NAME}'...")
try:
    # If the collection table doesn't exist, PGVector will try to create it.
    vectorstore = PGVector(
        connection=CONNECTION_STRING,
        embeddings=ollama_embedding,
        collection_name=COLLECTION_NAME,
        use_jsonb=True,
        # Use pre_delete_collection=True if you want to clear the collection
        # on every run (USE WITH CAUTION!)
        # pre_delete_collection=False,
    )
except psycopg.OperationalError as e:
    # NOTE(review): depending on how PGVector surfaces driver errors, a failed
    # connection may arrive wrapped in another exception type and land in the
    # generic branch below -- confirm.
    print(f"\nDatabase Connection Error: {e}")
    # Re-raise instead of calling exit(1): exit() is an interactive helper,
    # not meant for program code, and re-raising stops the cell with the
    # original traceback.
    raise
except Exception as e:
    print(f"\nAn error occurred during PGVector connection: {e}")
    raise
else:
    # Only reached when the constructor returned without raising.
    print("connection successfull!")


Connecting to PGVector 'dbpedia_docs'...
connection successfull!


### connect to ontotext graph db and fetch all the entities

In [47]:
def build_sparql_endpoint(base_url, repository):
    """Return the SPARQL endpoint URL ``{base_url}/repositories/{repository}``.

    Surrounding whitespace and trailing slashes on ``base_url`` are ignored, so
    ``http://host:7200`` and ``http://host:7200/`` give the same endpoint.

    Args:
        base_url: Base URL of the GraphDB server.
        repository: Repository id.

    Returns:
        The endpoint URL as a string.

    Raises:
        ValueError: If either argument is missing or blank.
    """
    # Trim only the trailing slashes; exactly one is re-added below so that
    # urljoin appends to the base path instead of replacing its last segment.
    base = (base_url or "").strip().rstrip("/")
    repo = (repository or "").strip()
    if not base or not repo:
        raise ValueError("GRAPHDB_BASE_URL and GRAPHDB_REPOSITORY must both be set")
    return urljoin(base + "/", f"repositories/{repo}")


GRAPHDB_BASE_URL = os.getenv("GRAPHDB_BASE_URL")
GRAPHDB_REPOSITORY = os.getenv("GRAPHDB_REPOSITORY")

# Format: {base_url}/repositories/{repository_id}
try:
    SPARQL_ENDPOINT = build_sparql_endpoint(GRAPHDB_BASE_URL, GRAPHDB_REPOSITORY)
except ValueError as e:
    # Name the missing configuration instead of failing with an AttributeError
    # from calling .strip() on None.
    SPARQL_ENDPOINT = None
    print(f"WARNING: {e}; SPARQL_ENDPOINT is not configured")

In [48]:
# SPARQL: select every owl:Class in the <http://dbpedia.org/model> graph whose
# name after the DBpedia ontology namespace is non-empty and pure ASCII.
query = r"""
PREFIX owl: <http://www.w3.org/2002/07/owl#>

SELECT ?class
FROM <http://dbpedia.org/model>
WHERE {
  ?class a owl:Class .
  FILTER (
    regex(STRAFTER(STR(?class), "http://dbpedia.org/ontology/"), "^[\\x00-\\x7F]+$")
  )
}
"""
sparql = SPARQLWrapper(SPARQL_ENDPOINT)
sparql.setReturnFormat(JSON)

# Class IRIs collected from the query result; stays empty if the query fails.
entitiies = []

try:
    sparql.setQuery(query)
    results = sparql.query().convert()
    # Each binding row carries the class IRI under ["class"]["value"].
    entitiies.extend(
        binding["class"]["value"] for binding in results["results"]["bindings"]
    )
except Exception as e:
    print("Error:", e)

In [49]:
# Sanity check: how many class IRIs were fetched, plus the first two of them.
len(entitiies), entitiies[:2]

(639,
 ['http://dbpedia.org/ontology/AcademicConference',
  'http://dbpedia.org/ontology/AcademicJournal'])