In [1]:
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment
# (the API keys used by later cells are expected to come from there).
# The True shown as cell output is load_dotenv()'s return value.
load_dotenv()

True

In [3]:
from app import langsmith_rag

# Call the `langsmith_rag` imported from `app` with one sample question;
# the returned string is displayed as the cell output.
# NOTE(review): a later cell defines its own `langsmith_rag`, which shadows this import.
question = "How do I set up tracing to LangSmith with @traceable?"
langsmith_rag(question)

"To set up tracing to LangSmith with the @traceable decorator in Python, ensure that you set the LANGSMITH_TRACING environment variable to 'true' and the LANGSMITH_API_KEY environment variable to your API key. Then, simply decorate any function you wish to trace with @traceable. Make sure to use the await keyword when calling wrapped sync functions to ensure the trace is logged correctly."

In [6]:
from langsmith import Client

# (question, context, reference answer) triples used to seed the LangSmith dataset.
example_dataset = [
    (
        "What are the benefits of artificial intelligence in healthcare?",
        """Artificial intelligence is transforming healthcare by enabling faster and more accurate diagnoses, personalized treatment plans, and predictive analytics for disease prevention. AI-powered tools assist doctors in analyzing medical images, identifying patterns in patient data, and recommending evidence-based treatments. Machine learning algorithms can process vast amounts of medical research to stay current with the latest findings.""",
        "AI in healthcare improves diagnostic accuracy, enables personalized medicine, accelerates drug discovery, and helps predict patient outcomes, ultimately leading to better patient care and reduced costs."
    ),
    (
        "How does climate change affect global food security?",
        """Climate change poses significant threats to global food security through extreme weather events, shifting precipitation patterns, and rising temperatures. Droughts, floods, and unpredictable seasons disrupt agricultural production, while changing climates enable pest and disease spread. Coastal areas face saltwater intrusion into farmland, and warming oceans affect fish populations critical for nutrition in many regions.""",
        "Climate change threatens food security by reducing crop yields, disrupting agricultural systems, increasing food price volatility, and forcing migration from affected regions, particularly impacting vulnerable populations in developing countries."
    ),
    (
        "What is the future of renewable energy technology?",
        """Renewable energy technology is rapidly advancing with improvements in solar panel efficiency, battery storage capacity, and wind turbine design. Emerging technologies include green hydrogen, advanced geothermal systems, and next-generation nuclear reactors. Grid modernization and smart energy management systems are enabling better integration of renewable sources, while costs continue to decline making clean energy increasingly competitive with fossil fuels.""",
        "The future of renewable energy involves more efficient solar and wind technologies, breakthrough battery storage solutions, widespread adoption of green hydrogen, and smart grid systems that can balance variable renewable sources to create a sustainable energy infrastructure."
    ),
]

client = Client()
dataset_name = "Technology and Global Challenges"

# Prepare inputs and outputs
inputs = [{"question": q, "context": c} for q, c, _ in example_dataset]
outputs = [{"output": o} for _, _, o in example_dataset]

# Make the cell safe to re-run: creating a dataset whose name already exists
# fails, and appending the examples again would duplicate them. So the dataset
# is created and populated only when it is missing; otherwise it is reused as-is.
if client.has_dataset(dataset_name=dataset_name):
    dataset = client.read_dataset(dataset_name=dataset_name)
    create_examples_result = None
else:
    # Create dataset
    dataset = client.create_dataset(
        dataset_name=dataset_name, description="AI, climate change, and renewable energy insights"
    )
    # Create examples in the dataset
    create_examples_result = client.create_examples(
        inputs=inputs,
        outputs=outputs,
        dataset_id=dataset.id,
    )

# Last expression of the cell: displays the create_examples response on the
# first run and nothing (None) when the dataset already existed.
create_examples_result


{'example_ids': ['bfbb4673-9f4c-42df-ad57-8e536cc0f9eb',
  '85a9a5dd-b3b3-4fd7-82d0-117ecd73f999',
  'f5f47333-3bd5-4d58-9883-d29d971db555'],
 'count': 3}

In [11]:
# Create a LANGSMITH_API_KEY in Settings > API Keys
from langsmith import Client
# Pull the prompt named "deep" via the `client` instance created in the dataset cell above
# (this cell does not construct its own Client, so that cell must have run first).
# NOTE(review): include_model=True presumably bundles the saved model configuration with the
# prompt, in which case `prompt.invoke(...)` may call a model instead of only formatting
# messages — confirm this is intended given that `call_openai` is invoked afterwards.
prompt = client.pull_prompt("deep", include_model=True)

In [15]:
import os
import tempfile
from typing import List

import nest_asyncio
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.sitemap import SitemapLoader
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_openai import OpenAIEmbeddings
from langsmith import traceable
from langsmith.client import convert_prompt_to_openai_format
from openai import OpenAI
from openai.types.chat import ChatCompletion

# Model passed to the OpenAI chat completion call and recorded as `ls_model_name` trace metadata.
MODEL_NAME = "gpt-4o-mini"
# Provider label recorded as `ls_provider` trace metadata on the LLM run.
MODEL_PROVIDER = "openai"
# NOTE(review): not referenced anywhere in the visible cells — confirm whether it is still needed.
APP_VERSION = 1.0

# NOTE(review): the original TODO here asked to replace a hard-coded prompt with Prompt Hub; no hard-coded prompt remains in this cell and `generate_response` already uses the pulled `prompt`.

# Shared OpenAI client used by `call_openai`.
openai_client = OpenAI()

def get_vector_db_retriever():
    """Return a retriever over the LangSmith docs, reusing a persisted vector store if one exists.

    The store lives in a parquet file named ``union.parquet`` inside the system
    temp directory. When that file is present the store is opened from it;
    otherwise the LangSmith docs sitemap is loaded, split with a tiktoken-based
    splitter (chunk size 500, no overlap), embedded with ``OpenAIEmbeddings``
    and persisted to that path.

    Returns:
        The object produced by ``vectorstore.as_retriever(lambda_mult=0)``.
    """
    cache_file = os.path.join(tempfile.gettempdir(), "union.parquet")
    embeddings = OpenAIEmbeddings()

    if not os.path.exists(cache_file):
        # No persisted store yet: index the LangSmith documentation from its sitemap.
        sitemap_loader = SitemapLoader(
            web_path="https://docs.smith.langchain.com/sitemap.xml",
            continue_on_failure=True,
        )
        raw_docs = sitemap_loader.load()

        splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
            chunk_size=500, chunk_overlap=0
        )
        chunks = splitter.split_documents(raw_docs)

        store = SKLearnVectorStore.from_documents(
            documents=chunks,
            embedding=embeddings,
            persist_path=cache_file,
            serializer="parquet",
        )
        # Write the freshly built index to disk so later runs take the other branch.
        store.persist()
    else:
        # A persisted store is available: open it instead of re-indexing.
        store = SKLearnVectorStore(
            embedding=embeddings,
            persist_path=cache_file,
            serializer="parquet",
        )

    return store.as_retriever(lambda_mult=0)

# Patch asyncio so event loops can be nested — presumably required because the sitemap
# loader runs async work inside the notebook's already-running loop (TODO confirm).
nest_asyncio.apply()
# Build (or load from the persisted file) the retriever once; `retrieve_documents`
# reads this module-level object.
retriever = get_vector_db_retriever()

"""
retrieve_documents
- Returns documents fetched from a vectorstore based on the user's question
"""
@traceable(run_type="chain")
def retrieve_documents(question: str):
    """Look up documents for ``question`` using the module-level ``retriever``.

    Args:
        question: The user's question, passed unchanged to ``retriever.invoke``.

    Returns:
        Whatever ``retriever.invoke`` returns for the question.
    """
    matched_docs = retriever.invoke(question)
    return matched_docs

"""
generate_response
- Calls `call_openai` to generate a model response after formatting inputs
"""
@traceable(run_type="chain")
def generate_response(question: str, documents):
    """Format the pulled prompt with the retrieved context and send it to ``call_openai``.

    Args:
        question: The user's question, supplied to the prompt as ``question``.
        documents: Iterable of objects exposing ``page_content``; their texts are
            joined with blank lines and supplied to the prompt as ``context``.

    Returns:
        The value returned by ``call_openai`` for the converted messages.
    """
    context_text = "\n\n".join([doc.page_content for doc in documents])
    prompt_inputs = {"context": context_text, "question": question}

    # NOTE(review): `prompt` is pulled with include_model=True in an earlier cell, so the
    # value returned by invoke() may already be a model reply rather than a formatted
    # prompt — confirm that wrapping it in a list here is the intended input.
    prompt_output = prompt.invoke(prompt_inputs)
    openai_payload = convert_prompt_to_openai_format([prompt_output])

    return call_openai(openai_payload["messages"])

"""
call_openai
- Returns the chat completion output from OpenAI
"""
@traceable(
    run_type="llm",
    metadata={
        "ls_provider": MODEL_PROVIDER,
        "ls_model_name": MODEL_NAME
    }
)
def call_openai(messages: List[dict]) -> ChatCompletion:
    """Send chat messages to OpenAI and return the raw chat completion.

    The return annotation previously said ``str``, but the function returns the
    completion object itself; ``langsmith_rag`` extracts the text from it via
    ``.choices[0].message.content``.

    Args:
        messages: Chat messages as a list of dicts, forwarded unchanged as the
            ``messages`` argument of the completion request.

    Returns:
        The object returned by ``openai_client.chat.completions.create`` for
        ``MODEL_NAME``.
    """
    return openai_client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
    )

"""
langsmith_rag
- Calls `retrieve_documents` to fetch documents
- Calls `generate_response` to generate a response based on the fetched documents
- Returns the model response
"""
@traceable(run_type="chain")
def langsmith_rag(question: str):
    """Answer ``question`` by retrieving documents and generating a response from them.

    Args:
        question: The user's question.

    Returns:
        The ``message.content`` of the first choice in the completion returned
        by ``generate_response``.
    """
    completion = generate_response(question, retrieve_documents(question))
    first_choice = completion.choices[0]
    return first_choice.message.content


In [16]:
# Run the pipeline defined in the cell above on one of the dataset questions;
# the returned answer string is displayed as the cell output.
question = "What are the benefits of artificial intelligence in healthcare?"
langsmith_rag(question)

'Artificial intelligence offers numerous benefits in healthcare, including improved diagnostic accuracy through advanced image analysis, personalized treatment plans tailored to individual patient data, and predictive analytics that help in disease prevention. AI tools can process vast amounts of medical research quickly, keeping healthcare providers updated with the latest findings. Additionally, AI accelerates drug discovery, optimizes operational efficiencies, and ultimately contributes to better patient outcomes while potentially reducing healthcare costs.'