In [1]:
from dotenv import load_dotenv
# Load settings from the .env file at ../../.env (relative path) before any
# client below is constructed; override=True is passed so file values take
# precedence over variables already present in the environment.
load_dotenv(dotenv_path="../../.env", override=True)

True

In [4]:
# Quick check of the `langsmith_rag` pipeline exported by the `app` module.
# NOTE(review): a later cell in this notebook defines its own `langsmith_rag`,
# which replaces this imported name once that cell has run.
from app import langsmith_rag

question = "What are some examples of constellations?"
langsmith_rag(question)

'The retrieved context does not provide specific examples of constellations. However, common examples of constellations include Orion, Ursa Major, and Cassiopeia. If you need detailed information, I can look it up for you.'

In [5]:
from langsmith import Client

# Each entry is (question, context, expected output); the first two become the
# example inputs and the third becomes the example output.
astronomy_dataset = [
    (
        "What causes the phases of the Moon?",
        "The Moon’s phases result from its orbit around Earth, which changes the portion of its sunlit side visible to us. The Moon itself doesn’t change shape — we see different illuminated fractions depending on its position relative to Earth and the Sun.",
        "Lunar phases occur as the Moon orbits Earth, showing varying sunlit portions to us."
    ),
    (
        "What is a light-year and what does it measure?",
        "A light-year measures distance, not time: it’s the distance light travels in one year — about 9.46 trillion kilometers (5.88 trillion miles). Astronomers use it to express vast interstellar distances.",
        "A light-year is the distance light travels in a year (≈9.46 trillion km)."
    ),
    (
        "Why do stars twinkle when viewed from Earth?",
        "Starlight is distorted as it passes through turbulent layers of Earth’s atmosphere, causing rapid changes in brightness and position — the apparent twinkling effect. Planets twinkle less because they appear as disks, not point sources.",
        "Stars twinkle due to atmospheric turbulence bending their light as it reaches us."
    ),
]

client = Client()
dataset_name = "Astronomy Basics"

inputs = [{"question": q, "context": c} for q, c, _ in astronomy_dataset]
outputs = [{"output": o} for _, _, o in astronomy_dataset]

# Keep the cell safe to re-run (Restart & Run All): creating a dataset whose
# name already exists, or uploading the same examples a second time, would
# either fail or leave duplicates behind. Only create and populate the dataset
# when it is not there yet; otherwise just look it up.
if client.has_dataset(dataset_name=dataset_name):
    dataset = client.read_dataset(dataset_name=dataset_name)
    upload_result = None  # nothing uploaded on this run
else:
    dataset = client.create_dataset(
        dataset_name=dataset_name,
        description="Basic concepts and phenomena in astronomy"
    )
    upload_result = client.create_examples(
        inputs=inputs,
        outputs=outputs,
        dataset_id=dataset.id,
    )

# Last expression: shows the upload summary when examples were created.
upload_result

{'example_ids': ['1601a93d-6acd-478d-bc43-d5124387ec13',
  'b7066e57-a41a-4b57-942c-1571ead54068',
  '0c6ecc67-7628-4e1c-91ed-7f4f658647dd'],
 'count': 3}

In [6]:
from langsmith import Client
client = Client()
# Fetch the "astronomy-basics" prompt by name.
# NOTE(review): include_model=True presumably returns the prompt together with
# its configured model — `generate_response` invokes `prompt` directly and
# `langsmith_rag` reads `.content` from the result. Confirm against the SDK docs.
prompt = client.pull_prompt("astronomy-basics", include_model=True)

                extra_headers was transferred to model_kwargs.
                Please confirm that extra_headers is what you intended.
  obj, end = self.raw_decode(s, idx=_w(s, 0).end())


In [7]:
import os
import tempfile
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.sitemap import SitemapLoader
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_openai import OpenAIEmbeddings
from langsmith import traceable
from langsmith.client import convert_prompt_to_openai_format as convert
from openai import OpenAI
from typing import List
import nest_asyncio

# Model identifiers: both are attached as trace metadata on `call_openai`, and
# MODEL_NAME is also the model that function requests.
MODEL_NAME = "gpt-4.1-mini"
MODEL_PROVIDER = "openai"
# NOTE(review): APP_VERSION is not referenced in the visible part of this notebook.
APP_VERSION = 1.0



# Shared OpenAI client used by `call_openai`.
openai_client = OpenAI()

def get_vector_db_retriever():
    """Return a retriever backed by an SKLearnVectorStore persisted as parquet.

    The store lives at ``union.parquet`` inside the system temp directory.

    - If that file already exists, the store is constructed pointing at it
      (presumably loading the persisted vectors — confirm against the
      SKLearnVectorStore docs) and nothing is downloaded.
    - Otherwise the pages listed in the LangSmith docs sitemap are loaded,
      split into 500-token chunks with no overlap, embedded with
      OpenAIEmbeddings, and persisted to that file.

    Returns:
        The result of ``vectorstore.as_retriever(lambda_mult=0)``.
    """
    cache_file = os.path.join(tempfile.gettempdir(), "union.parquet")
    embeddings = OpenAIEmbeddings()

    if not os.path.exists(cache_file):
        # Cold start: crawl the sitemap, chunk the pages and build the store.
        sitemap_loader = SitemapLoader(
            web_path="https://docs.smith.langchain.com/sitemap.xml",
            continue_on_failure=True,
        )
        pages = sitemap_loader.load()

        splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
            chunk_size=500, chunk_overlap=0
        )
        chunks = splitter.split_documents(pages)

        store = SKLearnVectorStore.from_documents(
            documents=chunks,
            embedding=embeddings,
            persist_path=cache_file,
            serializer="parquet"
        )
        store.persist()
    else:
        # Warm start: reuse the file written by an earlier run.
        store = SKLearnVectorStore(
            embedding=embeddings,
            persist_path=cache_file,
            serializer="parquet"
        )

    return store.as_retriever(lambda_mult=0)

# NOTE(review): nest_asyncio.apply() is presumably needed so the sitemap loader
# can run its async fetches inside the notebook's already-running event loop —
# confirm before removing.
nest_asyncio.apply()
# Module-level retriever used by `retrieve_documents`; built once when this
# cell runs (may crawl the docs sitemap if no persisted store exists yet).
retriever = get_vector_db_retriever()

"""
retrieve_documents
- Returns documents fetched from a vectorstore based on the user's question
"""
@traceable(run_type="chain")
def retrieve_documents(question: str):
    """Return what the module-level `retriever` yields for `question`.

    Args:
        question: The user's question, passed unchanged to `retriever.invoke`.

    Returns:
        The result of `retriever.invoke(question)`.
    """
    documents = retriever.invoke(question)
    return documents

"""
generate_response
- Joins the document contents and invokes the pulled `prompt` with the question and that context, returning its result
"""
@traceable(run_type="chain")
def generate_response(question: str, documents):
    """Invoke the module-level `prompt` with the question and document text.

    Args:
        question: The user's question, passed as the "question" input.
        documents: Iterable of objects exposing `page_content`; their contents
            are joined with blank lines and passed as the "context" input.

    Returns:
        Whatever `prompt.invoke` returns, unchanged.
    """
    context_text = "\n\n".join(doc.page_content for doc in documents)
    prompt_inputs = {
        "question": question,
        "context": context_text
    }
    return prompt.invoke(prompt_inputs)

"""
call_openai
- Returns the chat completion output from OpenAI
"""
@traceable(
    run_type="llm",
    metadata={
        "ls_provider": MODEL_PROVIDER,
        "ls_model_name": MODEL_NAME
    }
)
def call_openai(messages: List[dict]):
    """Request a chat completion for `messages` from the OpenAI client.

    The previous `-> str` annotation was incorrect: the function returns the
    object produced by `openai_client.chat.completions.create` as-is, not the
    message text, so callers must extract the content themselves.

    Args:
        messages: Chat messages forwarded unchanged as the `messages` argument.

    Returns:
        The raw result of `openai_client.chat.completions.create` for
        `MODEL_NAME`.

    NOTE(review): no other function in the visible part of this notebook calls
    this helper.
    """
    return openai_client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
    )

"""
langsmith_rag
- Calls `retrieve_documents` to fetch documents
- Calls `generate_response` to generate a response based on the fetched documents
- Returns the model response
"""
@traceable(run_type="chain")
def langsmith_rag(question: str):
    """Run retrieval and generation for `question` and return the answer text.

    - Calls `retrieve_documents` to fetch documents.
    - Calls `generate_response` with the question and those documents.
    - Extracts the text from the response, trying in order:
        1. a `content` attribute on the response object;
        2. for a dict, `choices[0]["message"]["content"]`;
        3. for a dict, a top-level "content" key;
        4. otherwise `str(response)`.

    Args:
        question: The user's question.

    Returns:
        The extracted content, or the string form of the response when none of
        the known shapes match.
    """
    documents = retrieve_documents(question)
    response = generate_response(question, documents)

    # Message-like objects expose their text directly.
    if hasattr(response, "content"):
        return response.content

    if isinstance(response, dict):
        choices = response.get("choices")
        if choices:
            message = choices[0].get("message") or {}
            if isinstance(message, dict) and "content" in message:
                return message["content"]
        # Single top-level fallback; the original repeated this check inside
        # the `choices` branch as well, which was redundant.
        if "content" in response:
            return response["content"]

    return str(response)


In [8]:
# Ask the pipeline each of the dataset's questions and print the exchange.
questions = [
    "What causes the phases of the Moon?",
    "What is a light-year and what does it measure?",
    "Why do stars twinkle when viewed from Earth?",
]

for query in questions:
    # The question is printed before the pipeline runs, so it appears even if
    # the call below is slow or fails.
    print(f"Question: {query}")
    answer = langsmith_rag(query)
    print(f"Response: {answer}\n")

Question: What causes the phases of the Moon?
Response: The phases of the Moon are caused by the changing relative positions of the Earth, Moon, and Sun. As the Moon orbits Earth, different portions of its surface are illuminated by the Sun, creating the visible phases ranging from new moon to full moon. This cycle repeats approximately every 29.5 days.

Question: What is a light-year and what does it measure?
Response: A light-year is the distance that light travels in one year through a vacuum. It measures length or distance, commonly used in astronomy to express the vast distances between celestial objects. One light-year is approximately 9.46 trillion kilometers (5.88 trillion miles).

Question: Why do stars twinkle when viewed from Earth?
Response: Stars twinkle when viewed from Earth because their light passes through Earth's turbulent atmosphere, which causes the light to bend or refract in different directions. This atmospheric distortion makes the star's light appear to change