In [8]:
## Load environment variables

import os
from dotenv import load_dotenv, find_dotenv, dotenv_values

# Locate the .env file, starting the search from the current working directory
dotenv_path = find_dotenv(usecwd=True)
print("dotenv_path:", dotenv_path or "NOT FOUND")
# override=True lets values from the file replace variables already set in the process
load_dotenv(dotenv_path=dotenv_path, override=True)

# Show which keys were parsed from the file (names only, never values)
parsed = dotenv_values(dotenv_path) if dotenv_path else {}
print("Keys in .env:", sorted(parsed.keys()))
print("Has OPENAI_API_KEY in .env?:", "OPENAI_API_KEY" in parsed)

# Report only whether the key reached the environment. No part of the secret is
# printed or kept in a notebook variable: cell output is saved with the notebook
# and is easy to share or commit by accident.
print("Env OPENAI_API_KEY present?:", os.getenv("OPENAI_API_KEY") is not None)

# Current working directory (to catch path mistakes)
print("cwd:", os.getcwd())

dotenv_path: /Users/anupam/Documents/Programming/rag101/.env
Keys in .env: ['LANGSMITH_API_KEY', 'LANGSMITH_ENDPOINT', 'LANGSMITH_PROJECT', 'LANGSMITH_TRACING', 'OPENAI_API_KEY', 'POSTS_SOURCE']
Has OPENAI_API_KEY in .env?: True
Env OPENAI_API_KEY present?: True
Value prefix (masked): sk-pro…
cwd: /Users/anupam/Documents/Programming/rag101


In [9]:
# Define LLM model

import getpass, os
from langchain.chat_models import init_chat_model

# Ask for the OpenAI key interactively only when the environment lacks one.
if not os.getenv("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

# Chat model shared by the rest of the notebook.
llm = init_chat_model(
    "gpt-4o-mini",
    model_provider="openai",
    verbose=True,
)

In [10]:
# Choose embeddings

import getpass
import os

# Same interactive fallback as the LLM cell, so this cell also works on its own.
if not os.getenv("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

from langchain_openai import OpenAIEmbeddings

# Embedding model used to index posts and to embed incoming questions.
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large",
)

In [11]:
# Choose vector store

from langchain_core.vectorstores import InMemoryVectorStore

# Vector store backed by the embeddings model chosen above.
vector_store = InMemoryVectorStore(embedding=embeddings)

In [12]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langgraph.graph import START, StateGraph
from langchain_core.prompts import PromptTemplate
from typing_extensions import List, TypedDict
import json

class BlogPost(TypedDict):
    """One blog post record, as returned by ``load_posts``."""
    id: str     # becomes the Document id and the `post_id` metadata entry
    title: str  # prefixed to the body in the indexed text; also stored as metadata
    link: str   # stored as metadata and shown in the answer's links section
    body: str   # main text of the post

def load_posts(json_path: str) -> List[BlogPost]:
    """Read the posts JSON file and return its parsed content.

    Args:
        json_path: Path to a UTF-8 encoded JSON file.

    Returns:
        Whatever the file decodes to; no validation is performed here.
    """
    with open(json_path, encoding='utf-8') as posts_file:
        parsed_posts = json.load(posts_file)
    return parsed_posts

def create_documents(posts: List[BlogPost]) -> List[Document]:
    """Convert blog posts into LangChain ``Document`` objects.

    The document text is the post title followed by the body, so the title is
    embedded together with the content. The post id is used as the document id
    and is also copied into the metadata as ``post_id``.

    Args:
        posts: Posts carrying the ``id``, ``title``, ``link`` and ``body`` keys.

    Returns:
        One ``Document`` per post, in input order.

    Raises:
        KeyError: If a post lacks one of the required keys.
    """
    return [
        Document(
            # Combine title and body
            page_content=f"Title: {post['title']}\n\n{post['body']}",
            metadata={
                'link': post['link'],
                'title': post['title'],
                'post_id': post['id'],
            },
            id=post['id'],
        )
        for post in posts
    ]

# Resolve the posts file from the environment and fail early with a clear
# message; passing None on to open() would raise an opaque TypeError instead.
posts_source = os.environ.get("POSTS_SOURCE")
if not posts_source:
    raise RuntimeError("POSTS_SOURCE is not set; define it in .env or the environment.")

# Load posts
print("Loading posts...")
posts = load_posts(posts_source)
print(f"Loaded {len(posts)} posts")

# Convert to Langchain documents
print("Converting to Langchain documents...")
documents = create_documents(posts)
print(f"Created {len(documents)} documents")

# Index documents
document_ids = vector_store.add_documents(documents=documents)

# Show only the first few ids as a sanity check
print("Document Ids:", document_ids[:5])

# Define prompt for question-answering.
# {question} is the user's question and {context} is the text of the retrieved
# posts joined into one string; both are filled in by generate_with_links.
prompt = PromptTemplate(
    input_variables=["question", "context"],
    template="""
        Act as a conversational interface for answering questions based on the content of the blog in your knowledge base.

        When posts related to a specific topic don't exist, return no results.
                
        Question: {question} 
        Context: {context} 
        Answer:
        """
)

# Define state for application
class State(TypedDict):
    """State passed between the graph steps."""
    question: str            # user question; read by retrieve and generate_with_links
    context: List[Document]  # documents selected by retrieve
    answer: str              # final text produced by generate_with_links


# Define application steps
def retrieve(state: State, min_similarity: float = 0.4, max_docs: int = 8):
    """Fetch the posts most similar to the question and keep the relevant ones.

    Queries ``vector_store`` for up to ``max_docs`` candidates, keeps those whose
    score is at least ``min_similarity`` and drops repeats of the same post.
    The title and score of every kept document are printed for inspection.

    Args:
        state: Graph state; only ``state["question"]`` is read.
        min_similarity: Minimum score a document needs in order to be kept.
        max_docs: Maximum number of candidates requested from the store.

    Returns:
        A partial state update ``{"context": [...]}`` holding the kept
        documents in the order the store returned them.
    """
    results = vector_store.similarity_search_with_score(state["question"], k=max_docs)
    # The `>=` filter assumes a higher score means a closer match, which is the
    # case for the cosine similarity reported by InMemoryVectorStore. A backend
    # that reports a distance instead would need the comparison flipped.
    relevant = []
    relevant_log = []
    seen_post_ids = set()
    for doc, score in results:
        if score >= min_similarity:
            post_id = doc.metadata.get('post_id')
            # De-duplicate only when a post id is present; otherwise every
            # document without one would be treated as a repeat of the first.
            if post_id is not None:
                if post_id in seen_post_ids:
                    continue
                seen_post_ids.add(post_id)
            relevant.append(doc)
            relevant_log.append(f"Doc: {doc.metadata.get('title', 'Unknown')}\nScore: {score}")
    print("\n\n".join(relevant_log))
    return {"context": relevant}

def generate_with_links(state: State):
    """Answer the question from the retrieved posts and append their links.

    With an empty context no model call is made and a fixed "I don't know."
    answer is returned. Otherwise the page contents of the context documents
    are joined, sent through ``prompt`` and ``llm``, and the model's answer is
    followed by a markdown list of the posts it was based on.

    Args:
        state: Graph state; ``state["context"]`` and ``state["question"]`` are read.

    Returns:
        A partial state update ``{"answer": <text>}``.
    """
    context_docs = state["context"]
    if not context_docs:
        return {"answer": "I don't know.\n\nNo relevant blog posts found."}

    # Get the base answer
    docs_content = "\n\n".join(doc.page_content for doc in context_docs)
    messages = prompt.invoke({"question": state["question"], "context": docs_content})
    response = llm.invoke(messages)
    base_answer = response.content

    # Collect links keyed by the link itself rather than by title: two posts
    # may share a title (or both fall back to 'Unknown') and must each keep
    # their link, while one link should be listed only once.
    titles_by_link = {}
    for doc in context_docs:
        link = doc.metadata.get('link', '')
        if link and link not in titles_by_link:
            titles_by_link[link] = doc.metadata.get('title', 'Unknown')

    # Format links section
    if titles_by_link:
        links_section = "\n\nRelevant blog posts:\n" + "".join(
            f"- [{title}]({link})\n" for link, title in titles_by_link.items()
        )
        final_answer = base_answer + links_section
    else:
        final_answer = base_answer + "\n\nNo relevant blog posts found."

    return {"answer": final_answer}


# Wire the two steps into a linear graph (START -> retrieve -> generate_with_links)
# and compile it into the runnable application.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate_with_links])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

Loading posts...
Loaded 100 posts
Converting to Langchain documents...
Created 100 documents
Document Ids: ['9', '16', '18', '20', '24']


In [23]:
# Create evaluation dataset
# (run this cell after the cell that builds `graph`)

import gc
from typing import Dict, List, Optional

def create_evaluation_dataset(question: str, reference: Optional[str] = None) -> List[Dict]:
    """Run one question through the RAG graph and package the result for evaluation.

    Args:
        question: The question passed to ``graph``.
        reference: Optional ground-truth answer. When omitted, the generated
            answer (minus its links section) is reused as the reference. That
            fallback makes the reference a copy of the response, so any metric
            that compares against the reference becomes self-referential;
            supply a hand-written reference for meaningful scores.

    Returns:
        A single-element list holding one sample with the keys ``user_input``,
        ``retrieved_contexts``, ``response``, ``reference`` and
        ``reference_contexts``.
    """
    # Get RAG response
    response = graph.invoke({"question": question})

    # Extract retrieved contexts (from the retrieve step)
    retrieved_docs = response.get("context") or []
    retrieved_contexts = [doc.page_content for doc in retrieved_docs]

    answer = response["answer"]
    if reference is None:
        # Remove links section for reference
        reference = answer.split("\n\nRelevant blog posts:")[0]

    return [{
        "user_input": question,
        "retrieved_contexts": retrieved_contexts,
        "response": answer,
        "reference": reference,
        # NOTE(review): this slice skips the top-ranked context and keeps at
        # most two; it looks like a placeholder — confirm the intended contexts.
        "reference_contexts": retrieved_contexts[1:3],
    }]

Creating evaluation dataset...
Doc: A writing revolution
Score: 0.4463423826536503

Doc: Clarity before complexity
Score: 0.43882085255962705

Doc: Deconstructing writer's block
Score: 0.4105806503491311
Created 1 evaluation examples
To write better, consider the following insights from the blog:

1. **Free Your Memory**: Writing helps you organize your thoughts and prioritize them. By putting your ideas on paper or screen, you can alleviate the mental burden of juggling information in your head.

2. **Clarity Over Complexity**: Focus on conveying a clear message. A simple and coherent message is the foundation of good writing. While style can add interest, it should not overshadow the clarity of your message.

3. **Embrace the Process**: Writer's block is common, but overcoming it involves accepting that not every piece of writing will be perfect. Prolific writers often produce several drafts, understanding that bad work can lead to better outcomes. Embrace the creative process as a r

In [24]:
# Build the evaluation sample for one question and show what was generated
sample_question = "How can I write better?"

print("Creating evaluation dataset...")
evaluation_dataset_raw = create_evaluation_dataset(question=sample_question)
print(f"Created {len(evaluation_dataset_raw)} evaluation examples")

# Print the full response first, then the reference derived from it
for field in ("response", "reference"):
    print(evaluation_dataset_raw[0][field])

Creating evaluation dataset...
Doc: A writing revolution
Score: 0.4463294424582956

Doc: Clarity before complexity
Score: 0.4387144257987414

Doc: Deconstructing writer's block
Score: 0.4104786097739597
Created 1 evaluation examples
To write better, consider the following insights from the blog:

1. **Free Your Memory**: Writing helps you organize your thoughts and prioritize tasks. By putting your ideas on paper or screen, you can alleviate the mental burden of juggling information in your head.

2. **Clarity Over Complexity**: Focus on conveying a clear message. A simple and coherent message serves as the foundation for good writing. While style can enhance your writing, it should not overshadow the clarity of your message.

3. **Embrace the Process**: Understand that writer's block is a common challenge. To overcome it, prolific writers often embrace the idea of producing "bad" work as part of the creative process. This approach allows them to break through barriers and improve over

In [27]:
# Cell 9: Setup Ragas evaluation
from ragas import EvaluationDataset, evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import (
    LLMContextRecall, 
    Faithfulness, 
    FactualCorrectness,
    AnswerRelevancy,
    ContextPrecision
)
def perform_ragas_evaluation(evaluation_dataset_raw):
    """Score RAG samples with Ragas, print the scores and return them.

    Args:
        evaluation_dataset_raw: List of sample dicts, as produced by
            ``create_evaluation_dataset``.

    Returns:
        The object returned by ``ragas.evaluate``. It used to be printed and
        then discarded; returning it lets callers inspect or store the scores.
    """
    # Convert to Ragas format
    evaluation_dataset = EvaluationDataset.from_list(evaluation_dataset_raw)

    # Setup evaluator LLM (using the same LLM for consistency)
    evaluator_llm = LangchainLLMWrapper(llm)

    # Choose metrics (start with lighter ones to avoid memory issues)
    metrics = [
        AnswerRelevancy(),      # How relevant is the answer to the question
        Faithfulness(),         # Is the answer faithful to the retrieved context
        ContextPrecision()      # How precise is the retrieved context
    ]

    print("Starting Ragas evaluation...")
    print("This may take a few minutes...")

    # Add garbage collection before evaluation
    gc.collect()

    # Run evaluation
    result = evaluate(
        dataset=evaluation_dataset,
        metrics=metrics,
        llm=evaluator_llm
    )
    print("Evaluation completed!")
    print(f"Results: {result}")
    return result

In [29]:
# Evaluate a sample question end to end: build the dataset, then score it
sample_question = "How can I write better?"

print("Creating evaluation dataset...")
evaluation_dataset_raw = create_evaluation_dataset(question=sample_question)
print(f"Created {len(evaluation_dataset_raw)} evaluation examples")
perform_ragas_evaluation(evaluation_dataset_raw=evaluation_dataset_raw)

Creating evaluation dataset...
Doc: A writing revolution
Score: 0.44637411728083753

Doc: Clarity before complexity
Score: 0.4387969789749079

Doc: Deconstructing writer's block
Score: 0.4105540094854879
Created 1 evaluation examples
Starting Ragas evaluation...
This may take a few minutes...


Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

Evaluation completed!
Results: {'answer_relevancy': 0.9218, 'faithfulness': 0.9444, 'context_precision': 1.0000}
