In [1]:
## Load environment variables

import os
from dotenv import load_dotenv, find_dotenv, dotenv_values

# Locate the .env file (usecwd=True) and load it with an explicit path.
# override=True is passed so the file is allowed to override existing values.
# find_dotenv yields a falsy value when nothing is found, hence the "NOT FOUND" fallback.
dotenv_path = find_dotenv(usecwd=True)
print("dotenv_path:", dotenv_path or "NOT FOUND")
load_dotenv(dotenv_path=dotenv_path, override=True)

# Show what was parsed from the file (key names only, never the values)
parsed = dotenv_values(dotenv_path) if dotenv_path else {}
print("Keys in .env:", sorted(parsed.keys()))
print("Has OPENAI_API_KEY in .env?:", "OPENAI_API_KEY" in parsed)

# Confirm the key actually reached the process environment.
# NOTE(review): the last print emits the first 6 characters of the key and the
# output is stored with the notebook — consider dropping it before sharing.
val = os.getenv("OPENAI_API_KEY")
print("Env OPENAI_API_KEY present?:", val is not None)
print("Value prefix (masked):", (val[:6] + "…") if val else None)

# Current working directory (to catch path mistakes)
print("cwd:", os.getcwd())

dotenv_path: /Users/anupam/Documents/Programming/rag101/.env
Keys in .env: ['LANGSMITH_API_KEY', 'LANGSMITH_ENDPOINT', 'LANGSMITH_PROJECT', 'LANGSMITH_TRACING', 'OPENAI_API_KEY', 'POSTS_SOURCE']
Has OPENAI_API_KEY in .env?: True
Env OPENAI_API_KEY present?: True
Value prefix (masked): sk-pro…
cwd: /Users/anupam/Documents/Programming/rag101


In [2]:
# Define LLM model

import getpass, os
from langchain.chat_models import init_chat_model

# Prompt for the key interactively only when it is missing (or empty) in the environment
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

# Chat model used by generate_with_links and wrapped as the Ragas evaluator LLM
llm = init_chat_model("gpt-4o-mini", model_provider="openai", verbose=True)

In [3]:
# Choose embeddings

import getpass
import os

# Repeats the key check from the chat-model cell: prompt only when the key is missing or empty
if not os.environ.get("OPENAI_API_KEY"):
  os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

from langchain_openai import OpenAIEmbeddings

# Embedding model handed to the vector store
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

In [4]:
# Choose vector store

from langchain_core.vectorstores import InMemoryVectorStore

# Store built on the embeddings above; filled by add_documents and queried by retrieve
vector_store = InMemoryVectorStore(embeddings)

In [5]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langgraph.graph import START, StateGraph
from langchain_core.prompts import PromptTemplate
from typing_extensions import List, TypedDict
import json

class BlogPost(TypedDict):
    """One blog post record as stored in the posts JSON file."""
    id: str     # used as both the document id and the 'post_id' metadata entry
    title: str  # prepended to the body text and shown in the links section
    link: str   # copied into document metadata
    body: str   # main text of the post

def load_posts(json_path: str) -> List[BlogPost]:
    """Read the UTF-8 JSON file at ``json_path`` and return the decoded content."""
    with open(json_path, mode='r', encoding='utf-8') as posts_file:
        raw_text = posts_file.read()
    return json.loads(raw_text)

def create_documents(posts: List[TypedDict]) -> List[Document]:
    """Turn post records into LangChain ``Document`` objects, one per post.

    The page content is ``"Title: <title>"``, a blank line, then the body.
    ``link``, ``title`` and ``id`` (as ``post_id``) are copied into the
    metadata, and the post id is also used as the document id.

    NOTE(review): ``posts`` is annotated ``List[TypedDict]``, but the elements
    are read like ``BlogPost`` records (title, body, link, id) — consider
    annotating the parameter as ``List[BlogPost]``.
    """
    return [
        Document(
            page_content=f"Title: {post['title']}\n\n{post['body']}",
            metadata={
                'link': post['link'],
                'title': post['title'],
                'post_id': post['id'],
            },
            id=post['id'],
        )
        for post in posts
    ]

# Resolve the posts file from the environment and fail early with an actionable
# message: os.environ.get returns None when POSTS_SOURCE is unset, and passing
# None on to load_posts would surface as an opaque TypeError from open().
posts_source = os.environ.get("POSTS_SOURCE")
if not posts_source:
    raise RuntimeError(
        "POSTS_SOURCE is not set. Define it in .env (or the environment) "
        "as the path to the posts JSON file."
    )

# Load posts
print("Loading posts...")
posts = load_posts(posts_source)
print(f"Loaded {len(posts)} posts")

# Convert to Langchain documents
print("Converting to Langchain documents...")
documents = create_documents(posts)
print(f"Created {len(documents)} documents")

# Index documents; the returned ids are previewed below as a sanity check
document_ids = vector_store.add_documents(documents=documents)

print("Document Ids:", document_ids[:5])

# Define prompt for question-answering.
# {question} and {context} are filled in by generate_with_links: the question comes
# from the graph state and the context is the joined text of the retrieved documents.
prompt = PromptTemplate(
    input_variables=["question", "context"],
    template="""
        Act as a conversational interface for answering questions based on the content of the blog in your knowledge base.

        When posts related to a specific topic don't exist, return no results.
                
        Question: {question} 
        Context: {context} 
        Answer:
        """
)

# Define state for application
class State(TypedDict):
    """State schema given to StateGraph and passed to each graph step."""
    question: str  # user question; read by retrieve and generate_with_links
    context: List[Document]  # documents returned by retrieve
    answer: str  # final text returned by generate_with_links


# Define application steps
def retrieve(state: State, min_similarity: float = 0.44, max_docs: int = 8):
    """Fetch the documents that match the question well enough to be used as context.

    Args:
        state: Graph state; only ``state["question"]`` is read.
        min_similarity: Lowest score a hit may have and still be kept.
        max_docs: Number of hits requested from the vector store (``k``).

    Returns:
        ``{"context": docs}`` with the kept documents in the order the store
        returned them, at most one per ``post_id``.

    The title and score of every kept document are printed; an empty line is
    printed when nothing is kept.

    NOTE(review): the filter assumes a higher score means a closer match. That
    depends on the vector-store backend — confirm it for the store in use.
    """
    scored_hits = vector_store.similarity_search_with_score(state["question"], k=max_docs)

    kept_docs = []
    log_entries = []
    emitted_post_ids = set()
    for doc, score in scored_hits:
        # Negated ">=" rather than "<", so a NaN score is rejected as well
        if not (score >= min_similarity):
            continue
        post_id = doc.metadata.get('post_id')
        if post_id in emitted_post_ids:
            # Keep only the first hit for each post
            continue
        emitted_post_ids.add(post_id)
        kept_docs.append(doc)
        log_entries.append(f"Doc: {doc.metadata.get('title', 'Unknown')}\nScore: {score}")

    print("\n\n".join(log_entries))
    return {"context": kept_docs}

def generate_with_links(state: State):
    """Answer the question from the retrieved context and append source links.

    Args:
        state: Graph state; ``state["context"]`` and ``state["question"]`` are read.

    Returns:
        ``{"answer": text}``. With no context the LLM is not called and a fixed
        "I don't know." message is returned. Otherwise the LLM reply is followed
        by a "Relevant blog posts" list (first link seen per title), or by a
        "No relevant blog posts found." note when no document carries a link.
    """
    no_posts_note = "\n\nNo relevant blog posts found."

    context_docs = state["context"]
    if not context_docs:
        return {"answer": "I don't know." + no_posts_note}

    # Ask the LLM with all retrieved texts joined into a single context string
    joined_context = "\n\n".join(doc.page_content for doc in context_docs)
    llm_reply = llm.invoke(
        prompt.invoke({"question": state["question"], "context": joined_context})
    )
    answer_text = llm_reply.content

    # First non-empty link per title, in retrieval order
    links_by_title = {}
    for doc in context_docs:
        title = doc.metadata.get('title', 'Unknown')
        link = doc.metadata.get('link', '')
        if link:
            links_by_title.setdefault(title, link)

    if not links_by_title:
        return {"answer": answer_text + no_posts_note}

    bullets = "".join(f"- [{title}]({link})\n" for title, link in links_by_title.items())
    return {"answer": answer_text + ("\n\nRelevant blog posts:\n" + bullets)}


# Build and compile the application graph: retrieve -> generate_with_links,
# with START wired to the "retrieve" node (nodes are referred to by function name)
graph_builder = StateGraph(State).add_sequence([retrieve, generate_with_links])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

USER_AGENT environment variable not set, consider setting it to identify your requests.


Loading posts...
Loaded 100 posts
Converting to Langchain documents...
Created 100 documents
Document Ids: ['9', '16', '18', '20', '24']


In [6]:
# Create evaluation dataset: run a question through the RAG graph and package the result for Ragas

import gc
from typing import Dict, List, Optional

def create_evaluation_dataset(
    question: str,
    reference: Optional[str] = None,
    reference_contexts: Optional[List[str]] = None,
) -> List[Dict]:
    """Run one question through the RAG graph and package the result for Ragas.

    Args:
        question: The user question passed to ``graph.invoke``.
        reference: Ground-truth answer for ``question``. When omitted, the
            built-in reference about writing better is used (the previous,
            hard-coded behaviour), so pass an explicit value for any other
            question.
        reference_contexts: Ground-truth context passages for ``question``.
            When omitted, the three built-in writing posts are used.

    Returns:
        A one-element list holding a dict with the keys ``user_input``,
        ``retrieved_contexts``, ``response``, ``reference`` and
        ``reference_contexts``.
    """
    # Get RAG response
    response = graph.invoke({"question": question})

    # Page text of every document kept by the retrieve step (empty when there is none)
    retrieved_docs = response.get("context", [])
    retrieved_contexts = [doc.page_content for doc in retrieved_docs] if retrieved_docs else []
    answer = response["answer"]

    if reference is None:
        # Default reference answer, written for the question "How can I write better?"
        reference = """
                        To write better, consider the following insights from the blog:

                        1. **Free Your Memory**: Writing helps you organize your thoughts and prioritize tasks. By putting your ideas on paper or screen, you can alleviate mental clutter and gain clarity.
                        
                        2. **Clarity Over Complexity**: Focus on conveying a simple and coherent message. While style can enhance your writing, it should not overshadow the clarity of your message. Aim to express your ideas in a way that even a five-year-old could understand.
                        
                        3. **Embrace the Process**: Understand that writer's block is a common challenge. To overcome it, prolific writers often embrace the idea of producing "bad" work. They recognize that creating multiple drafts, even if they are not perfect, can lead to better writing in the end.
                        
                        4. **Revise and Reflect**: Writing allows you to see your thoughts reflected back at you. Use this opportunity to revise and refine your ideas, which can help improve your writing skills over time.
                        
                        By incorporating these practices into your writing routine, you can enhance your skills and develop a more effective writing style.
                        """

    if reference_contexts is None:
        # Default reference contexts: the three writing posts the default reference is based on
        reference_contexts = [
            'Title: A writing revolution\n\nWhat does it mean to write? How does it help? Let me list the three things that come to my mind most easily.\n\nWriting frees up my memory. It keeps me from constantly juggling with information in my head. This applies to shopping lists, to-do lists, as well as to problems and concerns that plague my head. Writing helps imprison them on paper, or in more recent times, behind a screen.\xa0It helps me look at these items and prioritize them. Whatever festers as a demon in my head eventually turns into a mere trifle after I write it down.\n\nSecondly, it helps me step above information and observe the patterns that emerge. Journals, blogs and articles help their authors ideate by doing so. Along with this clarity comes accountability. I am forced to think and express my ideas coherently, since it is impossible to take back what I have written.\n\nMost importantly, writing gives us feedback on what our thoughts look like. Unlike spoken words, which fade away, written words stare back like reflections from a mirror. Thereafter, writing enables us to go back and revise what we express, closing the loop. By editing our sentences, we are also rewiring our brain.\n\nWriting is one of our oldest triumphs as a species. At the same time, it is sorely underrated. Only a handful of people utilize it. There is scope yet for a writing revolution - where each person writes everyday just as surely as they bathe or brush their teeth.',
            "Title: Clarity before complexity\n\nThe constituents of writing\xa0are primarily a message and a writing style.\n\nThe message is the the idea behind the written words. It is the concept that a textbook explains, the plot that a story elaborates, the universal truth that a poem connects with, the event that a newspaper publicises or the discovery that a research paper publishes. At its crux, a message should be simple enough for a five-year old to understand.\n\nBut reading what a five-year old can comprehend is boring. Our mind craves complexity and sophistication. This is where style comes in.\n\nStyle is the choice of words used to convey the meaning. It could be simple and straightforward, or could comprise meaningful metaphors and alluring alliterations. It enables the writer to express herself while entertaining her audience and having fun. We are born puzzle-solvers and writers weave clever patterns into their writing to be more interesting. In some cases, the subject matter is abstract enough for the writer to do away with complexity completely, and adopt a simple style. This is true especially of research papers. At other times, an ornamental style enhances writing, like it does in literature.\n\nHowever, style and message do not get along well all the time. The need to for clarity could conflict with the complexity that style can induce. How does one prioritize?\n\nIn most cases, the message takes precedence over style. A simple and coherent message serves as the foundation for good writing. This would be true for all forms of writing, across literature, poetry and journalism and especially scientific writing. The trunk of the tree ought to be thicker than its branches.\n\nMost budding writers (including me) struggle with this prioritization. It is easier to write long sophisticated sentences than to simplify an idea. This complexity is often present because the message is not clear to the writer's himself. Sophisticated language serves as a veil for incompetence. Our minds and its mechanics are constantly tricking us (and others) into believing that we are smarter than we actually are.\n\nAll great writing has survived through the ages because of the coherent message beneath their intricate sentences. This is why the essence of Shakespeare's plays are simple, yet profound expressions of human nature.\n\nIn summary, it is better to write exactly what you mean, before resorting to fluffy and flowery, but frivolous language.",
            'Title: Deconstructing writer\'s block\n\nWhat is writer\'s block?\n\nIt is sitting at the table to write, but not having the words to follow through.\xa0Anybody who has written for a while has experienced it. I have experienced it several times. I still do, as I type out these words.\xa0More broadly, this can be called the artist\'s block, because every creative profession seems to suffer from it. But what makes these professions unique? Why do plumbers not have plumber\'s block? Why do carpenters not have carpenter\'s block? Few things could be more disastrous than surgeon\'s block!\n\nThis is partly because of the inherent randomness of the quality of output in creative processes.\xa0By definition, creativity does not follow a set process. Creativity taps into a different part of the brain than the one used to perform logical, repeatable processes.\xa0 No wonder that albums released by musicians, different works of painters, and books published by authors can vary so widely in the quality of their output.\n\nBut this variation alone is not enough. Additionally, there is the tendency to hold on to one\'s last good piece of work. The tendency to allow build one\'s perception around "a hit" and the tendency to not fall down from these standards. Again, the wiring of our brain is to blame here. Our tribalistic need to protect our reputation has helped us survive through our evolution. It now holds us back by inducing fear into our next creative project by whispering to us, "if you can\'t make it better, you are better off not trying". Moreover, the better we get, the louder this voice whispers.\n\nThis combination of wide variation in quality, and clinging to one\'s best work is what causes the crippling phenomenon of writer\'s block. All prolific artists who overcome writer\'s block embrace bad work. They approach the creative process like a routine. They realize that the only way to write better than their best work, is to follow through with 5 bad ones, so that the 6th work will rise to the occasion.\n\nAnd by doing so, they kill writer\'s block before it kills them.\n\nInspiration: Seth Godin\'s excellent podcast - Akimbo',
        ]

    return [{
        "user_input": question,
        "retrieved_contexts": retrieved_contexts,
        "response": answer,
        "reference": reference,
        "reference_contexts": reference_contexts,
    }]

In [7]:
# Set up Ragas evaluation
from ragas import EvaluationDataset, evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import (
    LLMContextRecall, 
    Faithfulness, 
    FactualCorrectness,
    AnswerRelevancy,
    LLMContextPrecisionWithoutReference,
    LLMContextPrecisionWithReference,
    NonLLMContextPrecisionWithReference,
    LLMContextRecall,
    NonLLMContextRecall
    
)
def perform_ragas_evaluation(evaluation_dataset_raw):
    """Score RAG samples with Ragas and print the result.

    Args:
        evaluation_dataset_raw: List of sample dicts, as built by
            ``create_evaluation_dataset``; converted with
            ``EvaluationDataset.from_list``.

    Returns:
        None. Progress messages and the evaluation result are printed.
    """
    # Raw sample dicts -> Ragas dataset
    ragas_dataset = EvaluationDataset.from_list(evaluation_dataset_raw)

    # The evaluator wraps the same chat model that generates the answers
    judge_llm = LangchainLLMWrapper(llm)

    # Metrics to compute (kept to a small set to avoid memory issues).
    # LLMContextPrecisionWithReference, NonLLMContextPrecisionWithReference,
    # LLMContextRecall and NonLLMContextRecall are imported but left disabled.
    active_metrics = [
        AnswerRelevancy(),                      # how relevant the answer is to the question
        Faithfulness(),                         # whether the answer is faithful to the retrieved context
        LLMContextPrecisionWithoutReference(),
    ]

    print("Starting Ragas evaluation...")
    print("This may take a few minutes...")

    # Run a garbage-collection pass before the evaluation starts
    gc.collect()

    result = evaluate(dataset=ragas_dataset, metrics=active_metrics, llm=judge_llm)

    print("Evaluation completed!")
    print(f"Results: {result}")

In [8]:
# Sample question for evaluation (the question the built-in reference answer was written for)
sample_question = "How can I write better?"

# Run the question through the RAG graph, show the answer, then score it with Ragas
print("Creating evaluation dataset...")
evaluation_dataset_raw = create_evaluation_dataset(sample_question)
print(f"Created {len(evaluation_dataset_raw)} evaluation examples")
print('Here is the response: \n\n', evaluation_dataset_raw[0]['response']) 
perform_ragas_evaluation(evaluation_dataset_raw)

Creating evaluation dataset...
Doc: A writing revolution
Score: 0.44637411728083753
Created 1 evaluation examples
Here is the response: 

 To write better, you can consider the following points derived from "A writing revolution":

1. **Free Up Your Memory**: Writing helps you organize your thoughts and tasks, reducing the mental clutter. Make lists (shopping, to-do, etc.) to prioritize what’s important and alleviate the burden of juggling information in your head.

2. **Observe Patterns**: Use writing as a tool to identify and reflect on patterns in your thoughts and ideas. This can enhance your clarity and help you express your ideas more coherently and with accountability.

3. **Seek Feedback from Your Writing**: Written words serve as a reflection of your thoughts. Revisit and revise what you’ve written to gain insights and improve your expression. Editing not only refines your writing but also helps rewire your brain for clearer thinking.

4. **Make Writing a Habit**: Consider inc

Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

Evaluation completed!
Results: {'answer_relevancy': 0.9332, 'faithfulness': 1.0000, 'llm_context_precision_without_reference': 1.0000}


In [9]:
# Negative case for evaluation: a question the blog is not expected to cover,
# so retrieval should keep no documents and the fixed "I don't know." answer is returned.
# NOTE(review): create_evaluation_dataset(sample_question) still attaches the
# writing-advice reference and reference contexts to this sample.
sample_question = "What is the taste of an orange?"

print("Creating evaluation dataset...")
evaluation_dataset_raw = create_evaluation_dataset(sample_question)
print(f"Created {len(evaluation_dataset_raw)} evaluation examples")
print('Here is the response: \n\n', evaluation_dataset_raw[0]['response']) 
perform_ragas_evaluation(evaluation_dataset_raw)

Creating evaluation dataset...

Created 1 evaluation examples
Here is the response: 

 I don't know.

No relevant blog posts found.
Starting Ragas evaluation...
This may take a few minutes...


Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

Evaluation completed!
Results: {'answer_relevancy': 0.0000, 'faithfulness': 0.0000, 'llm_context_precision_without_reference': 0.0000}
