## Install dependencies:

In [1]:
# Install the LangChain packages at pinned, mutually compatible versions.
# langchain-core is not installed directly; pip pulls it in as a dependency
# of the packages below.
%pip install langchain-community==0.4.1
%pip install langchain-text-splitters==1.0.0
%pip install langchain-openai==1.1.0
%pip install langsmith==0.4.49
%pip install langchain==1.1.0

# Install the remaining packages: vector store (Chroma), HTML parsing
# (BeautifulSoup), and .env file loading (python-dotenv)
%pip install langchain-chroma==1.0.0
%pip install chromadb==1.3.5
%pip install beautifulsoup4==4.14.2
%pip install python-dotenv==1.2.1

Collecting langchain-community==0.4.1
  Downloading langchain_community-0.4.1-py3-none-any.whl.metadata (3.0 kB)
Collecting langchain-core<2.0.0,>=1.0.1 (from langchain-community==0.4.1)
  Downloading langchain_core-1.2.14-py3-none-any.whl.metadata (4.4 kB)
Collecting langchain-classic<2.0.0,>=1.0.0 (from langchain-community==0.4.1)
  Downloading langchain_classic-1.0.1-py3-none-any.whl.metadata (4.2 kB)
Collecting SQLAlchemy<3.0.0,>=1.4.0 (from langchain-community==0.4.1)
  Downloading sqlalchemy-2.0.46-py3-none-any.whl.metadata (9.5 kB)
Collecting requests<3.0.0,>=2.32.5 (from langchain-community==0.4.1)
  Using cached requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting PyYAML<7.0.0,>=5.3.0 (from langchain-community==0.4.1)
  Downloading pyyaml-6.0.3-cp311-cp311-macosx_10_13_x86_64.whl.metadata (2.4 kB)
Collecting aiohttp<4.0.0,>=3.8.3 (from langchain-community==0.4.1)
  Downloading aiohttp-3.13.3-cp311-cp311-macosx_10_9_x86_64.whl.metadata (8.1 kB)
Collecting tenacity!=8.4

## **1-Imports:**

In [1]:
import os

# Set in its own cell, ahead of the LangChain imports in the next cell.
# NOTE(review): presumably read by the web loader to identify its HTTP requests - confirm.
os.environ["USER_AGENT"] = "RAGUserAgent"

In [2]:
from langchain_community.document_loaders import WebBaseLoader
import bs4
import openai
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langsmith import Client
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_chroma import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from dotenv import load_dotenv

## **2-OpenAI Connections:**

In [3]:
# OpenAI Setup
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Fail fast with an actionable message instead of the cryptic
# "TypeError: str expected, not NoneType" that assigning a missing key
# back into os.environ would raise. An empty key is rejected as well.
openai_api_key = os.getenv('OPENAI_API_KEY')
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to a .env file next to this notebook "
        "or export it in your shell before starting the kernel."
    )

# The key is already present in os.environ at this point, so no re-assignment
# into the environment is needed; only the openai module attribute is set.
openai.api_key = openai_api_key

## **3-Indexing:**
### In this example, indexing consists of the following steps:
- Web loading and crawling.
- Splitting the data into chunks.
- Embedding and indexing those chunks.
- Adding those chunks and embeddings to the vector database.

### A)Web loading and crawling:

In [4]:
# Restrict HTML parsing to elements carrying the post's content, title, and header classes
post_sections = bs4.SoupStrainer(class_=("post-content", "post-title", "post-header"))

loader = WebBaseLoader(
    web_paths=("https://kbourne.github.io/chapter1.html",),
    bs_kwargs={"parse_only": post_sections},
)

# Fetch the page and turn it into LangChain documents
docs = loader.load()

### B)Splitting:

In [5]:
# Chunking settings: sizes are measured with len(), i.e. in characters,
# and separators are treated as literal strings rather than regexes.
splitter_settings = {
    "chunk_size": 1000,
    "chunk_overlap": 200,
    "length_function": len,
    "is_separator_regex": False,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)

# Break the loaded documents into overlapping chunks
splits = text_splitter.split_documents(docs)

### C)Embedding and indexing:

## **4-Retrieval and generation:**

In [None]:
# Embed every chunk with OpenAI embeddings and index the result in a Chroma store
embedding_model = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)

# Wrap the store in a retriever so it can be composed into the chains
retriever = vectorstore.as_retriever()

### A)Prompt templates from LangChain Hub:

In [14]:
# Pull the RAG prompt template by its "owner/repo" identifier
prompt_identifier = "jclemens24/rag-prompt"
client = Client()
prompt = client.pull_prompt(prompt_identifier)

In [16]:
# Show the pulled prompt: its input variables, metadata, and message template
print(prompt)

input_variables=['context', 'question'] input_types={} partial_variables={} metadata={'lc_hub_owner': 'jclemens24', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '1a1f3ccb9a5a92363310e3b130843dfb2540239366ebe712ddd94982acc06734'} messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]


### B)Formatting Function:

In [None]:
# Post-processing
def format_docs(docs):
    """Merge the text of the given documents into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in input order, separated by a blank line
        (two newlines). An empty iterable yields an empty string.
    """
    separator = "\n\n"
    page_texts = [document.page_content for document in docs]
    return separator.join(page_texts)

### C)Defining your LLM:

In [None]:
# LLM
# Chat model used for answer generation; temperature=0 requests the least-random sampling
llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")

## **5-Setting up a langchain chain using LCEL:**

In [None]:
# Chain it all together with LangChain:
# gather inputs -> fill the prompt -> call the model -> return a plain string
retrieval_inputs = {
    # retrieved documents, joined into one text block by format_docs
    "context": retriever | format_docs,
    # the incoming question, forwarded unchanged
    "question": RunnablePassthrough(),
}
rag_chain = retrieval_inputs | prompt | llm | StrOutputParser()

## **6-Submitting a question:**

In [None]:
# Question - run the chain
question = "What are the advantages of using RAG?"
# Last expression of the cell, so the answer string is displayed as the cell output
rag_chain.invoke(question)

# **Adding a New Alternative:**

In [None]:
# Imports
from langchain_core.runnables import RunnableParallel

In [None]:
# Chain it all together with LangChain
# Step 1: replace the list of retrieved documents under "context" with their joined text
context_formatter = RunnablePassthrough.assign(
    context=lambda inputs: format_docs(inputs["context"])
)

# Step 2: answer-generation chain that expects {"context": <documents>, "question": <str>}
rag_chain_from_docs = context_formatter | prompt | llm | StrOutputParser()

# Step 3: retrieve documents and forward the question in parallel, then attach
# the generated answer under "answer" while keeping the retrieved documents
retrieval_step = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
)
rag_chain_with_source = retrieval_step.assign(answer=rag_chain_from_docs)

In [None]:
# Question - run the chain; the result holds the retrieved context,
# the original question, and the generated answer
question_with_sources = "What are the Advantage of using RAG?"
result = rag_chain_with_source.invoke(question_with_sources)
result