### Hybrid RAG (Dense and Sparse Search)

In [None]:
import os

from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings

# Runs before any API client is built. Later cells read PINECONE_API_KEY from
# os.environ; presumably the keys come from a local .env file — confirm.
load_dotenv()

# Embedding model shared by the vector store below. It is built with no
# arguments, so the library's default model and credentials lookup apply.
embeddings = OpenAIEmbeddings()

In [None]:

from langchain.document_loaders import CSVLoader

# Source knowledge base: a CSV file sitting next to the notebook.
csv_path = "./context.csv"
loader = CSVLoader(file_path=csv_path, encoding="utf8")

# Read the file into LangChain Document objects for the steps below.
documents = loader.load()

In [None]:

from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents into chunks of at most 500 characters with no
# overlap between neighbouring chunks. `documents` is rebound to the chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=500,
)
documents = text_splitter.split_documents(documents)

In [None]:
from pinecone import Pinecone as PineconeClient, ServerlessSpec

# Look the key up first; os.environ.get yields None (no exception) when the
# variable is not set.
pinecone_api_key = os.environ.get("PINECONE_API_KEY")

# Pinecone control-plane client used below to create the index.
pc = PineconeClient(api_key=pinecone_api_key)

In [None]:

# Create the serverless index only when it does not exist yet, so this cell
# can be re-run without failing on an "index already exists" error.
#
# dimension must equal the length of the vectors produced by `embeddings`;
# 1536 is the size of OpenAI's text-embedding-ada-002 vectors — re-check this
# value if the embedding model is ever changed.
if 'my-indexv2' not in pc.list_indexes().names():
    pc.create_index(
        name='my-indexv2',
        dimension=1536,
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )

In [None]:

# Name of the Pinecone index the vector store writes to; it is the same name
# that is passed to pc.create_index.
index_name = "my-indexv2"

In [None]:

from langchain.vectorstores import Pinecone
# Embed every chunk with `embeddings` and store the vectors in the Pinecone
# index named by `index_name`.
# NOTE(review): re-running this cell presumably inserts the chunks a second
# time — confirm before executing it repeatedly.
vectorstore = Pinecone.from_documents(documents, embeddings, index_name=index_name)

In [None]:
# Dense (embedding-similarity) retriever backed by the Pinecone vector store.
# No arguments are passed, so the library's default search settings apply.
retriever = vectorstore.as_retriever()

In [None]:
from langchain.retrievers import BM25Retriever

# Sparse (keyword) retriever built from the same chunks that were sent to the
# vector store.
keyword_retriever = BM25Retriever.from_documents(documents)
# Cap the number of documents returned per query at 3.
keyword_retriever.k =  3

In [None]:
# Sanity-check the sparse retriever on its own. `invoke` is the Runnable entry
# point (the same one used for the RAG chain) and replaces the deprecated
# `get_relevant_documents`.
keyword_retriever.invoke("what bacteria grow on macconkey agar")

In [None]:
from langchain.retrievers import EnsembleRetriever

# Hybrid search: pair the dense retriever with the sparse one. The weights
# list is given in the same order as the retrievers, half and half.
hybrid_sources = [retriever, keyword_retriever]
ensemble_retriever = EnsembleRetriever(retrievers=hybrid_sources, weights=[0.5, 0.5])

In [None]:
# Sanity-check the hybrid retriever with the same question. `invoke` is the
# Runnable entry point and replaces the deprecated `get_relevant_documents`.
ensemble_retriever.invoke("what bacteria grow on macconkey agar")

In [None]:
from langchain_openai import ChatOpenAI

# Chat model that writes the final answer. It is built with no arguments, so
# the library's default model and settings apply.
llm = ChatOpenAI()

In [None]:
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

# Prompt for the answer step. The literal must open with exactly three quotes:
# a fourth one becomes part of the string and the prompt would start with a
# stray '"' character.
template = """
You are a helpful assistant that answers questions based on the following context.
If you don't find the answer in the context, just say that you don't know.
Context: {context}

Question: {input}

Answer:

"""
prompt = ChatPromptTemplate.from_template(template)


def _format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Without this step the prompt would receive the Python repr of a list of
    Document objects (metadata and escape sequences included) instead of the
    plain passage text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The question goes to the hybrid retriever (whose hits are flattened to text
# for {context}) and is passed through unchanged for {input}; the filled
# prompt is sent to the chat model and the reply is reduced to a plain string.
rag_chain = (
    {"context": ensemble_retriever | _format_docs, "input": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Run the full hybrid-RAG chain on a sample question.
question = 'what bacteria grow on macconkey agar'
response = rag_chain.invoke(question)

# Bare expression as the last statement so the notebook displays the answer.
response