# Daily Dose of Data Science

This notebook contains the code for the RAG demo published in Daily Dose of Data Science.

Read the article here: [A crash course on RAG - Part 1](https://www.dailydoseofds.com/a-crash-course-on-building-rag-systems-part-1-with-implementations/)

## Set up Asyncio

In [None]:
import nest_asyncio

# Patch asyncio so that an already-running event loop can be re-entered.
# NOTE(review): presumably needed because Jupyter runs its own loop and the
# libraries used in later cells call into asyncio — confirm.
nest_asyncio.apply()

## Set up the Qdrant vector database

In [None]:
import qdrant_client

# Name of the Qdrant collection the index is stored in (used by create_index).
collection_name = "chat_with_docs"

# Client pointing at a Qdrant server on localhost:6333.
client = qdrant_client.QdrantClient(host="localhost", port=6333)

## Read the documents

In [None]:
from llama_index.core import SimpleDirectoryReader

# Directory that holds the source documents.
input_dir_path = './docs'

# Reader restricted to PDF files, descending into subdirectories.
loader = SimpleDirectoryReader(
    input_dir=input_dir_path,
    required_exts=[".pdf"],
    recursive=True,
)

# Load the documents; `docs` is what gets indexed further down.
docs = loader.load_data()

In [None]:
# Sanity check: show the container type and how many document objects were loaded.
type(docs), len(docs)

## A function to index data

In [None]:
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import VectorStoreIndex, ServiceContext, StorageContext

def create_index(documents):
    """Index ``documents`` into Qdrant and return the resulting index.

    Relies on the notebook-level ``client`` and ``collection_name`` to decide
    where the vectors are stored.

    Args:
        documents: The documents to index, e.g. the result of
            ``loader.load_data()``.

    Returns:
        The index produced by ``VectorStoreIndex.from_documents``.
    """
    # Vector store backed by the Qdrant collection configured earlier.
    vector_store = QdrantVectorStore(client=client, collection_name=collection_name)
    # Storage context that routes the index's vectors to that store.
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    index = VectorStoreIndex.from_documents(
        documents,
        storage_context=storage_context,
    )
    return index

## Load the embedding model and index data

In [None]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

# Load the BAAI/bge-large-en-v1.5 embedding model via the HuggingFace integration.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5", trust_remote_code=True)
# Register it on the global Settings object before the index is built.
Settings.embed_model = embed_model

# Build the index over the loaded documents (see create_index).
index = create_index(docs)

## Set up the LLM

In [None]:
from llama_index.llms.ollama import Ollama

# LLM accessed through Ollama; the model tag must name a model that Ollama
# can serve. The request timeout is 120 seconds.
llm = Ollama(model="llama3.2:1b", request_timeout=120.0)

# Register the LLM on the global Settings object.
Settings.llm = llm

## Define the prompt template

In [None]:
from llama_index.core import PromptTemplate

# Question-answering prompt. `{context_str}` and `{query_str}` are the two
# placeholders that get filled in when the template is formatted.
qa_prompt_tmpl_str = (
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the context information above I want you to think step by step to answer the query in a crisp manner, in case you don't know the answer say 'I don't know!'.\n"
    "Query: {query_str}\n"
    "Answer: "
)

# Wrap the raw string in a PromptTemplate; it is passed to the query engine's
# update_prompts call later in the notebook.
qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)

## Reranking

In [None]:
from llama_index.core.postprocessor import SentenceTransformerRerank

# Reranker built on the cross-encoder/ms-marco-MiniLM-L-2-v2 model,
# configured with top_n=3.
rerank = SentenceTransformerRerank(
    top_n=3,
    model="cross-encoder/ms-marco-MiniLM-L-2-v2",
)

## Query the document

In [None]:
# Query engine configured with similarity_top_k=10 and `rerank` as a node
# postprocessor.
query_engine = index.as_query_engine(
    similarity_top_k=10,
    node_postprocessors=[rerank],
)

# Use the custom QA template for the response synthesizer's text-QA prompt.
query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
)

# Run the query; `response` is rendered in the next cell.
response = query_engine.query("What exactly is DSPy?")

## Print response

In [None]:
from IPython.display import Markdown, display

# Render the response text as Markdown in the cell output.
display(Markdown(str(response)))