# Compare Phi-3 & Llama-3 using RAG


<img src="./resources/architecture.png" width=800px>

In [1]:
import nest_asyncio
from dotenv import load_dotenv
from IPython.display import Markdown, display

from llama_index.core import Settings
from llama_index.llms.ollama import Ollama
from llama_index.core import PromptTemplate
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import VectorStoreIndex, ServiceContext, SimpleDirectoryReader

In [2]:
# Allow nested (re-entrant) use of the asyncio event loop; must run before
# any later cell that drives async code from inside the notebook
nest_asyncio.apply()

In [3]:
# Directory that holds the source documents (drag & drop your files here).
# NOTE(review): this absolute path is specific to one workspace — adjust it
# when running the notebook elsewhere.
input_dir_path = "/teamspace/studios/this_studio/test-dir"

In [4]:
# Set up the LLM and the embedding model.
# To compare models, swap the active line with its commented-out alternative.
# llm = Ollama(model="phi3", request_timeout=120.0)
llm = Ollama(model="llama3", request_timeout=120.0)

# embed_model = HuggingFaceEmbedding(model_name="Snowflake/snowflake-arctic-embed-m", trust_remote_code=True)
# NOTE(review): trust_remote_code=True is passed through to the model loader —
# confirm it is actually required for this model.
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-large-en-v1.5",
    trust_remote_code=True,
)

config.json:   0%|          | 0.00/779 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

In [5]:
# Read every PDF found under the input directory, including sub-directories
loader = SimpleDirectoryReader(
    input_dir=input_dir_path,
    recursive=True,
    required_exts=[".pdf"],
)
docs = loader.load_data()

# Build a vector index over the loaded documents. The embedding model is
# registered on the global Settings object first; from_documents() is not given
# one explicitly, so it presumably picks it up from Settings.
Settings.embed_model = embed_model
index = VectorStoreIndex.from_documents(docs, show_progress=True)

# Create the query engine. The LLM is likewise registered on Settings, and
# similarity_top_k=1 is the only retrieval option passed. No reranker is
# configured in this cell.
Settings.llm = llm
query_engine = index.as_query_engine(similarity_top_k=1)

# ====== Customise prompt template ======
# {context_str} and {query_str} are the placeholders left in the template text.
# Fix: the instruction line previously read "incase case you don't know",
# a typo that was sent verbatim to the LLM.
qa_prompt_tmpl_str = (
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the context information above I want you to think step by step "
    "to answer the query in a crisp manner, in case you don't know the answer "
    "say 'I don't know!'.\n"
    "Query: {query_str}\n"
    "Answer: "
)
qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)

# Install the custom template under the response synthesizer's text-QA prompt key
query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
)

# Run the question through the query engine and keep the result for display
response = query_engine.query("What exactly is DSPy?")

Parsing nodes:   0%|          | 0/17 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/26 [00:00<?, ?it/s]

In [6]:
# Render the answer text as formatted Markdown in the cell output
rendered_answer = Markdown(str(response))
display(rendered_answer)

A nice challenge!

Based on the provided context, I'll take a step-by-step approach to answer your question.

According to the text, DSPy is a framework for programmatically solving advanced tasks with language and retrieval models through composing and declaring modules. It aims to replace brittle "prompt engineering" tricks with composable modules and automatic optimizers.

In simpler terms, DSPy is a tool that helps developers create sophisticated language models by breaking down complex tasks into smaller, manageable pieces (modules) and allowing them to specify what these modules should do declaratively.

So, the answer is: DSPy is a framework for programmatically solving advanced tasks with language and retrieval models through composing and declaring modules.