In [None]:
import os
from dotenv import load_dotenv
from openai import OpenAI

In [3]:
load_dotenv()
api_key = os.getenv("api_key")

if not api_key:
    print("Api key not found")
else:
    print("Api key loaded successfully")

Api key loaded successfully


In [4]:
documents = [
    "Python is a high-level programming language known for readability and simplicity",
    "Machine learning is a subset of AI that enables systems to learn from data.",
    "RAG combines retrievable and generation to provide accurate, grounded responses."
]


In [4]:
from sentence_transformers import SentenceTransformer
import numpy as np

model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = model.encode(documents)

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
query = ["What is RAG?"]
query_embedding = model.encode(query)[0]
query_embedding

array([-6.95707723e-02,  9.51999873e-02,  1.60213951e-02,  6.80146040e-03,
       -8.84049833e-02,  1.42048458e-02,  5.40277660e-02,  4.56368998e-02,
       -3.19217443e-02, -2.95636989e-02,  5.67037286e-03,  6.36049360e-03,
        2.30016485e-02, -5.11627048e-02, -4.39477079e-02,  5.89560680e-02,
        5.50739467e-02,  9.31472555e-02, -1.99183207e-02,  5.56547148e-03,
       -6.06279224e-02,  7.54946619e-02, -1.02010812e-03, -3.40628847e-02,
        4.00799699e-02,  4.76241447e-02, -3.31660882e-02, -7.22332683e-04,
        5.98662868e-02, -8.33192468e-03, -9.59183741e-03,  5.34927063e-02,
       -2.39231139e-02, -2.01412123e-02, -4.85539325e-02,  1.87074877e-02,
       -2.20365617e-02,  6.23629428e-02,  4.00254037e-03,  2.97090653e-02,
        1.27504747e-02,  1.79135846e-03, -2.17953287e-02, -8.11001360e-02,
       -2.61422654e-04,  3.26574892e-02, -1.05920136e-02,  3.05196643e-02,
        3.11776567e-02, -1.16894813e-02,  5.91254793e-03,  5.38987434e-03,
        2.91767586e-02,  

In [6]:
similarities = np.dot(embeddings, query_embedding)
top_idx = np.argmax(similarities)
retrieved_doc = documents[top_idx]

In [7]:
retrieved_doc

'RAG combines retrievable and generation to provide accurate, grounded responses.'

In [8]:
client = OpenAI(api_key=api_key)
prompt = f"""context: {retrieved_doc}

Qusetion: {query}
Answer based on the context:
"""

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": prompt}],
    temperature=0
)



In [9]:
response.choices[0].message.content

'RAG is a system that combines retrievable and generation techniques to provide accurate and grounded responses.'

In [10]:
#Custom RAG
from sentence_transformers import SentenceTransformer
import numpy as np
from openai import OpenAI

#1. Generate embeddings
model = SentenceTransformer("all-MiniLM-L6-v2")
doc_embeddings = model.encode(documents)

#2. Query and retrieve
query = "What is RAG?"
query_embedding = model.encode([query])[0]

#3. Compute Similarities
similarities = np.dot(doc_embeddings, query_embedding)
top_idx = np.argmax(similarities)
retrieved_doc = documents[top_idx]

#4. Generate response
client = OpenAI(api_key=api_key)
prompt = f""" Context: {retrieved_doc}

Question: {query}

Answer based on the context
"""

response = client.chat.completions.create(
    model="gpt-3.5-turbo", 
    messages=[{"role": "user", "content": prompt}], 
    temperature=0
)

print("Custom RAG Answer")
print(response.choices[0].message.content)

Custom RAG Answer
RAG is a system that combines retrievable and generation techniques to provide accurate and grounded responses.


In [None]:
!pip install langchain langchain-core langchain-community langchain-openai langchain-text-splitters --quiet
!pip install faiss-cpu python-dotenv --quiet


[notice] A new release of pip is available: 24.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 24.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


: 

In [4]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.docstore.document import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

lc_docs = [Document(page_content=doc) for doc in documents]

embeddings = OpenAIEmbeddings(openai_api_key = api_key)

vector_store = Chroma.from_documents(
    lc_docs,
    embeddings,
    collection_name="my_rag_collection",
    persist_directory= "./chroma_db"
)

retriever = vector_store.as_retriever()

#llm
llm = ChatOpenAI(
    model = "gpt-4o-mini",
    temperature = 0,
    openai_api_key = api_key
)

#Prompt
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are an expert assistant. Use ONLY the retrieved context."),
    ("human", "{question}\n\nContext:\n{context}")
])

#Build RAG pipeline
rag_chain = (
    RunnableParallel(context=retriever, question=RunnablePassthrough())
    | prompt
    | llm
)

#Query
response = rag_chain.invoke("What is RAG")
print(response)

  from .autonotebook import tqdm as notebook_tqdm


content='RAG stands for "retrieval-augmented generation." It combines retrieval and generation techniques to provide accurate and grounded responses.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 25, 'prompt_tokens': 93, 'total_tokens': 118, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_aa07c96156', 'id': 'chatcmpl-ClsAHtoelmvbuEIuugmhmlPmHjayx', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None} id='lc_run--019b1187-a292-7d11-9c6b-0dbdd1c20a17-0' usage_metadata={'input_tokens': 93, 'output_tokens': 25, 'total_tokens': 118, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}


In [None]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.docstore.document import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

#Convert documents
lc_docs = [Document(page_content=doc) for doc in documents]
embeddings = OpenAIEmbeddings(openai_api_key = api_key)


In [5]:
!pip install llama-index

Collecting llama-index
  Downloading llama_index-0.14.10-py3-none-any.whl.metadata (13 kB)
Collecting llama-index-cli<0.6,>=0.5.0 (from llama-index)
  Downloading llama_index_cli-0.5.3-py3-none-any.whl.metadata (1.4 kB)
Collecting llama-index-core<0.15.0,>=0.14.10 (from llama-index)
  Downloading llama_index_core-0.14.10-py3-none-any.whl.metadata (2.5 kB)
Collecting llama-index-embeddings-openai<0.6,>=0.5.0 (from llama-index)
  Downloading llama_index_embeddings_openai-0.5.1-py3-none-any.whl.metadata (400 bytes)
Collecting llama-index-indices-managed-llama-cloud>=0.4.0 (from llama-index)
  Downloading llama_index_indices_managed_llama_cloud-0.9.4-py3-none-any.whl.metadata (3.7 kB)
Collecting llama-index-llms-openai<0.7,>=0.6.0 (from llama-index)
  Downloading llama_index_llms_openai-0.6.10-py3-none-any.whl.metadata (3.0 kB)
Collecting llama-index-readers-file<0.6,>=0.5.0 (from llama-index)
  Downloading llama_index_readers_file-0.5.5-py3-none-any.whl.metadata (5.7 kB)
Collecting llama-


[notice] A new release of pip is available: 24.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
from llama_index.core import Document, VectorStoreIndex, Settings
from llama_index.llms.openai import OpenAI as LlamaOpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

#Configure llamaindex
Settings.llm = LlamaOpenAI(model="gpt-3.5-turbo", temperature = 0, api_key = api_key)
Settings.embed_model = OpenAIEmbedding(api_key= api_key)

#Create documents and index
llama_docs = [Document(text=doc) for doc in documents]
index = VectorStoreIndex.from_documents(llama_docs)

#Query
query_engine = index.as_query_engine()
response = query_engine.query("What is RAG?")

print("Llama index Answer")
print(response)

  from .autonotebook import tqdm as notebook_tqdm


Llama index Answer
RAG is a system that combines retrievable and generation techniques to provide accurate and grounded responses.


: 