## RAG from Scratch

<a href="https://colab.research.google.com/github/adithya-s-k/AI-Engineering.academy/blob/main/docs/RAG/00_RAG_Base/RAG_in_10_lines.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


### Setup



In [None]:
# Install the third-party packages used by this notebook (quiet mode).
# NOTE(review): scipy is not imported by any cell visible here — confirm it is still needed.
!pip install -q sentence-transformers
!pip install -q wikipedia-api
!pip install -q numpy
!pip install -q scipy

### Load the Embedding Model:

In [None]:
from sentence_transformers import SentenceTransformer
# Load the embedding model used for both the document chunks and the query.
# NOTE(review): trust_remote_code=True presumably lets code shipped with the
# model repository run locally — only keep it for repositories you trust.
model = SentenceTransformer("Alibaba-NLP/gte-base-en-v1.5", trust_remote_code=True)

### Fetch Text Content from Wikipedia:



In [None]:
from wikipediaapi import Wikipedia

# Build the client (presumably: user-agent string first, then language edition)
# and download the plain-text body of the article.
wiki = Wikipedia('RAGBot/0.0', 'en')
doc = wiki.page('Hayao_Miyazaki').text

# Chunking: one chunk per blank-line-separated paragraph. Whitespace-only
# chunks are dropped so they are never embedded or retrieved as "context"
# (this also yields an empty list, not [''], when the page text is empty).
paragraphs = [chunk for chunk in doc.split('\n\n') if chunk.strip()]


In [None]:
import textwrap


In [None]:
# Show every chunk, wrapped to 100 columns and fenced by separator rules,
# so the result of the chunking step can be inspected by eye.
separator = "-----------------------------------------------------------------"
for paragraph in paragraphs:
  print(separator)
  print(textwrap.fill(paragraph, width=100))
  print(separator)


### Embed the Document:

In [None]:
# Embed all paragraphs in a single call. normalize_embeddings=True is requested
# so that the vectors can later be compared with a plain dot product.
docs_embed = model.encode(paragraphs, normalize_embeddings=True)

In [None]:
# Inspect the dimensions of the document embeddings
# (presumably number of paragraphs x embedding size).
docs_embed.shape

In [None]:
# Peek at the embedding produced for the first paragraph.
docs_embed[0]

### Embed the Query:

In [None]:
# The user question, embedded with the same model and the same normalization
# setting as the paragraphs so both live in the same vector space.
query = "What was Studio Ghibli's first film?"
query_embed = model.encode(query, normalize_embeddings=True)


In [None]:
# Inspect the dimensions of the query embedding
# (a single string presumably yields a 1-D vector — confirm).
query_embed.shape

### Find the Closest Paragraphs to the Query:



In [None]:
import numpy as np
# Score every paragraph against the query. Both sides were encoded with
# normalize_embeddings=True, so this dot product is presumably the cosine similarity.
# NOTE(review): `.T` has no effect if query_embed is 1-D — confirm its shape.
similarities = np.dot(docs_embed, query_embed.T)

In [None]:
# Inspect the dimensions of the score array (presumably one score per paragraph).
similarities.shape

In [None]:
# Display the raw similarity scores.
similarities

In [None]:
# Rank paragraphs by score: argsort orders indices from lowest to highest
# score, so the last three entries are the best matches; reversing them
# puts the highest-scoring paragraph first.
ascending_order = np.argsort(similarities, axis=0)
best_three_ascending = ascending_order[-3:]
top_3_idx = best_three_ascending[::-1].tolist()


In [None]:
# Display the indices of the three best-scoring paragraphs, best first.
top_3_idx

In [None]:
# Map the selected indices back to the paragraph texts, keeping the index order.
most_similar_documents = [paragraphs[idx] for idx in top_3_idx]

In [None]:
# Wrap each retrieved paragraph to 100 columns, echo it between separator
# rules for inspection, and assemble the CONTEXT string for the prompt.
separator = "-----------------------------------------------------------------"
wrapped_documents = [
  textwrap.fill(document, width=100) for document in most_similar_documents
]
for wrapped_text in wrapped_documents:
  print(separator)
  print(wrapped_text)
  print(separator)

# Every paragraph, including the last, is followed by a blank line.
CONTEXT = "".join(f"{wrapped_text}\n\n" for wrapped_text in wrapped_documents)

In [None]:
# The question inserted into the prompt. This repeats the string that was
# embedded for retrieval; keep the two in sync if the question changes.
query = "What was Studio Ghibli's first film?"

In [None]:
# Build the final prompt: instructions, then the retrieved CONTEXT, then the
# user's QUESTION. The instructions ask the model to admit when it does not
# know rather than invent an answer.
prompt = f"""
use the following CONTEXT to answer the QUESTION at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

CONTEXT: {CONTEXT}
QUESTION: {query}

"""

In [None]:
# Install the OpenAI client library used for the generation step (quiet mode).
!pip install -q openai

In [None]:
# Colab-specific helper, presumably for reading values stored as notebook
# secrets. The API key itself is looked up where the client is constructed,
# so no secret is fetched (and discarded) here.
from google.colab import userdata

import openai



In [None]:
from openai import OpenAI
# Create the API client, reading the key from the Colab entry named 'openai'.
client = OpenAI(api_key=userdata.get('openai'))

In [None]:
# Send the assembled prompt to the chat model as a single user turn.
user_message = {"role": "user", "content": prompt}
response = client.chat.completions.create(model="gpt-4o", messages=[user_message])

In [None]:
# Print the text of the first returned choice, i.e. the model's answer.
print(response.choices[0].message.content)
