<a href="https://colab.research.google.com/github/abenfaddoul/Rag/blob/main/rag_gemini15flash.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Building a RAG Application in 10 min with Gemini 1.5 Flash and Hugging Face

In [None]:
# We install the necessary libraries
!pip install langchain sentence-transformers
!pip install langchain-community
!pip install langchain-huggingface
!pip install -q -U google-generativeai
!pip install pypdf

In [44]:
# To split the documents into chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter
# To use MyScale as a vector database
from langchain_community.vectorstores import MyScale
# To use Hugging Face for embeddings
from langchain_huggingface import HuggingFaceEmbeddings
# To load PDF
from langchain_community.document_loaders import PyPDFLoader
# To use Gemini 1.5 Flash
import google.generativeai as genai
# To use os
import os

In [45]:
# Connection settings for the MyScale vector database, exported as environment
# variables. The values below are placeholders: replace them with the host,
# username and password of your own cluster before running the notebook.
os.environ["MYSCALE_HOST"] = "Your_Host"
os.environ["MYSCALE_PORT"] = "443"
os.environ["MYSCALE_USERNAME"] = "Your_Username"
os.environ["MYSCALE_PASSWORD"] = "Your_Password"

# Configure the Gemini client and create the Gemini 1.5 Flash model handle.
# The API key is read from the GOOGLE_API_KEY environment variable so the
# secret never has to be written into (and saved with) the notebook; when the
# variable is unset an empty key is configured.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", ""))
model = genai.GenerativeModel("gemini-1.5-flash")


In [52]:
# Point the loader at the PDF to index ("Your_pdf" is a placeholder path).
loader = PyPDFLoader("Your_pdf")

# Read the PDF into `docs`.
docs = loader.load()

In [53]:
# Cut the loaded documents into overlapping chunks (chunk_size=1000,
# chunk_overlap=200) so each piece is small enough to embed and retrieve.
# NOTE: `docs` is rebound from the loaded documents to the chunks here, so
# re-running this cell without re-running the loader cell splits the chunks again.
character_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = character_splitter.split_documents(docs)

In [None]:
# Embed the chunks with a Hugging Face model and store them in MyScale.
# NOTE(review): MyScale() is given no connection arguments, so it presumably
# picks up the MYSCALE_* environment variables set earlier — confirm.
embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-base-en-v1.5",
)
docsearch = MyScale(embedding=embeddings)
# NOTE(review): re-running this cell presumably inserts the same chunks again — confirm.
docsearch.add_documents(docs)

In [None]:
# Sanity-check the embeddings and the vector store: run a similarity search
# asking for one result (k=1) and display what comes back.
# NOTE: `docs` is rebound to the search results here; the cell that builds the
# RAG context reads them from this name.
query = "Text to search or question"
docs = docsearch.similarity_search(query, k=1)
docs

In [56]:
# Build the context string passed to the model from the retrieved documents.
# The page contents are separated by a blank line so that, when more than one
# document is retrieved, the end of one chunk does not run straight into the
# start of the next. With a single retrieved document the result is simply
# that document's text.
rag_context = "\n\n".join(doc.page_content for doc in docs)

In [None]:
# Ask Gemini to answer the prompt, passing the retrieved context first and the
# prompt second ("Your prompt" is a placeholder for the actual question).
prompt = "Your prompt"
contents = [rag_context, prompt]
response = model.generate_content(contents)
# Display the text of the generated answer as the cell output.
response.text