In [None]:
!pip install langchain-community

!pip install langchain_google_genai

!pip install langchain_chroma

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Path of the PDF to index; relative paths are resolved against the kernel's
# working directory.
PDF_PATH = "Cap 07 Arrays.pdf"

# Parse the PDF into a list of LangChain documents.
loader = PyPDFLoader(PDF_PATH)
data = loader.load()

# Show a short summary instead of dumping every parsed document: the full dump
# floods the cell output and bloats the saved notebook.
print("Total number of documents loaded: ", len(data))
if data:
    print(data[0].page_content[:500])

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents into overlapping chunks (sizes are in characters,
# as configured below) so each chunk can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
docs = text_splitter.split_documents(data)

print("Total number of Chunks: ", len(docs))  # Check how many chunks we have

# Preview only the first few chunks; printing every chunk in full makes the
# output unreadable for anything but a tiny PDF.
for chunk in docs[:3]:
    print(chunk.page_content[:300])
    print("-" * 40)

In [None]:
import google.generativeai as genai
import os
from getpass import getpass

# SECURITY: never hard-code credentials in a notebook. The key that used to be
# embedded in this cell must be treated as compromised and revoked/rotated.
# The key is taken from the GEMINI_API_KEY environment variable when it is
# already set; otherwise it is requested interactively (input is not echoed
# and is not stored in the notebook).
if not os.environ.get("GEMINI_API_KEY"):
    os.environ["GEMINI_API_KEY"] = getpass("Enter your Gemini API key: ")

api_key = os.environ["GEMINI_API_KEY"]

In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv

# Read variables from a local .env file when one exists. `load_dotenv` was
# imported for this purpose but never called; by default it does not override
# variables that are already set in the environment.
load_dotenv()

# Load the Gemini API key and fail early with a clear message when it is missing.
api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
    raise ValueError("GEMINI_API_KEY is not set. Please set it as an environment variable.")

# Embedding client used both to index the chunks and to embed queries.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)

# Smoke test: embed one query and show the vector length and its first component.
vector = embeddings.embed_query("hello, world!")
print(len(vector))
print(vector[0])

In [6]:
# Use the dedicated `langchain_chroma` integration (installed at the top of this
# notebook) instead of the deprecated `langchain.vectorstores` re-export.
from langchain_chroma import Chroma

# Directory where Chroma persists the collection between runs.
PERSISTENT_DIRECTORY = "chroma"

# Embed every chunk and store it in the persisted collection.
# NOTE(review): re-running this cell appears to insert the same chunks again
# into the persisted collection, which would produce duplicate search hits —
# confirm, and clear the directory or reuse the existing store when re-running.
vectorstoredb = Chroma.from_documents(
    documents=docs, embedding=embeddings, persist_directory=PERSISTENT_DIRECTORY
)

# Retriever configured for similarity search with k=5.
retriever = vectorstoredb.as_retriever(search_type="similarity", search_kwargs={"k": 5})


In [None]:
# Smoke test: run one sample query through the retriever.
retrieved_docs = retriever.invoke("como distinguir los objetivos especificos")
print(len(retrieved_docs))

# Guard against an empty result so the cell does not fail with IndexError.
if retrieved_docs:
    print(retrieved_docs[0].page_content)  # Print the first retrieved document
else:
    print("No documents were retrieved for this query.")

In [None]:
def get_relevant_passage(query, db, n_results):
    """Return the stored chunks most similar to ``query``.

    Previously both ``db`` and ``n_results`` were ignored and the notebook-level
    ``retriever`` (with its own fixed ``k``) was always used; both arguments are
    now honoured.

    Args:
        query: Text to search for.
        db: Vector store exposing ``similarity_search(query, k=...)``. When
            ``None``, the store behind the notebook-level ``retriever`` is used.
        n_results: Maximum number of chunks to return.

    Returns:
        The value returned by the store's ``similarity_search`` call.
    """
    # Fall back to the store wrapped by the global retriever so existing
    # callers that pass ``db=None`` keep working.
    store = db if db is not None else retriever.vectorstore
    return store.similarity_search(query, k=n_results)

# Example usage: fetch the passages for a sample question and display them as
# the cell's output.
relevant_text = get_relevant_passage(
    query="como distinguir los objetivos especificos",
    db=None,
    n_results=3,
)

relevant_text

In [9]:
def make_rag_prompt(query, relevant_passage):
    """Build the tutor prompt for a question and its supporting context.

    Args:
        query: The user's question; inserted verbatim into the prompt.
        relevant_passage: Either a ready-made passage string, or a list/tuple
            of retrieved chunks. Chunks exposing ``page_content`` are rendered
            as plain text, each preceded by a ``[source: ..., page: ...]``
            label built from the ``source`` / ``page`` entries of the chunk's
            ``metadata`` when present. Any other value is inserted as-is
            (``str.format`` stringifies it), matching the previous behaviour.

    Returns:
        The fully formatted prompt string.
    """
    if isinstance(relevant_passage, (list, tuple)):
        # Render the chunks as readable text instead of letting the list's
        # repr leak into the prompt.
        sections = []
        for chunk in relevant_passage:
            text = getattr(chunk, "page_content", None)
            if text is None:
                # Not a document-like object: keep its string form.
                sections.append(str(chunk))
                continue
            metadata = getattr(chunk, "metadata", None) or {}
            # The page value is passed through unchanged.
            # NOTE(review): some loaders report zero-based page numbers — confirm.
            labels = ["%s: %s" % (key, metadata[key]) for key in ("source", "page") if key in metadata]
            header = "[" + ", ".join(labels) + "]\n" if labels else ""
            sections.append(header + text)
        passage_text = "\n\n".join(sections)
    else:
        passage_text = relevant_passage

    prompt = ('''You are a friendly and knowledgeable AI tutor that answers questions.  
Your goal is to explain concepts clearly and thoroughly, breaking down any technical details into simple terms suitable for a non-technical audience.  
Maintain a warm, conversational tone as if you are guiding a student step by step.  

You must base your answer exclusively on the content from the passage and the examples included in it.  
If the passage does not relate to the question, politely explain that the answer is not available in the provided material.  
Respond **in Spanish**, and make sure your explanation is easy to follow.  
The topic revolves around **C# programming**, so focus on simplifying and clarifying relevant concepts.

At the end add a reference to the source, just the name of the document and pages of the passages.

Do not add extra information.

QUESTION: '{query}'  
PASSAGE: '{relevant_passage}'  

ANSWER: '''
).format(query=query, relevant_passage=passage_text)

    return prompt

In [10]:
import google.generativeai as genai

def generate_answer(prompt, model_name='models/gemini-pro', temperature=0.3):
    """Send ``prompt`` to a Gemini model and return the generated text.

    Args:
        prompt: The full prompt text to send.
        model_name: Name of the generative model to use. Defaults to the model
            this notebook originally hard-coded.
            NOTE(review): model availability changes over time — confirm the
            default is still served for your account.
        temperature: Sampling temperature passed in the generation config.

    Returns:
        The ``text`` attribute of the model's response.

    Raises:
        ValueError: If the ``GEMINI_API_KEY`` environment variable is not set.
    """
    gemini_api_key = os.getenv("GEMINI_API_KEY")
    if not gemini_api_key:
        raise ValueError("Gemini API Key not provided. Please provide GEMINI_API_KEY as an environment variable")

    genai.configure(api_key=gemini_api_key)
    model = genai.GenerativeModel(model_name)

    # Generation options are passed as a plain dictionary.
    response = model.generate_content(
        contents=prompt,
        generation_config={"temperature": temperature},
    )

    # NOTE(review): accessing `.text` presumably fails when the response holds
    # no usable candidate (e.g. a blocked prompt) — confirm against the SDK.
    return response.text


In [11]:
def generate_answer_2(db, query):
    """Answer ``query`` end to end: retrieve context, build the prompt, call the model.

    Args:
        db: Forwarded to ``get_relevant_passage`` as its ``db`` argument.
        query: The user's question.

    Returns:
        The value returned by ``generate_answer`` for the assembled prompt.
    """
    # Request the relevant chunks (asking for n_results=3).
    passages = get_relevant_passage(query, db, n_results=3)
    # The retrieved chunks are handed to the prompt builder exactly as returned.
    rag_prompt = make_rag_prompt(query, passages)
    return generate_answer(rag_prompt)

In [None]:
# Run the whole pipeline for a sample question and print the model's reply.
answer = generate_answer_2(db=None, query="qué es un array")
print(answer)