In [50]:
import os

import gradio as gr
import requests
from bs4 import BeautifulSoup
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
from langchain.prompts import ChatPromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain_ollama.llms import OllamaLLM
from pymongo import MongoClient
from pymongo.errors import OperationFailure

In [51]:
# URL of the web page whose text is extracted and indexed by this notebook
url = "https://www.inspiredtaste.net/24593/essential-pancake-recipe/"

In [52]:
# Connection settings for the MongoDB Atlas cluster that backs the vector store.
# The connection string carries a username and password, so it is read from the
# environment instead of being stored in the notebook.
# NOTE: any credential that was ever committed with this notebook should be rotated.
MONGODB_URI = os.environ.get("MONGODB_ATLAS_URI")
if not MONGODB_URI:
    raise RuntimeError(
        "Set the MONGODB_ATLAS_URI environment variable to the MongoDB Atlas "
        "connection string before running this cell."
    )

client = MongoClient(MONGODB_URI)
# Database and collection names that hold the embedded chunks.
db = "vectorstore_Ingles"
collection = "web_data"
MONGODB_COLLECTION = client[db][collection]
# Name of the Atlas Vector Search index passed to the vector store.
ATLAS_VECTOR_SEARCH_INDEX_NAME = "i"

## Extraer info y dividirla

In [53]:
def extraer_info_url(url, timeout=10):
    """Download a web page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The text of every ``<p>`` element joined by single spaces, or an
        empty string when the request fails or the response status is not 200.
        Failures are reported with ``print`` rather than raised.
    """
    text_data = ""
    try:
        # Without a timeout, requests waits indefinitely on an unresponsive
        # server. A timeout raises a RequestException subclass, so it is
        # reported by the handler below like any other request failure.
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')
            text_data = " ".join(p.get_text() for p in soup.find_all('p'))
        else:
            print(f"Error al acceder a la página, código de estado: {response.status_code}")
    except requests.RequestException as e:
        print(f"Se produjo un error durante la solicitud HTTP: {e}")

    return text_data

def dividir_texto(texto, chunk_size=500, chunk_overlap=50):
    """Split text into chunks suitable for indexing.

    Args:
        texto: Text to split. Any empty value (``""``, ``None``, ``[]``)
            produces an empty result.
        chunk_size: Maximum chunk size handed to the splitter.
        chunk_overlap: Overlap between consecutive chunks handed to the splitter.

    Returns:
        A list of text chunks; empty when there is nothing to split.
    """
    # The splitter expects a string. Callers that found no text may pass an
    # empty placeholder such as [], so return early instead of forwarding it.
    if not texto:
        return []

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_text(texto)


def extraer_texto_y_dividir(url):
    """Extract the text of a web page and split it into chunks.

    Args:
        url: Address of the page to fetch.

    Returns:
        A list of text chunks, or an empty list when no text could be
        extracted from the page.
    """
    texto = extraer_info_url(url)
    if not texto:
        print("El texto estaba vacío")
        # Nothing to split: return the empty result directly rather than
        # handing a non-string placeholder to the splitter.
        return []

    return dividir_texto(texto)

## Crear embeddings y vectorstore

In [54]:
def guardar_documentos_vectorstore(chunks, vector_store):
    """Wrap text chunks in ``Document`` objects and add them to a vector store.

    Args:
        chunks: Iterable of text fragments to store.
        vector_store: Object exposing ``add_documents(documents)``.

    Returns:
        None. Progress is reported with ``print``.
    """
    # Build one document per text fragment.
    documents = [Document(page_content=chunk) for chunk in chunks]

    # With nothing to insert, skip the store call and do not report success.
    if not documents:
        print("No hay fragmentos para añadir al vectorstore.")
        return

    # Insert the documents into the vector store.
    vector_store.add_documents(documents)
    print("Documents añadidos al vectorstore.")

In [55]:
# Embedding model used to turn text into vectors for the vector store.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


# MongoDB Atlas Vector Search configuration: the store is bound to
# MONGODB_COLLECTION and to the index named by ATLAS_VECTOR_SEARCH_INDEX_NAME.
vectorstore = MongoDBAtlasVectorSearch(
    collection=MONGODB_COLLECTION,
    embedding=embeddings,
    index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME,
    relevance_score_fn="cosine" 
)

# Fetch the page at `url` and split its text into chunks.
chunks = extraer_texto_y_dividir(url)

# NOTE(review): this adds the chunks on every execution of the cell; presumably
# re-running it stores duplicate documents in the collection — confirm.
guardar_documentos_vectorstore(chunks, vectorstore)

Documents añadidos al vectorstore.


## Hacer consultas a Ollama

In [56]:
# Ollama model that generates the answers. `base_url` is the OllamaLLM field
# for the server address; `server_url` is not a field of the class.
llm = OllamaLLM(model="llama3.2", base_url="http://localhost:11434")

# Prompt for the question-answering chain; it expects `context` and `question`.
prompt = ChatPromptTemplate.from_template(
    template= "Use the context below to answer the user's question:\n\n{context}\n\nQuestion: {question}\nAnswer:"
)

def realizar_consulta(vectorstore, consulta):
    """Answer a question using documents retrieved from a vector store.

    Uses the module-level ``llm`` and ``prompt`` objects.

    Args:
        vectorstore: Vector store exposing ``as_retriever()``.
        consulta: Question to answer.

    Returns:
        The answer produced by the retrieval QA chain.
    """
    retriever = vectorstore.as_retriever()
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        # Hand the notebook's prompt to the chain; without this the chain
        # falls back to its built-in default prompt.
        chain_type_kwargs={"prompt": prompt},
    )
    # `Chain.run` is deprecated. `invoke` takes the question under "query"
    # and returns a dict with the answer under "result".
    respuesta = qa_chain.invoke({"query": consulta})["result"]
    return respuesta

In [62]:
# Example query
consulta = "How to make Easy Fluffy Pancakes?"
# NOTE(review): this guard relies on the truthiness of the vector store object;
# presumably it is always true once the store has been created — confirm.
if vectorstore:
    respuesta = realizar_consulta(vectorstore, consulta)
    print(f"Respuesta: {respuesta}")

Respuesta: Here's a helpful answer:

To make easy fluffy pancakes, follow these steps:

Ingredients:
- 1 cup all-purpose flour
- 2 tablespoons sugar
- 2 teaspoons baking powder
- 1/4 teaspoon salt
- 1 cup milk
- 1 large egg
- 2 tablespoons melted butter

Instructions:
1. In a medium bowl, whisk together the dry ingredients (flour, sugar, baking powder, and salt).
2. In a separate bowl, whisk together the wet ingredients (milk, egg, and melted butter).
3. Pour the wet ingredients into the dry ingredients and stir until just combined. The batter should still be slightly lumpy.
4. Heat a non-stick pan or griddle over medium heat. Grease with butter or cooking spray if necessary.
5. Using a 1/4 cup measuring cup, scoop the batter onto the pan.
6. Cook for 2-3 minutes, until bubbles appear on the surface and the edges start to dry. Flip and cook for an additional 1-2 minutes, until golden brown.
7. Serve warm with your favorite toppings, such as maple syrup, fresh fruit, or whipped cream.

