# Librerías

In [None]:
import requests
from bs4 import BeautifulSoup
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama.llms import OllamaLLM
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

# Web scraping

In [None]:
# Fetch a web page and extract the text of its <p> elements with BeautifulSoup.
# URL of the page to process
url = "https://www.bbc.com/news/articles/c047r4kreg3o"  # Replace with any article URL

# requests applies no timeout by default; set one so the cell cannot hang
# forever on an unresponsive server.
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx responses instead of silently indexing an error page.
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

# Collect the text of every <p> element, one paragraph per line
paragraphs = soup.find_all('p')
web_content = "\n".join(p.get_text() for p in paragraphs)

# Procesado de datos en vectores

In [None]:
# Split the scraped page text into chunks, embed them, and store them in a
# persistent Chroma collection that is exposed as a retriever.
import hashlib

# Imported in this cell on purpose: it rebinds `Chroma` to the langchain_chroma
# implementation, replacing the one imported from langchain.vectorstores above.
from langchain_chroma import Chroma

# Split the text into manageable fragments
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
# dict.fromkeys drops repeated chunks while keeping their order, which also
# guarantees that the content-derived ids below are unique.
split_docs = list(dict.fromkeys(text_splitter.split_text(web_content)))
if not split_docs:
    raise ValueError("No text was extracted from the page; nothing to index.")

# Content-derived ids: without explicit ids the store assigns fresh random ones,
# so every re-run of this cell would append the same chunks again to the
# persisted collection. With stable ids a re-run overwrites instead of duplicating.
chunk_ids = [hashlib.sha256(chunk.encode("utf-8")).hexdigest() for chunk in split_docs]

# Create embeddings
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2')

vector_store = Chroma.from_texts(
    texts=split_docs,
    ids=chunk_ids,
    collection_name="some_facts",
    embedding=embeddings,
    persist_directory="./chroma_some_facts",
)

retriever = vector_store.as_retriever()

# Modelo de lenguaje (LLM)

In [10]:
# Build the RAG chain: retrieve context from the vector store and have an LLM
# answer the question using that context.


def format_docs(docs):
    """Join the text of the retrieved documents into a single context string.

    The retriever returns Document objects; interpolating that list directly
    into the prompt would insert their repr (including metadata) rather than
    the plain passage text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Prompt
template = """Answer based on the given context:
{context}

Question: {question}
Answer in English.
"""
prompt = ChatPromptTemplate.from_template(template)

# LLM served through Ollama
llm = OllamaLLM(model='llama3.2')

# Chain: the incoming question is sent to the retriever (whose hits are
# flattened to plain text) and also passed through unchanged as {question}.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)



# Main Code

In [12]:
# Example usage
if __name__ == "__main__":
    # Question to ask the RAG chain
    pregunta = "What happened to Sarkozy?"
    print(f'Pregunta: {pregunta}')
    print("Consultando el sistema RAG...")

    # Run the query and print the generated answer
    print("Respuesta:")
    print(chain.invoke(pregunta))

Pregunta: What happended to Sarkozy 
Consultando el sistema RAG...
Respuesta:
According to the provided context, here are the details about what happened to Sarkozy:

* He was originally sentenced to 3 years in jail in 2021, but 2 of those years were suspended and the third converted to electronic monitoring instead of prison.
* He was convicted of trying to bribe a judge in 2014 for suggesting he could secure a prestigious job in return for information about a separate case.
* The conviction was upheld by France's highest court, the Cour de Cassation, which means Sarkozy must now wear an electronic monitoring bracelet for a year as part of his sentence.
* His planned appeal to the European Court of Human Rights will not delay the verdict from being carried out.
