In [2]:
import requests
import gradio as gr
from bs4 import BeautifulSoup
from langchain_ollama.chat_models import ChatOllama
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
import requests
from bs4 import BeautifulSoup

# Source page whose visible text feeds the vector store built in the next cells.
url = 'https://en.wikipedia.org/wiki/Lionel_Messi'

# Wikimedia's User-Agent policy asks automated clients to identify themselves;
# generic library agents may be refused. The timeout keeps the cell from
# hanging forever when the network is unreachable.
response = requests.get(
    url,
    headers={'User-Agent': 'chat-messi-notebook/0.1 (educational RAG demo; python-requests)'},
    timeout=30,
)

# Fail loudly: the following cells need `web_texts`, so merely printing here
# would leave it undefined (or stale from a previous run) and surface later as
# a confusing error far from the real cause.
if response.status_code != 200:
    raise RuntimeError(f'No se pudo acceder a la página: {response.status_code}')

# Strip the markup and keep only the page text, collapsing whitespace.
soup = BeautifulSoup(response.text, 'html.parser')
page_text = soup.get_text(separator=' ', strip=True)

# Split into chunks of at most 400 characters with a 20-character overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=20)
web_texts = text_splitter.split_text(page_text)

# Show a count and a short preview instead of dumping every chunk.
print(f'{len(web_texts)} fragmentos generados')
print(web_texts[:3])

In [4]:
# Sentence-embedding model used both to index the chunks and to embed queries.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)

In [5]:
# Position-based ids make this cell idempotent. Without explicit ids a fresh
# random id is generated per chunk, so re-running the cell in the same kernel
# appends a duplicate copy of every chunk to the "players2" collection and the
# retriever starts returning repeated passages.
# NOTE(review): relies on the store upserting by id; if the page later yields
# fewer chunks, higher-numbered entries from an earlier run are not removed.
chunk_ids = [f"messi-{i}" for i in range(len(web_texts))]

bbdd_vector = Chroma.from_texts(
    texts=web_texts,
    collection_name="players2",
    embedding=embeddings,
    ids=chunk_ids,
)
print(f"Se creó el vector store con {len(web_texts)} documentos.")

In [16]:
def _format_docs(docs):
    """Join the text of the retrieved documents into a single context string.

    Args:
        docs: Iterable of retrieved documents exposing ``page_content``.

    Returns:
        The documents' text separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


retriever = bbdd_vector.as_retriever()
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
llm = "llama3.2"
modelo = ChatOllama(model=llm)

# The retriever yields a list of Document objects. Interpolating that list
# directly into the prompt would insert its Python repr (class names, metadata,
# escaped quotes), so it is flattened to plain text first.
chain = (
    {
        "context": retriever | RunnableLambda(_format_docs),
        "question": RunnablePassthrough(),
    }
    | prompt
    | modelo
    | StrOutputParser()
)

def buscar_bbdd(input, history):
    """Answer a chat message by running it through the module-level ``chain``.

    Args:
        input: The user's message, passed unchanged to ``chain.invoke``.
        history: Previous chat turns supplied by the caller; not used.

    Returns:
        Whatever ``chain.invoke`` returns, or a Spanish error string that
        embeds the exception text if the call raises.
    """
    try:
        return chain.invoke(input)
    except Exception as err:
        # Return the failure as a chat reply instead of raising.
        return f"Error al procesar la consulta: {err}"

In [17]:
# Chat UI: a titled page whose chat widget forwards each message to buscar_bbdd.
with gr.Blocks(theme=gr.themes.Glass()) as demo:
    gr.Markdown("# CHAT MESSI")
    chat_interface = gr.ChatInterface(
        fn=buscar_bbdd,
        type="messages",
    )

# Start the local web server for the interface.
demo.launch()

* Running on local URL:  http://127.0.0.1:7865

To create a public link, set `share=True` in `launch()`.


